diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java
index 693378aa9..b456ffded 100644
--- a/source/net/yacy/search/index/Fulltext.java
+++ b/source/net/yacy/search/index/Fulltext.java
@@ -211,15 +211,26 @@ public final class Fulltext implements Iterable {
         // get the metadata from Solr
         try {
             SolrDocument doc = this.solr.get(ASCII.String(urlHash));
-            if (doc != null) return new URIMetadataNode(doc, wre, weight);
+            if (doc != null) {
+                if (this.urlIndexFile != null) this.urlIndexFile.remove(urlHash);
+                return new URIMetadataNode(doc, wre, weight);
+            }
         } catch (IOException e) {
             Log.logException(e);
         }
 
         // get the metadata from the old metadata index
         if (this.urlIndexFile != null) try {
-            final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
-            if (entry != null) return new URIMetadataRow(entry, wre, weight);
+            if (this.connectedSolr()) {
+                // slow migration to solr
+                final Row.Entry entry = this.urlIndexFile.remove(urlHash);
+                if (entry == null) return null;
+                URIMetadataRow row = new URIMetadataRow(entry, wre, weight);
+                this.putDocument(this.solrScheme.metadata2solr(row));
+                return row;
+            }
+            final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
+            if (entry != null) return new URIMetadataRow(entry, wre, weight);
         } catch (final IOException e) {
             Log.logException(e);
         }
@@ -242,14 +253,25 @@ public final class Fulltext implements Iterable {
         // get the document from Solr
         try {
             SolrDocument doc = this.solr.get(ASCII.String(urlHash));
-            if (doc != null) return doc;
+            if (doc != null) {
+                if (this.urlIndexFile != null) this.urlIndexFile.remove(urlHash);
+                return doc;
+            }
         } catch (IOException e) {
             Log.logException(e);
         }
 
         // get the document from the old metadata index
         if (this.urlIndexFile != null) try {
-            final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
+            if (this.connectedSolr()) {
+                // slow migration to solr
+                final Row.Entry entry = this.urlIndexFile.remove(urlHash);
+                if (entry == null) return null;
+                URIMetadataRow row = new URIMetadataRow(entry, wre, weight);
+                this.putDocument(this.solrScheme.metadata2solr(row));
+                return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(row));
+            }
+            final Row.Entry entry = this.urlIndexFile.get(urlHash, false);
             if (entry == null) return null;
             return ClientUtils.toSolrDocument(getSolrScheme().metadata2solr(new URIMetadataRow(entry, wre, weight)));
         } catch (final IOException e) {
diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java
index 1a1dbc3f7..c331ba361 100644
--- a/source/net/yacy/search/index/SolrConfiguration.java
+++ b/source/net/yacy/search/index/SolrConfiguration.java
@@ -206,16 +206,16 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
         if (allAttr || contains(YaCySchema.description)) addSolr(solrdoc, YaCySchema.description, md.snippet());
         if (allAttr || contains(YaCySchema.content_type)) addSolr(solrdoc, YaCySchema.content_type, Response.doctype2mime(digestURI.getFileExtension(), md.doctype()));
         if (allAttr || contains(YaCySchema.last_modified)) addSolr(solrdoc, YaCySchema.last_modified, md.moddate());
-        if (allAttr || contains(YaCySchema.text_t)) addSolr(solrdoc, YaCySchema.text_t, ""); // not delivered in metadata
         if (allAttr || contains(YaCySchema.wordcount_i)) addSolr(solrdoc, YaCySchema.wordcount_i, md.wordCount());
-        if (allAttr || contains(YaCySchema.keywords)) {
-            String keywords = md.dc_subject();
-            Bitfield flags = md.flags();
-            if (flags.get(Condenser.flag_cat_indexof)) {
-                if (keywords == null || keywords.isEmpty()) keywords = "indexof"; else {
-                    if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof";
-                }
-            }
+
+        String keywords = md.dc_subject();
+        Bitfield flags = md.flags();
+        if (flags.get(Condenser.flag_cat_indexof)) {
+            if (keywords == null || keywords.isEmpty()) keywords = "indexof"; else {
+                if (keywords.indexOf(',') > 0) keywords += ", indexof"; else keywords += " indexof";
+            }
+        }
+        if (allAttr || contains(YaCySchema.keywords)) {
             addSolr(solrdoc, YaCySchema.keywords, keywords);
         }
 
@@ -250,9 +250,35 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
         if (allAttr || contains(YaCySchema.audiolinkscount_i)) addSolr(solrdoc, YaCySchema.audiolinkscount_i, md.laudio());
         if (allAttr || contains(YaCySchema.videolinkscount_i)) addSolr(solrdoc, YaCySchema.videolinkscount_i, md.lvideo());
         if (allAttr || contains(YaCySchema.applinkscount_i)) addSolr(solrdoc, YaCySchema.applinkscount_i, md.lapp());
-
+        if (allAttr || contains(YaCySchema.text_t)) {
+            // construct the text from other metadata parts.
+            // This is necessary here since that is used to search the link when no other data (parsed text body) is available
+            StringBuilder sb = new StringBuilder(120);
+            accText(sb, md.dc_title());
+            accText(sb, md.dc_creator());
+            accText(sb, md.dc_publisher());
+            accText(sb, md.snippet());
+            accText(sb, digestURI.toTokens());
+            accText(sb, keywords);
+            addSolr(solrdoc, YaCySchema.text_t, sb.toString());
+        }
+        
         return solrdoc;
     }
+    
+    /**
+     * Append one text fragment to the accumulated text: fragments are separated by a single
+     * space and every appended fragment ends with a dot (one is added if it is missing).
+     * Null, empty and whitespace-only fragments are skipped and leave the accumulator untouched.
+     * @param sb the accumulator the fragment is appended to
+     * @param text the fragment to append; may be null
+     */
+    private static void accText(final StringBuilder sb, String text) {
+        if (text == null) return;
+        text = text.trim();
+        if (text.isEmpty()) return; // must be tested after trim(): charAt(length - 1) below fails on ""
+        if (sb.length() != 0) sb.append(' ');
+        if (text.charAt(text.length() - 1) == '.') sb.append(text); else sb.append(text).append('.');
+    }
 
     public SolrDoc yacy2solr(final String id, final ResponseHeader header, final Document yacydoc, final URIMetadata metadata) {
         // we use the SolrCell design as index scheme