diff --git a/source/net/yacy/cora/federate/solr/YaCySchema.java b/source/net/yacy/cora/federate/solr/YaCySchema.java index c4c4ec737..6e140ec86 100644 --- a/source/net/yacy/cora/federate/solr/YaCySchema.java +++ b/source/net/yacy/cora/federate/solr/YaCySchema.java @@ -21,9 +21,11 @@ package net.yacy.cora.federate.solr; import java.util.Date; +import java.util.HashSet; import java.util.List; +import java.util.Set; - +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; public enum YaCySchema implements Schema { @@ -350,5 +352,24 @@ public enum YaCySchema implements Schema { doc.setField(this.getSolrFieldName(), value); } + /** + * Converts a SolrDocument read from the search index into a SolrInputDocument, so that it can be modified and indexed again. + * Use this as a replacement for ClientUtils.toSolrInputDocument: it leaves out the fields which are created automatically + * during the indexing process, namely the coordinate_p sub-fields with the suffixes "_0_coordinate" and "_1_coordinate" and + * the author_sxt field. Every other field of the document is copied by name with its value and a boost of 1.0f. + * @param doc the solr document whose fields are copied + * @return a new solr input document containing all fields of doc except the omitted ones + */ + public static SolrInputDocument toSolrInputDocument(SolrDocument doc) { + SolrInputDocument sid = new SolrInputDocument(); + Set omitFields = new HashSet(); + omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_0_coordinate"); + omitFields.add(YaCySchema.coordinate_p.getSolrFieldName() + "_1_coordinate"); + omitFields.add(YaCySchema.author_sxt.getSolrFieldName()); + for (String name: doc.getFieldNames()) { + if (!omitFields.contains(name)) sid.addField(name, doc.getFieldValue(name), 1.0f); + } + return sid; + } } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index d21d86d10..bb3d9a8ef 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -1161,7 +1161,7 @@ public final class Protocol // passed all checks, store url if (!localsearch) { try { - event.query.getSegment().fulltext().putDocument(ClientUtils.toSolrInputDocument(doc)); + 
event.query.getSegment().fulltext().putDocument(YaCySchema.toSolrInputDocument(doc)); ResultURLs.stack( ASCII.String(urlEntry.url().hash()), urlEntry.url().getHost(), diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index d6e8db437..6030cdbd4 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -82,7 +82,6 @@ import java.util.zip.GZIPOutputStream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; -import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; @@ -2233,7 +2232,9 @@ public final class Switchboard extends serverSwitch { if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) { // that means we must search for those entries. index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found + //BlockingQueue docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10); BlockingQueue docs = index.fulltext().getSolr().concurrentQuery(YaCySchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10); + SolrDocument doc; int proccount_clickdepth = 0; int proccount_clickdepthchange = 0; @@ -2256,7 +2257,7 @@ public final class Switchboard extends serverSwitch { url = new DigestURI((String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName()))); int clickdepth = SolrConfiguration.getClickDepth(index.urlCitation(), url); if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++; - SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc); + SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc); sid.setField(YaCySchema.clickdepth_i.getSolrFieldName(), clickdepth); // refresh the link count; 
it's 'cheap' to do this here diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index e3bfc69c7..853df90b7 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -436,7 +436,7 @@ public class Segment { // switch attribute also in all existing documents (which should be exactly only one!) SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000); for (SolrDocument doc: docs) { - SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc); + SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc); sid.setField(uniquefield.getSolrFieldName(), false); this.fulltext.getSolr().add(sid); }