fixed a problem with re-feeding of already indexed documents with

coordinates attached.
pull/1/head
Michael Peter Christen 12 years ago
parent cb38e860cf
commit 7806680ab8

@ -21,9 +21,11 @@
package net.yacy.cora.federate.solr;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
public enum YaCySchema implements Schema {
@ -350,5 +352,24 @@ public enum YaCySchema implements Schema {
doc.setField(this.getSolrFieldName(), value);
}
/**
 * Convert a SolrDocument to a SolrInputDocument.
 * This is useful if a document from the search index shall be modified and indexed again.
 * This shall be used as replacement of ClientUtils.toSolrInputDocument because we remove some fields
 * which are created automatically during the indexing process.
 * @param doc the solr document
 * @return a solr input document
 */
public static SolrInputDocument toSolrInputDocument(SolrDocument doc) {
    // fields generated by the indexing process itself; they must not be fed back
    final Set<String> skipFields = new HashSet<String>();
    final String coordinateField = YaCySchema.coordinate_p.getSolrFieldName();
    skipFields.add(coordinateField + "_0_coordinate");
    skipFields.add(coordinateField + "_1_coordinate");
    skipFields.add(YaCySchema.author_sxt.getSolrFieldName());
    final SolrInputDocument result = new SolrInputDocument();
    for (final String fieldName : doc.getFieldNames()) {
        if (skipFields.contains(fieldName)) continue;
        result.addField(fieldName, doc.getFieldValue(fieldName), 1.0f);
    }
    return result;
}
}

@ -1161,7 +1161,7 @@ public final class Protocol
// passed all checks, store url
if (!localsearch) {
try {
event.query.getSegment().fulltext().putDocument(ClientUtils.toSolrInputDocument(doc));
event.query.getSegment().fulltext().putDocument(YaCySchema.toSolrInputDocument(doc));
ResultURLs.stack(
ASCII.String(urlEntry.url().hash()),
urlEntry.url().getHost(),

@ -82,7 +82,6 @@ import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
@ -2233,7 +2232,9 @@ public final class Switchboard extends serverSwitch {
if (this.crawlQueues.coreCrawlJobSize() == 0 && index.connectedCitation() && index.fulltext().getSolrScheme().contains(YaCySchema.process_sxt)) {
// that means we must search for those entries.
index.fulltext().getSolr().commit(true); // make sure that we have latest information that can be found
//BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery("*:*", 0, 1000, 60000, 10);
BlockingQueue<SolrDocument> docs = index.fulltext().getSolr().concurrentQuery(YaCySchema.process_sxt.getSolrFieldName() + ":[* TO *]", 0, 1000, 60000, 10);
SolrDocument doc;
int proccount_clickdepth = 0;
int proccount_clickdepthchange = 0;
@ -2256,7 +2257,7 @@ public final class Switchboard extends serverSwitch {
url = new DigestURI((String) doc.getFieldValue(YaCySchema.sku.getSolrFieldName()), ASCII.getBytes((String) doc.getFieldValue(YaCySchema.id.getSolrFieldName())));
int clickdepth = SolrConfiguration.getClickDepth(index.urlCitation(), url);
if (oldclickdepth == null || oldclickdepth.intValue() != clickdepth) proccount_clickdepthchange++;
SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc);
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
sid.setField(YaCySchema.clickdepth_i.getSolrFieldName(), clickdepth);
// refresh the link count; it's 'cheap' to do this here

@ -436,7 +436,7 @@ public class Segment {
// switch attribute also in all existing documents (which should be exactly only one!)
SolrDocumentList docs = this.fulltext.getSolr().query(checkfield.getSolrFieldName() + ":" + checkstring + " AND " + uniquefield.getSolrFieldName() + ":true", 0, 1000);
for (SolrDocument doc: docs) {
SolrInputDocument sid = ClientUtils.toSolrInputDocument(doc);
SolrInputDocument sid = YaCySchema.toSolrInputDocument(doc);
sid.setField(uniquefield.getSolrFieldName(), false);
this.fulltext.getSolr().add(sid);
}

Loading…
Cancel
Save