If the webgraph is used, then also use it for reference computation to

avoid contradictions with references_i in the collection index.
10 years ago · f94c91315b
parent 6e1dc444c3
commit f94c91315b
1 changed files with 22 additions and 25 deletions
--- a/source/net/yacy/search/index/Segment.java
+++ b/source/net/yacy/search/index/Segment.java
@ -241,31 +241,7 @@ public class Segment {
            this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
            this.internalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
            this.externalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
-            try {
-                if (connectedCitation()) {
-                    // read the references from the citation index
-                    ReferenceContainer<CitationReference> references;
-                    references = urlCitation().get(id, null);
-                    if (references == null) return; // no references at all
-                    Iterator<CitationReference> ri = references.entries();
-                    while (ri.hasNext()) {
-                        CitationReference ref = ri.next();
-                        byte[] hh = ref.hosthash(); // host hash
-                        if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
-                            internalIDs.put(ref.urlhash());
-                            internal++;
-                        } else {
-                            externalHosts.put(hh);
-                            externalIDs.put(ref.urlhash());
-                            external++;
-                        }
-                    }
-                }
-            } catch (SpaceExceededException e) {
-                // the Citation Index got too large, we ignore the problem and hope that a second solr index is attached which will take over now
-                if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
-            }
-            if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
+            if (Segment.this.fulltext.useWebgraph()) {
                // reqd the references from the webgraph
                SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
                BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName());
@ -292,6 +268,27 @@ public class Segment {
                } catch (final InterruptedException e) {
                    ConcurrentLog.logException(e);
                }
+            } else if (connectedCitation()) try {
+                // read the references from the citation index
+                ReferenceContainer<CitationReference> references;
+                references = urlCitation().get(id, null);
+                if (references == null) return; // no references at all
+                Iterator<CitationReference> ri = references.entries();
+                while (ri.hasNext()) {
+                    CitationReference ref = ri.next();
+                    byte[] hh = ref.hosthash(); // host hash
+                    if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
+                        internalIDs.put(ref.urlhash());
+                        internal++;
+                    } else {
+                        externalHosts.put(hh);
+                        externalIDs.put(ref.urlhash());
+                        external++;
+                    }
+                }
+            } catch (SpaceExceededException e) {
+                // the Citation Index got too large, we ignore the problem and hope that a second solr index is attached which will take over now
+                if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
            }
            this.externalHosts.optimize();
            this.internalIDs.optimize();