|
|
@ -241,31 +241,7 @@ public class Segment {
|
|
|
|
this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
|
|
|
|
this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
|
|
|
|
this.internalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
|
|
|
|
this.internalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
|
|
|
|
this.externalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
|
|
|
|
this.externalIDs = new RowHandleSet(Word.commonHashLength, Base64Order.enhancedCoder, 0);
|
|
|
|
try {
|
|
|
|
if (Segment.this.fulltext.useWebgraph()) {
|
|
|
|
if (connectedCitation()) {
|
|
|
|
|
|
|
|
// read the references from the citation index
|
|
|
|
|
|
|
|
ReferenceContainer<CitationReference> references;
|
|
|
|
|
|
|
|
references = urlCitation().get(id, null);
|
|
|
|
|
|
|
|
if (references == null) return; // no references at all
|
|
|
|
|
|
|
|
Iterator<CitationReference> ri = references.entries();
|
|
|
|
|
|
|
|
while (ri.hasNext()) {
|
|
|
|
|
|
|
|
CitationReference ref = ri.next();
|
|
|
|
|
|
|
|
byte[] hh = ref.hosthash(); // host hash
|
|
|
|
|
|
|
|
if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
|
|
|
|
|
|
|
|
internalIDs.put(ref.urlhash());
|
|
|
|
|
|
|
|
internal++;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
externalHosts.put(hh);
|
|
|
|
|
|
|
|
externalIDs.put(ref.urlhash());
|
|
|
|
|
|
|
|
external++;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (SpaceExceededException e) {
|
|
|
|
|
|
|
|
// the Citation Index got too large, we ignore the problem and hope that a second solr index is attached which will take over now
|
|
|
|
|
|
|
|
if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((internalIDs.size() == 0 || !connectedCitation()) && Segment.this.fulltext.useWebgraph()) {
|
|
|
|
|
|
|
|
// read the references from the webgraph
|
|
|
|
// read the references from the webgraph
|
|
|
|
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
|
|
|
|
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
|
|
|
|
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName());
|
|
|
|
BlockingQueue<SolrDocument> docs = webgraph.concurrentDocumentsByQuery("{!raw f=" + WebgraphSchema.target_id_s.getSolrFieldName() + "}" + ASCII.String(id), WebgraphSchema.source_chars_i.getSolrFieldName() + " asc", 0, 10000000, Long.MAX_VALUE, 100, 1, WebgraphSchema.source_id_s.getSolrFieldName());
|
|
|
@ -292,6 +268,27 @@ public class Segment {
|
|
|
|
} catch (final InterruptedException e) {
|
|
|
|
} catch (final InterruptedException e) {
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
ConcurrentLog.logException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if (connectedCitation()) try {
|
|
|
|
|
|
|
|
// read the references from the citation index
|
|
|
|
|
|
|
|
ReferenceContainer<CitationReference> references;
|
|
|
|
|
|
|
|
references = urlCitation().get(id, null);
|
|
|
|
|
|
|
|
if (references == null) return; // no references at all
|
|
|
|
|
|
|
|
Iterator<CitationReference> ri = references.entries();
|
|
|
|
|
|
|
|
while (ri.hasNext()) {
|
|
|
|
|
|
|
|
CitationReference ref = ri.next();
|
|
|
|
|
|
|
|
byte[] hh = ref.hosthash(); // host hash
|
|
|
|
|
|
|
|
if (ByteBuffer.equals(hh, 0, id, 6, 6)) {
|
|
|
|
|
|
|
|
internalIDs.put(ref.urlhash());
|
|
|
|
|
|
|
|
internal++;
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
externalHosts.put(hh);
|
|
|
|
|
|
|
|
externalIDs.put(ref.urlhash());
|
|
|
|
|
|
|
|
external++;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} catch (SpaceExceededException e) {
|
|
|
|
|
|
|
|
// the Citation Index got too large, we ignore the problem and hope that a second solr index is attached which will take over now
|
|
|
|
|
|
|
|
if (Segment.this.fulltext.useWebgraph()) internalIDs.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
this.externalHosts.optimize();
|
|
|
|
this.externalHosts.optimize();
|
|
|
|
this.internalIDs.optimize();
|
|
|
|
this.internalIDs.optimize();
|
|
|
|