RowHandleSet ignore=newRowHandleSet(URIMetadataRow.rowdef.primaryKeyLength,URIMetadataRow.rowdef.objectOrder,100);// a set of urlhashes to be ignored. This is generated from all hashes that are seen during recursion to prevent enless loops
RowHandleSet levelhashes=newRowHandleSet(URIMetadataRow.rowdef.primaryKeyLength,URIMetadataRow.rowdef.objectOrder,1);// all hashes of a clickdepth. The first call contains the target hash only and therefore just one entry
Set<byte[]>ignore=newTreeSet<byte[]>(NaturalOrder.naturalOrder);// a set of urlhashes to be ignored. This is generated from all hashes that are seen during recursion to prevent enless loops
Set<byte[]>levelhashes=newTreeSet<byte[]>(NaturalOrder.naturalOrder);// all hashes of a clickdepth. The first call contains the target hash only and therefore just one entry
levelhashes.add(searchhash);
intleveldepth=0;// the recursion depth and therefore the result depth-1. Shall be 0 for the first call
finalbyte[]hosthash=newbyte[6];// the host of the url to be checked
@@ -907,7 +908,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
// To do so, we first must collect all canonical links, find all references to them, get the anchor list of the documents and patch the citation reference of these links
Stringpatchquery=CollectionSchema.host_s.getSolrFieldName()+":"+host+" AND "+CollectionSchema.canonical_s.getSolrFieldName()+":[* TO *]";
@@ -1062,10 +1071,11 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
proccount++;
}catch(finalThrowablee1){
ConcurrentLog.logException(e1);
}
countcheck++;
}
if(count!=countcheck)ConcurrentLog.warn("CollectionConfiguration","ambiguous collection document count for harvestkey "+harvestkey+": expected="+count+", counted="+countcheck);
if(count!=countcheck)ConcurrentLog.warn("CollectionConfiguration","ambiguous collection document count for harvestkey "+harvestkey+": expected="+count+", counted="+countcheck);// big gap for harvestkey = null
ConcurrentLog.info("CollectionConfiguration","cleanup_processing: re-calculated "+proccount+" new documents, "+
Stringquery=(harvestkey==null||!this.contains(WebgraphSchema.harvestkey_s)?"":WebgraphSchema.harvestkey_s.getSolrFieldName()+":\""+harvestkey+"\" AND ")+
WebgraphSchema.process_sxt.getSolrFieldName()+":[* TO *]";
Stringquery=(harvestkey==null||!this.contains(WebgraphSchema.harvestkey_s)?"":WebgraphSchema.harvestkey_s.getSolrFieldName()+":\""+harvestkey+"\" AND ")+WebgraphSchema.process_sxt.getSolrFieldName()+":[* TO *]";