- showing references count and clickdepth in host browser

- fixed generation and presentation of both values
pull/1/head
Michael Peter Christen 12 years ago
parent 2c3b024196
commit edc0b33f6d

@@ -128,7 +128,7 @@ function updatepage(str) {
<td align="left" nowrap class=#(stored)#"listingem"::"listing"#(/stored)#>#[url]#&nbsp;<a href="#[url]#" target="_blank"><img src="/env/grafics/link.gif"/></a></td>
#(stored)#
#(load)#<td align="left" colspan="5" nowrap class="listingem">link, detected from context</td>::<td align="left" colspan="5" nowrap class="listingnok"><a href="/HostBrowser.html?load=#[url]#&path=#[path]#">load &amp; index</a>#(/load)#</td>::
<td align="left" colspan="3" nowrap class="listingok">indexed</td><td align="left" colspan="2" nowrap class="listingok">#[comment]#</td>::
<td align="left" colspan="1" nowrap class="listingok">indexed</td><td align="left" colspan="4" nowrap class="listingem">#[comment]#</td>::
<td align="left" colspan="5" nowrap class="pending">loading</td>::
<td align="left" colspan="5" nowrap class="listingnok">#[error]#</td>
#(/stored)#

@@ -254,14 +254,16 @@ public class HostBrowser {
CollectionSchema.inboundlinks_urlstub_txt.getSolrFieldName(),
CollectionSchema.outboundlinks_protocol_sxt.getSolrFieldName(),
CollectionSchema.outboundlinks_urlstub_txt.getSolrFieldName(),
CollectionSchema.clickdepth_i.getSolrFieldName()
CollectionSchema.clickdepth_i.getSolrFieldName(),
CollectionSchema.references_i.getSolrFieldName()
);
SolrDocument doc;
Set<String> storedDocs = new HashSet<String>();
Map<String, FailType> errorDocs = new HashMap<String, FailType>();
Set<String> inboundLinks = new HashSet<String>();
Map<String, ReversibleScoreMap<String>> outboundHosts = new HashMap<String, ReversibleScoreMap<String>>();
RowHandleMap clickdepth = new RowHandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 1, 100, "clickdepth");
RowHandleMap clickdepth = new RowHandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 2, 100, "clickdepth");
RowHandleMap references = new RowHandleMap(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 2, 100, "references");
int hostsize = 0;
final List<byte[]> deleteIDs = new ArrayList<byte[]>();
long timeout = System.currentTimeMillis() + TIMEOUT;
@@ -271,6 +273,8 @@ public class HostBrowser {
FailType error = errortype == null ? null : FailType.valueOf(errortype);
Integer cd = (Integer) doc.getFieldValue(CollectionSchema.clickdepth_i.getSolrFieldName());
if (cd != null && cd.intValue() >= 0) clickdepth.add(ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())), cd.intValue());
Integer rc = (Integer) doc.getFieldValue(CollectionSchema.references_i.getSolrFieldName());
if (rc != null && rc.intValue() >= 0) references.add(ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())), rc.intValue());
if (u.startsWith(path)) {
if (delete) {
deleteIDs.add(ASCII.getBytes((String) doc.getFieldValue(CollectionSchema.id.getSolrFieldName())));
@@ -407,7 +411,8 @@ public class HostBrowser {
prop.put("files_list_" + c + "_type_stored", type == StoreType.INDEX ? 1 : error ? 3 : loading ? 2 : 0 /*linked*/);
if (type == StoreType.INDEX) {
long cd = clickdepth.get(uri.hash());
prop.put("files_list_" + c + "_type_stored_comment", cd >= 0 ? "clickdepth = " + cd : "");
long rc = references.get(uri.hash());
prop.put("files_list_" + c + "_type_stored_comment", (rc >= 0 ? rc + " references" : "") + (rc >= 0 && cd >= 0 ? ", " : "") + (cd >= 0 ? "clickdepth " + cd : ""));
}
prop.put("files_list_" + c + "_type_stored_load", loadRight ? 1 : 0);
if (error) {

@@ -545,7 +545,7 @@ public class Segment {
// ENRICH DOCUMENT WITH RANKING INFORMATION
if (this.urlCitationIndex != null && this.fulltext.getDefaultConfiguration().contains(CollectionSchema.references_i)) {
int references = this.urlCitationIndex.count(url.hash());
if (references > 0) vector.setField(CollectionSchema.references_i.getSolrFieldName(), references);
vector.setField(CollectionSchema.references_i.getSolrFieldName(), references);
}
// STORE TO SOLR

@@ -37,7 +37,6 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import net.yacy.cora.document.ASCII;

@@ -354,7 +354,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
clickdepth = 999;
processTypes.add(ProcessType.CLICKDEPTH); // postprocessing needed; this is also needed if the depth is positive; there could be a shortcut
}
add(doc, CollectionSchema.clickdepth_i, clickdepth);
CollectionSchema.clickdepth_i.add(doc, clickdepth); // no lazy value checking to get a '0' into the index
}
if (allAttr || contains(CollectionSchema.ip_s)) {

Loading…
Cancel
Save