This attribute can be used for ranking and for other purposes (it was
requested by a customer).
The click depth is computed in two steps:
- during indexing, the current fill state of the reverse link index is
used to backtrack from the current page to the root page. The length of
that backtrack is the click depth. However, because the reverse link index
is still being filled at that time, this does not necessarily find the
shortest click depth. To get the shortest one, a second pass that checks
the value again is needed.
- added a process tag that can be used to run operations on the existing
index after a crawl, e.g. calculating the shortest click path. Added a
field to control this operation, but not yet a method that performs it.
- added a visualization of the clickpath length in the host browser
#(load)#<tdalign="left"colspan="5"nowrapclass="listingem">link, detected from context</td>::<tdalign="left"colspan="5"nowrapclass="listingnok"><ahref="/HostBrowser.html?load=#[url]#&path=#[path]#">load & index</a>#(/load)#</td>::
@ -42,14 +42,14 @@ public enum YaCySchema implements Schema {
fuzzy_signature_text_t(SolrType.text_general,true,true,false,"intermediate data produced in EnhancedTextProfileSignature: a list of word frequencies"),
fuzzy_signature_unique_b(SolrType.bool,true,true,false,"flag shows if fuzzy_signature_l is unique at the time of document creation, used for double-check during search"),
size_i(SolrType.num_integer,true,true,false,"the size of the raw source"),// int size();
failreason_t(SolrType.text_general,true,true,false,"fail reason if a page was not loaded. if the page was loaded then this field is empty"),
failtype_s(SolrType.string,true,true,false,"fail type if a page was not loaded. This field is either empty, 'excl' or 'fail'"),
httpstatus_i(SolrType.num_integer,true,true,false,"html status return code (i.e. \"200\" for ok), -1 if not loaded"),
httpstatus_redirect_s(SolrType.num_integer,true,true,false,"html status return code (i.e. \"200\" for ok), -1 if not loaded"),
references_i(SolrType.num_integer,true,true,false,"number of unique http references; used for ranking"),
clickdepth_i(SolrType.num_integer,true,true,false,"depth of web page according to number of clicks from the 'main' page, which is the page that appears if only the host is entered as url"),
process_sxt(SolrType.string,true,true,true,"needed (post-)processing steps on this metadata set"),
// optional but recommended, part of index distribution
load_date_dt(SolrType.date,true,true,false,"time when resource was loaded"),
fresh_date_dt(SolrType.date,true,true,false,"date until resource shall be considered as fresh"),
if(allAttr||contains(YaCySchema.failreason_t))add(doc,YaCySchema.failreason_t,"");// overwrite a possible fail reason (in case that there was a fail reason before)
RowHandleSetignore=newRowHandleSet(URIMetadataRow.rowdef.primaryKeyLength,URIMetadataRow.rowdef.objectOrder,100);// a set of urlhashes to be ignored. This is generated from all hashes that are seen during recursion to prevent enless loops
RowHandleSetlevelhashes=newRowHandleSet(URIMetadataRow.rowdef.primaryKeyLength,URIMetadataRow.rowdef.objectOrder,1);// all hashes of a clickdepth. The first call contains the target hash only and therefore just one entry