In the host browser, also show the properties of failed documents, including

referrer URLs (this is a VERY USEFUL feature for SEO and web administration!)
pull/1/head
Michael Peter Christen 11 years ago
parent 176acce5cb
commit 434e13b46d

@@ -441,7 +441,9 @@ public class HostBrowser {
// maybe this is only in the errorURL // maybe this is only in the errorURL
prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())).getFailReason() : "unknown error"); prop.put("files_list_" + c + "_type_stored_error", process == HarvestProcess.ERRORS ? sb.crawlQueues.errorURL.get(ASCII.String(uri.hash())).getFailReason() : "unknown error");
} else { } else {
prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail"); String ids = ASCII.String(uri.hash());
InfoCacheEntry ice = infoCache.get(ids);
prop.put("files_list_" + c + "_type_stored_error", failType == FailType.excl ? "excluded from indexing" : "load fail; " + ice.toString());
} }
} }
if (loadRight) { if (loadRight) {

@@ -327,7 +327,8 @@ public class Segment {
this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0); this.externalHosts = new RowHandleSet(6, Base64Order.enhancedCoder, 0);
this.internalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0); this.internalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0);
this.externalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0); this.externalIDs = new RowHandleSet(12, Base64Order.enhancedCoder, 0);
if (Segment.this.fulltext.writeToWebgraph()) { boolean useWebgraph = Segment.this.fulltext.writeToWebgraph();
if (useWebgraph) {
// reqd the references from the webgraph // reqd the references from the webgraph
SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector(); SolrConnector webgraph = Segment.this.fulltext.getWebgraphConnector();
webgraph.commit(true); webgraph.commit(true);
@@ -354,7 +355,8 @@ public class Segment {
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
ConcurrentLog.logException(e); ConcurrentLog.logException(e);
} }
} else if (connectedCitation()) { }
if ((!useWebgraph || (internalIDs.size() == 0 && externalIDs.size() == 0)) && connectedCitation()) {
// read the references from the citation index // read the references from the citation index
ReferenceContainer<CitationReference> references; ReferenceContainer<CitationReference> references;
references = urlCitation().get(id, null); references = urlCitation().get(id, null);

Loading…
Cancel
Save