Removed the unused `images` parameter from the webgraph edge computation (`WebgraphConfiguration.getEdges`)

pull/1/head
orbiter 11 years ago
parent 2371d6b8db
commit 3491ab4c38

@ -898,7 +898,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri
Boolean canonical_equal_sku = canonical == null ? null : canonical.toNormalform(true).equals(url);
if (webgraph != null && (!containsCanonical || (canonical_equal_sku != null && (canonical_equal_sku.booleanValue())))) {
// a document with a canonical tag should not get a webgraph relation, because that relation belongs to the canonical document
List<SolrInputDocument> edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, images, processTypes, document.getAnchors(), sourceName);
List<SolrInputDocument> edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, processTypes, document.getAnchors(), sourceName);
// this call also enriches the subgraph
doc.webgraphDocuments.addAll(edges);
} else {

@ -100,7 +100,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial
public List<SolrInputDocument> getEdges(
final Subgraph subgraph,
final DigestURL source, final ResponseHeader responseHeader, Map<String, Pattern> collections, int crawldepth_source,
final List<ImageEntry> images, final Set<ProcessType> processTypes, final Collection<AnchorURL> links,
final Set<ProcessType> processTypes, final Collection<AnchorURL> links,
final String sourceName) {
boolean allAttr = this.isEmpty();
boolean generalNofollow = responseHeader == null ? false : responseHeader.get("X-Robots-Tag", "").indexOf("nofollow") >= 0;

Loading…
Cancel
Save