From 3491ab4c38dab7ccf24e2c726503e1709eb82b1f Mon Sep 17 00:00:00 2001 From: orbiter <mc@yacy.net> Date: Fri, 1 Aug 2014 13:21:16 +0200 Subject: [PATCH] removed unused images from webgraph edge computation --- source/net/yacy/search/schema/CollectionConfiguration.java | 2 +- source/net/yacy/search/schema/WebgraphConfiguration.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 519bab1b9..36c7aa77e 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -898,7 +898,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri Boolean canonical_equal_sku = canonical == null ? null : canonical.toNormalform(true).equals(url); if (webgraph != null && (!containsCanonical || (canonical_equal_sku != null && (canonical_equal_sku.booleanValue())))) { // a document with canonical tag should not get a webgraph relation, because that belongs to the canonical document - List<SolrInputDocument> edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, images, processTypes, document.getAnchors(), sourceName); + List<SolrInputDocument> edges = webgraph.getEdges(subgraph, digestURL, responseHeader, collections, crawldepth, processTypes, document.getAnchors(), sourceName); // this also enriched the subgraph doc.webgraphDocuments.addAll(edges); } else { diff --git a/source/net/yacy/search/schema/WebgraphConfiguration.java b/source/net/yacy/search/schema/WebgraphConfiguration.java index 9204b95b5..dc01e6093 100644 --- a/source/net/yacy/search/schema/WebgraphConfiguration.java +++ b/source/net/yacy/search/schema/WebgraphConfiguration.java @@ -100,7 +100,7 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial public List<SolrInputDocument> getEdges( final Subgraph subgraph, final DigestURL source, final ResponseHeader responseHeader, Map<String, Pattern> collections, int crawldepth_source, - final List<ImageEntry> images, final Set<ProcessType> processTypes, final Collection<AnchorURL> links, + final Set<ProcessType> processTypes, final Collection<AnchorURL> links, final String sourceName) { boolean allAttr = this.isEmpty(); boolean generalNofollow = responseHeader == null ? false : responseHeader.get("X-Robots-Tag", "").indexOf("nofollow") >= 0;