diff --git a/htroot/api/webstructure.java b/htroot/api/webstructure.java index 47287ef1e..7e820f495 100644 --- a/htroot/api/webstructure.java +++ b/htroot/api/webstructure.java @@ -66,7 +66,7 @@ public class webstructure { urlhash = ASCII.getBytes(about); hosthash = about.substring(6); url = authenticated ? sb.getURL(urlhash) : null; - } else if (authenticated && about.length() > 0) { + } else if (about.length() > 0) { // consider "about" as url or hostname try { url = new DigestURL(about.indexOf("://") >= 0 ? about : "http://" + about); // accept also domains diff --git a/source/net/yacy/search/schema/CollectionConfiguration.java b/source/net/yacy/search/schema/CollectionConfiguration.java index 4a13cd9c9..8950dcd9f 100644 --- a/source/net/yacy/search/schema/CollectionConfiguration.java +++ b/source/net/yacy/search/schema/CollectionConfiguration.java @@ -859,7 +859,7 @@ public class CollectionConfiguration extends SchemaConfiguration implements Seri // create a subgraph if (!containsCanonical) { // a document with canonical tag should not get a webgraph relation, because that belongs to the canonical document - webgraph.addEdges(subgraph, digestURL, responseHeader, collections, clickdepth, images, true, document.getAnchors(), sourceName); + webgraph.addEdges(subgraph, digestURL, responseHeader, collections, clickdepth, images, document.getAnchors(), sourceName); } // list all links diff --git a/source/net/yacy/search/schema/WebgraphConfiguration.java b/source/net/yacy/search/schema/WebgraphConfiguration.java index e5a4be458..ab8995c9b 100644 --- a/source/net/yacy/search/schema/WebgraphConfiguration.java +++ b/source/net/yacy/search/schema/WebgraphConfiguration.java @@ -111,14 +111,14 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial public void addEdges( final Subgraph subgraph, final DigestURL source, final ResponseHeader responseHeader, Map collections, int clickdepth_source, - final List images, final boolean inbound, final Collection links, + final List images, final Collection links, final String sourceName) { boolean allAttr = this.isEmpty(); boolean generalNofollow = responseHeader == null ? false : responseHeader.get("X-Robots-Tag", "").indexOf("nofollow") >= 0; int target_order = 0; for (final AnchorURL target_url: links) { SolrInputDocument edge = getEdge( - subgraph, source, responseHeader, collections, clickdepth_source, images, inbound, + subgraph, source, responseHeader, collections, clickdepth_source, images, sourceName, allAttr, generalNofollow, target_order, target_url); target_order++; // add the edge to the subgraph @@ -129,13 +129,14 @@ public class WebgraphConfiguration extends SchemaConfiguration implements Serial public SolrInputDocument getEdge( final Subgraph subgraph, final DigestURL source, final ResponseHeader responseHeader, Map collections, int clickdepth_source, - final List images, final boolean inbound, + final List images, final String sourceName, boolean allAttr, boolean generalNofollow, int target_order, AnchorURL target_url) { Set processTypes = new LinkedHashSet(); final String name = target_url.getNameProperty(); // the name attribute final String text = target_url.getTextProperty(); // the text between the tag String rel = target_url.getRelProperty(); // the rel-attribute + boolean inbound = target_url.getHost().equals(source.getHost()); // well, not everybody defines 'outbound' that way but however, thats used here. int ioidx = inbound ? 0 : 1; if (generalNofollow) { // patch the rel attribute since the header makes nofollow valid for all links