From 5746aae3db80fd4b4629e5e4a71cd2a1989bbb9c Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 18 Apr 2014 06:51:46 +0200 Subject: [PATCH] add canonical links to the same crawldepth, not the next crawldepth --- source/net/yacy/document/Document.java | 6 +++++- source/net/yacy/search/Switchboard.java | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/document/Document.java b/source/net/yacy/document/Document.java index 71b8b79e2..1d901f99c 100644 --- a/source/net/yacy/document/Document.java +++ b/source/net/yacy/document/Document.java @@ -907,6 +907,8 @@ dc_rights return newDoc; } + public final static String CANONICAL_MARKER = "canonical"; + public static Map getHyperlinks(final Document[] documents) { final Map result = new HashMap(); for (final Document d: documents) { @@ -917,7 +919,9 @@ dc_rights String refresh = html.getRefreshPath(); if (refresh != null && refresh.length() > 0) try {result.put(new DigestURL(refresh), "refresh");} catch (final MalformedURLException e) {} DigestURL canonical = html.getCanonical(); - if (canonical != null) result.put(canonical, "canonical"); + if (canonical != null) { + result.put(canonical, CANONICAL_MARKER); + } } } return result; diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index a7705abdd..111c716d0 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2634,6 +2634,7 @@ public final class Switchboard extends serverSwitch { //if (m.matches()) u = m.replaceAll(""); // enqueue the hyperlink into the pre-notice-url db + int nextdepth = nextEntry.getValue() != null && nextEntry.getValue().equals(Document.CANONICAL_MARKER) ? response.depth() : response.depth() + 1; // canonical documents are on the same depth try { this.crawlStacker.enqueueEntry(new Request( response.initiator(), @@ -2642,7 +2643,7 @@ public final class Switchboard extends serverSwitch { nextEntry.getValue(), new Date(), response.profile().handle(), - response.depth() + 1, + nextdepth, 0, 0)); } catch (final MalformedURLException e ) {