Enqueue canonical links at the same crawl depth as the referring document, not at the next crawl depth

pull/1/head
Michael Peter Christen 11 years ago
parent 74ab5ef9fa
commit 5746aae3db

@ -907,6 +907,8 @@ dc_rights
return newDoc;
}
/**
 * Marker value that {@code getHyperlinks} stores as the map value for a
 * document's canonical link, so callers can tell canonical links apart
 * from other hyperlink entries by comparing the entry value to this constant.
 */
public final static String CANONICAL_MARKER = "canonical";
public static Map<DigestURL, String> getHyperlinks(final Document[] documents) {
final Map<DigestURL, String> result = new HashMap<DigestURL, String>();
for (final Document d: documents) {
@ -917,7 +919,9 @@ dc_rights
String refresh = html.getRefreshPath();
if (refresh != null && refresh.length() > 0) try {result.put(new DigestURL(refresh), "refresh");} catch (final MalformedURLException e) {}
DigestURL canonical = html.getCanonical();
if (canonical != null) result.put(canonical, "canonical");
if (canonical != null) {
result.put(canonical, CANONICAL_MARKER);
}
}
}
return result;

@ -2634,6 +2634,7 @@ public final class Switchboard extends serverSwitch {
//if (m.matches()) u = m.replaceAll("");
// enqueue the hyperlink into the pre-notice-url db
int nextdepth = nextEntry.getValue() != null && nextEntry.getValue().equals(Document.CANONICAL_MARKER) ? response.depth() : response.depth() + 1; // canonical documents are on the same depth
try {
this.crawlStacker.enqueueEntry(new Request(
response.initiator(),
@ -2642,7 +2643,7 @@ public final class Switchboard extends serverSwitch {
nextEntry.getValue(),
new Date(),
response.profile().handle(),
response.depth() + 1,
nextdepth,
0,
0));
} catch (final MalformedURLException e ) {

Loading…
Cancel
Save