diff --git a/htroot/yacy/crawlOrder.java b/htroot/yacy/crawlOrder.java index 791d1828b..6719ea853 100644 --- a/htroot/yacy/crawlOrder.java +++ b/htroot/yacy/crawlOrder.java @@ -185,7 +185,7 @@ public final class crawlOrder { if (!newURL.equals(urlv.get(0))) { env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0)); } - String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true); + yacyURL refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null); if ((refURL != null) && (!refURL.equals(refv.get(0)))) { env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); } @@ -211,7 +211,7 @@ public final class crawlOrder { for (int i = 0; i < count; i++) { env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'"); try { - stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), (String) refv.get(i), iam, youare); + stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), ((refv.get(i) == null) || (((String) refv.get(i)).length() == 0)) ? null : new yacyURL((String) refv.get(i), null), iam, youare); response = (String) stackresult[0]; prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]); prop.put("list_" + i + "_lurl", (String) stackresult[2]); @@ -244,7 +244,7 @@ public final class crawlOrder { return prop; } - private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, String referrer, String iam, String youare) { + private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, yacyURL referrer, String iam, String youare) { String response, reason, lurl; // stack url switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'"); diff --git a/source/de/anomic/plasma/plasmaCrawlEntry.java b/source/de/anomic/plasma/plasmaCrawlEntry.java index b86101862..712281286 100644 --- a/source/de/anomic/plasma/plasmaCrawlEntry.java +++ b/source/de/anomic/plasma/plasmaCrawlEntry.java @@ -63,7 +63,7 @@ public class plasmaCrawlEntry { private String initiator; // the initiator hash, is NULL or "" if it is the own proxy; // if this is generated by a crawl, the own peer hash in entered - private String referrer; // the url's referrer hash + private String refhash; // the url's referrer hash private yacyURL url; // the url as string private String name; // the name of the url, from anchor tag name private long appdate; // the time when the url was first time appeared @@ -97,7 +97,7 @@ public class plasmaCrawlEntry { public plasmaCrawlEntry( String initiator, yacyURL url, - String referrer, + String referrerhash, String name, Date appdate, String profileHandle, @@ -111,7 +111,7 @@ public class plasmaCrawlEntry { if ((initiator == null) || (initiator.length() == 0)) initiator = yacyURL.dummyHash; this.initiator = initiator; this.url = url; - this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer; + this.refhash = (referrerhash == null) ? yacyURL.dummyHash : referrerhash; this.name = (name == null) ? "" : name; this.appdate = (appdate == null) ? 0 : appdate.getTime(); this.profileHandle = profileHandle; // must not be null @@ -137,7 +137,7 @@ public class plasmaCrawlEntry { if (urlstring == null) throw new IOException ("url string is null"); this.initiator = entry.getColString(1, null); this.url = new yacyURL(urlstring, entry.getColString(0, null)); - this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null); + this.refhash = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.appdate = entry.getColLong(5); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); @@ -189,7 +189,7 @@ public class plasmaCrawlEntry { this.url.hash().getBytes(), (initiator == null) ? "".getBytes() : this.initiator.getBytes(), this.url.toString().getBytes(), - this.referrer.getBytes(), + this.refhash.getBytes(), namebytes, appdatestr, (this.profileHandle == null) ? null : this.profileHandle.getBytes(), @@ -216,7 +216,7 @@ public class plasmaCrawlEntry { public String referrerhash() { // the urlhash of a referer url - return this.referrer; + return this.refhash; } public String initiator() { diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 4ccf35a65..9f86eac0b 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -343,7 +343,7 @@ public final class plasmaCrawlStacker extends Thread { return new plasmaCrawlEntry(entry); } - public String stackCrawl(yacyURL url, String referrerhash, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) { + public String stackCrawl(yacyURL url, yacyURL referrer, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) { // stacks a crawl item. The position can also be remote // returns null if successful, a reason string if not successful //this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'"); @@ -352,7 +352,7 @@ public final class plasmaCrawlStacker extends Thread { plasmaCrawlEntry entry = new plasmaCrawlEntry( initiatorHash, // initiator, needed for p2p-feedback url, // url clear text string - referrerhash, // last url in crawling queue + (referrer == null) ? null : referrer.hash(), // last url in crawling queue name, // load date loadDate, // the anchor name (profile == null) ? null : profile.handle(), // profile must not be null!