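Fix remote crawl referrer handling: crawlOrder now passes the referrer as a yacyURL (null when missing or empty), and plasmaCrawlStacker.stackCrawl derives the referrer hash from that URL instead of receiving a String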

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4210 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 875096552f
commit bc2368e907

@ -185,7 +185,7 @@ public final class crawlOrder {
if (!newURL.equals(urlv.get(0))) { if (!newURL.equals(urlv.get(0))) {
env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0)); env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0));
} }
String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true); yacyURL refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null);
if ((refURL != null) && (!refURL.equals(refv.get(0)))) { if ((refURL != null) && (!refURL.equals(refv.get(0)))) {
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0)); env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
} }
@ -211,7 +211,7 @@ public final class crawlOrder {
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'"); env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'");
try { try {
stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), (String) refv.get(i), iam, youare); stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), ((refv.get(i) == null) || (((String) refv.get(i)).length() == 0)) ? null : new yacyURL((String) refv.get(i), null), iam, youare);
response = (String) stackresult[0]; response = (String) stackresult[0];
prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]); prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]);
prop.put("list_" + i + "_lurl", (String) stackresult[2]); prop.put("list_" + i + "_lurl", (String) stackresult[2]);
@ -244,7 +244,7 @@ public final class crawlOrder {
return prop; return prop;
} }
private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, String referrer, String iam, String youare) { private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, yacyURL referrer, String iam, String youare) {
String response, reason, lurl; String response, reason, lurl;
// stack url // stack url
switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'"); switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'");

@ -63,7 +63,7 @@ public class plasmaCrawlEntry {
private String initiator; // the initiator hash, is NULL or "" if it is the own proxy; private String initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash in entered // if this is generated by a crawl, the own peer hash in entered
private String referrer; // the url's referrer hash private String refhash; // the url's referrer hash
private yacyURL url; // the url as string private yacyURL url; // the url as string
private String name; // the name of the url, from anchor tag <a>name</a> private String name; // the name of the url, from anchor tag <a>name</a>
private long appdate; // the time when the url was first time appeared private long appdate; // the time when the url was first time appeared
@ -97,7 +97,7 @@ public class plasmaCrawlEntry {
public plasmaCrawlEntry( public plasmaCrawlEntry(
String initiator, String initiator,
yacyURL url, yacyURL url,
String referrer, String referrerhash,
String name, String name,
Date appdate, Date appdate,
String profileHandle, String profileHandle,
@ -111,7 +111,7 @@ public class plasmaCrawlEntry {
if ((initiator == null) || (initiator.length() == 0)) initiator = yacyURL.dummyHash; if ((initiator == null) || (initiator.length() == 0)) initiator = yacyURL.dummyHash;
this.initiator = initiator; this.initiator = initiator;
this.url = url; this.url = url;
this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer; this.refhash = (referrerhash == null) ? yacyURL.dummyHash : referrerhash;
this.name = (name == null) ? "" : name; this.name = (name == null) ? "" : name;
this.appdate = (appdate == null) ? 0 : appdate.getTime(); this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null this.profileHandle = profileHandle; // must not be null
@ -137,7 +137,7 @@ public class plasmaCrawlEntry {
if (urlstring == null) throw new IOException ("url string is null"); if (urlstring == null) throw new IOException ("url string is null");
this.initiator = entry.getColString(1, null); this.initiator = entry.getColString(1, null);
this.url = new yacyURL(urlstring, entry.getColString(0, null)); this.url = new yacyURL(urlstring, entry.getColString(0, null));
this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null); this.refhash = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.appdate = entry.getColLong(5); this.appdate = entry.getColLong(5);
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
@ -189,7 +189,7 @@ public class plasmaCrawlEntry {
this.url.hash().getBytes(), this.url.hash().getBytes(),
(initiator == null) ? "".getBytes() : this.initiator.getBytes(), (initiator == null) ? "".getBytes() : this.initiator.getBytes(),
this.url.toString().getBytes(), this.url.toString().getBytes(),
this.referrer.getBytes(), this.refhash.getBytes(),
namebytes, namebytes,
appdatestr, appdatestr,
(this.profileHandle == null) ? null : this.profileHandle.getBytes(), (this.profileHandle == null) ? null : this.profileHandle.getBytes(),
@ -216,7 +216,7 @@ public class plasmaCrawlEntry {
public String referrerhash() { public String referrerhash() {
// the urlhash of a referer url // the urlhash of a referer url
return this.referrer; return this.refhash;
} }
public String initiator() { public String initiator() {

@ -343,7 +343,7 @@ public final class plasmaCrawlStacker extends Thread {
return new plasmaCrawlEntry(entry); return new plasmaCrawlEntry(entry);
} }
public String stackCrawl(yacyURL url, String referrerhash, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) { public String stackCrawl(yacyURL url, yacyURL referrer, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) {
// stacks a crawl item. The position can also be remote // stacks a crawl item. The position can also be remote
// returns null if successful, a reason string if not successful // returns null if successful, a reason string if not successful
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'"); //this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
@ -352,7 +352,7 @@ public final class plasmaCrawlStacker extends Thread {
plasmaCrawlEntry entry = new plasmaCrawlEntry( plasmaCrawlEntry entry = new plasmaCrawlEntry(
initiatorHash, // initiator, needed for p2p-feedback initiatorHash, // initiator, needed for p2p-feedback
url, // url clear text string url, // url clear text string
referrerhash, // last url in crawling queue (referrer == null) ? null : referrer.hash(), // last url in crawling queue
name, // load date name, // load date
loadDate, // the anchor name loadDate, // the anchor name
(profile == null) ? null : profile.handle(), // profile must not be null! (profile == null) ? null : profile.handle(), // profile must not be null!

Loading…
Cancel
Save