fix for problem with remote crawl referrers

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4210 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 875096552f
commit bc2368e907

@ -185,7 +185,7 @@ public final class crawlOrder {
if (!newURL.equals(urlv.get(0))) {
env.getLog().logWarning("crawlOrder: Received not normalized URL " + urlv.get(0));
}
String refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null).toNormalform(true, true);
yacyURL refURL = (refv.get(0) == null) ? null : new yacyURL((String) refv.get(0), null);
if ((refURL != null) && (!refURL.equals(refv.get(0)))) {
env.getLog().logWarning("crawlOrder: Received not normalized Referer URL " + refv.get(0) + " of URL " + urlv.get(0));
}
@ -211,7 +211,7 @@ public final class crawlOrder {
for (int i = 0; i < count; i++) {
env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'");
try {
stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), (String) refv.get(i), iam, youare);
stackresult = stack(switchboard, new yacyURL((String) urlv.get(i), null), ((refv.get(i) == null) || (((String) refv.get(i)).length() == 0)) ? null : new yacyURL((String) refv.get(i), null), iam, youare);
response = (String) stackresult[0];
prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]);
prop.put("list_" + i + "_lurl", (String) stackresult[2]);
@ -244,7 +244,7 @@ public final class crawlOrder {
return prop;
}
private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, String referrer, String iam, String youare) {
private static Object[] stack(plasmaSwitchboard switchboard, yacyURL url, yacyURL referrer, String iam, String youare) {
String response, reason, lurl;
// stack url
switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'");

@ -63,7 +63,7 @@ public class plasmaCrawlEntry {
private String initiator; // the initiator hash, is NULL or "" if it is the own proxy;
// if this is generated by a crawl, the own peer hash is entered
private String referrer; // the url's referrer hash
private String refhash; // the url's referrer hash
private yacyURL url; // the url as string
private String name; // the name of the url, from anchor tag <a>name</a>
private long appdate; // the time when the url was first time appeared
@ -97,7 +97,7 @@ public class plasmaCrawlEntry {
public plasmaCrawlEntry(
String initiator,
yacyURL url,
String referrer,
String referrerhash,
String name,
Date appdate,
String profileHandle,
@ -111,7 +111,7 @@ public class plasmaCrawlEntry {
if ((initiator == null) || (initiator.length() == 0)) initiator = yacyURL.dummyHash;
this.initiator = initiator;
this.url = url;
this.referrer = (referrer == null) ? yacyURL.dummyHash : referrer;
this.refhash = (referrerhash == null) ? yacyURL.dummyHash : referrerhash;
this.name = (name == null) ? "" : name;
this.appdate = (appdate == null) ? 0 : appdate.getTime();
this.profileHandle = profileHandle; // must not be null
@ -137,7 +137,7 @@ public class plasmaCrawlEntry {
if (urlstring == null) throw new IOException ("url string is null");
this.initiator = entry.getColString(1, null);
this.url = new yacyURL(urlstring, entry.getColString(0, null));
this.referrer = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
this.refhash = (entry.empty(3)) ? yacyURL.dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.appdate = entry.getColLong(5);
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
@ -189,7 +189,7 @@ public class plasmaCrawlEntry {
this.url.hash().getBytes(),
(initiator == null) ? "".getBytes() : this.initiator.getBytes(),
this.url.toString().getBytes(),
this.referrer.getBytes(),
this.refhash.getBytes(),
namebytes,
appdatestr,
(this.profileHandle == null) ? null : this.profileHandle.getBytes(),
@ -216,7 +216,7 @@ public class plasmaCrawlEntry {
public String referrerhash() {
// the urlhash of a referer url
return this.referrer;
return this.refhash;
}
public String initiator() {

@ -343,7 +343,7 @@ public final class plasmaCrawlStacker extends Thread {
return new plasmaCrawlEntry(entry);
}
public String stackCrawl(yacyURL url, String referrerhash, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) {
public String stackCrawl(yacyURL url, yacyURL referrer, String initiatorHash, String name, Date loadDate, int currentdepth, plasmaCrawlProfile.entry profile) {
// stacks a crawl item. The position can also be remote
// returns null if successful, a reason string if not successful
//this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
@ -352,7 +352,7 @@ public final class plasmaCrawlStacker extends Thread {
plasmaCrawlEntry entry = new plasmaCrawlEntry(
initiatorHash, // initiator, needed for p2p-feedback
url, // url clear text string
referrerhash, // last url in crawling queue
(referrer == null) ? null : referrer.hash(), // last url in crawling queue
name, // the anchor name
loadDate, // load date
(profile == null) ? null : profile.handle(), // profile must not be null!

Loading…
Cancel
Save