borg-0300 20 years ago
parent b8ceb1ffde
commit 440e6ed747

@ -4,7 +4,10 @@
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last change: 02.05.2004
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@ -42,15 +45,12 @@
// You must compile this file with
// javac -classpath .:../classes crawlOrder.java
import java.util.ArrayList;
import java.util.Date;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURL;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -59,7 +59,6 @@ import de.anomic.yacy.yacySeed;
public final class crawlOrder {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
plasmaSwitchboard switchboard = (plasmaSwitchboard) env;
@ -155,10 +154,12 @@ public final class crawlOrder {
}
if (refencoded != null) {
// old method: only one url
env.getLog().logFinest("crawlOrder: refencoded=" + refencoded + " key=" + key);
refv.add(crypt.simpleDecode(refencoded, key)); // the referrer url
} else {
// new method: read a vector of urls
while ((refencoded = (String) post.get("ref" + refv.size(), null)) != null) {
env.getLog().logFinest("crawlOrder: refencoded=" + refencoded + " key=" + key);
refv.add(crypt.simpleDecode(refencoded, key));
}
}
@ -180,6 +181,8 @@ public final class crawlOrder {
}
// adding URL to noticeURL Queue
env.getLog().logFinest("crawlOrder: a: url='" + newURL + "'");
stackresult = stack(switchboard, newURL, refURL, iam, youare);
response = (String) stackresult[0];
reason = (String) stackresult[1];
@ -191,6 +194,8 @@ public final class crawlOrder {
int doubleCount = 0;
int rejectedCount = 0;
for (int i = 0; i < count; i++) {
env.getLog().logFinest("crawlOrder: b: url='" + (String) urlv.get(i) + "'");
stackresult = stack(switchboard, (String) urlv.get(i), (String) refv.get(i), iam, youare);
response = (String) stackresult[0];
prop.put("list_" + i + "_job", (String) stackresult[0] + "," + (String) stackresult[1]);
@ -223,10 +228,10 @@ public final class crawlOrder {
return prop;
}
private static Object[] stack(plasmaSwitchboard switchboard, String url, String referrer, String iam, String youare) {
String response, reason, lurl;
// stack url
switchboard.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
String reasonString = switchboard.sbStackCrawlThread.stackCrawl(url, referrer, iam, "REMOTE-CRAWLING", new Date(), 0, switchboard.defaultRemoteProfile);
if (reasonString == null) {
// liftoff!

Loading…
Cancel
Save