*) Adding redirection support to plasmaCrawlWorker.java

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@327 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 6697d5e52e
commit cd279907c0

@ -105,6 +105,7 @@ public final class httpHeader extends TreeMap implements Map {
// HTTP header field names, exposed as constants so callers can look up
// header values by name without repeating the string literals.
public static final String RANGE = "Range";
public static final String CACHE_CONTROL = "Cache-Control";
public static final String TRANSFER_ENCODING = "Transfer-Encoding";
// Looked up by plasmaCrawlWorker.load() on "30x" responses; its value is
// resolved against the requested URL to build the redirection target.
public static final String LOCATION = "Location";
// NOTE(review): the X-Cache* names are presumably non-standard headers set by
// caching proxies -- confirm against the code that reads them.
public static final String X_CACHE = "X-Cache";
public static final String X_CACHE_LOOKUP = "X-Cache-Lookup";

@ -232,6 +232,35 @@ public final class plasmaCrawlWorker extends Thread {
plasmaHTCache cacheManager,
serverLog log
) throws IOException {
load(url,
referer,
initiator,
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
cacheManager,
log,
0
);
}
public static void load(
URL url,
String referer,
String initiator,
int depth,
plasmaCrawlProfile.entry profile,
int socketTimeout,
String remoteProxyHost,
int remoteProxyPort,
boolean remoteProxyUse,
plasmaHTCache cacheManager,
serverLog log,
int redirectionCount
) throws IOException {
if (url == null) return;
Date requestDate = new Date(); // remember the time...
String host = url.getHost();
@ -316,7 +345,37 @@ public final class plasmaCrawlWorker extends Thread {
if (cacheFile.exists()) cacheFile.delete();
log.logError("CRAWLER LOADER ERROR1: with url=" + url.toString() + ": " + e.toString());
}
} else {
} else if (res.status.startsWith("30")) {
if (redirectionCount < 5) {
if (res.responseHeader.containsKey(httpHeader.LOCATION)) {
// generating the new url
URL redirectionUrl = new URL(url, (String) res.responseHeader.get(httpHeader.LOCATION));
// returning the used httpc
httpc.returnInstance(remote);
remote = null;
// restart crawling with new url
log.logInfo("Redirection detected ('" + res.status + "') for url " + url.toString() +
"\nRedirecting request to: " + redirectionUrl);
load(redirectionUrl,
referer,
initiator,
depth,
profile,
socketTimeout,
remoteProxyHost,
remoteProxyPort,
remoteProxyUse,
cacheManager,
log,
++redirectionCount
);
}
} else {
log.logInfo("Redirection counter exceeded for url " + url.toString() + ". Processing aborted.");
}
}else {
// if the response does not have an acceptable status type, reject the file
log.logInfo("REJECTED WRONG STATUS TYPE '" + res.status + "' for url " + url.toString());
// not processed any further

Loading…
Cancel
Save