- patch for bad web structure dumps

- added automatic slow-down of access to specific domains when access to a web page fails

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5765 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 0139988c04
commit b6c2167143

@ -45,6 +45,17 @@ public class Latency {
} }
} }
/**
 * Applies a latency penalty to the given host after a failed page access.
 * A host that has never been seen before is registered with an initial
 * penalty time of 3000 ms; a known host gets its per-instance slow-down.
 *
 * @param hosthash 6-character hash identifying the host (asserted)
 * @param host     the host name, used only when creating a new entry
 */
public static void slowdown(String hosthash, String host) {
    assert hosthash.length() == 6;
    final Host entry = map.get(hosthash);
    if (entry != null) {
        entry.slowdown();
        return;
    }
    // NOTE(review): check-then-put is not atomic — assumes map is either
    // confined to one thread or tolerates a lost insert; confirm map's type.
    map.put(hosthash, new Host(host, 3000));
}
public static Host host(String hosthash) { public static Host host(String hosthash) {
assert hosthash.length() == 6; assert hosthash.length() == 6;
return map.get(hosthash); return map.get(hosthash);
@ -172,6 +183,11 @@ public class Latency {
this.timeacc += time; this.timeacc += time;
this.count++; this.count++;
} }
/**
 * Penalizes this host after a failed access: records the current time as
 * the last access and inflates the accumulated latency so subsequent
 * scheduling backs off from this host.
 */
public void slowdown() {
    this.lastacc = System.currentTimeMillis();
    // Inflate the average latency fivefold, capped at 60 s; with count reset
    // to 1 below, timeacc becomes the new effective average.
    // NOTE(review): average() is not visible here — if it divides by count,
    // confirm it is safe when count == 0 (division by zero). TODO verify.
    this.timeacc = Math.min(60000, average() * 5);
    this.count = 1;
}
public int count() { public int count() {
return this.count; return this.count;
} }

@ -59,8 +59,10 @@ import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.httpclient.protocol.Protocol; import org.apache.commons.httpclient.protocol.Protocol;
import org.apache.commons.httpclient.protocol.ProtocolSocketFactory; import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
import de.anomic.crawler.Latency;
import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.util.Log; import de.anomic.kelondro.util.Log;
import de.anomic.yacy.yacyURL;
/** /**
* HttpClient implementation which uses Jakarta Commons HttpClient 3.x {@link http://hc.apache.org/httpclient-3.x/} * HttpClient implementation which uses Jakarta Commons HttpClient 3.x {@link http://hc.apache.org/httpclient-3.x/}
@ -448,14 +450,20 @@ public class httpClient {
} }
} catch (final IllegalThreadStateException e) { } catch (final IllegalThreadStateException e) {
// cleanUp statistics // cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method)); HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e; throw e;
} catch (final IOException e) { } catch (final IOException e) {
// cleanUp statistics // cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method)); HttpConnectionInfo.removeConnection(generateConInfo(method));
throw e; throw e;
} catch (final IllegalStateException e) { } catch (final IllegalStateException e) {
// cleanUp statistics // cleanUp statistics
yacyURL url = new yacyURL(method.getURI().toString(), null);
Latency.slowdown(url.hash().substring(6), url.getHost());
HttpConnectionInfo.removeConnection(generateConInfo(method)); HttpConnectionInfo.removeConnection(generateConInfo(method));
throw new IOException(e.getMessage()); throw new IOException(e.getMessage());
} }

@ -184,9 +184,15 @@ public class plasmaWebStructure {
final Map<String, Integer> map = new HashMap<String, Integer>(); final Map<String, Integer> map = new HashMap<String, Integer>();
String c; String c;
final int refsc = refstr2count(refs); final int refsc = refstr2count(refs);
int d;
for (int i = 0; i < refsc; i++) { for (int i = 0; i < refsc; i++) {
c = refs.substring(8 + i * 10, 8 + (i + 1) * 10); c = refs.substring(8 + i * 10, 8 + (i + 1) * 10);
map.put(c.substring(0, 6), Integer.valueOf(c.substring(6), 16)); try {
d = Integer.valueOf(c.substring(6), 16);
} catch (NumberFormatException e) {
d = 1;
}
map.put(c.substring(0, 6), d);
} }
return map; return map;
} }

Loading…
Cancel
Save