removed the dns prefetch because that was not soo useful

pull/1/head
orbiter 12 years ago
parent aa3c26c62e
commit 2555542f7a

@ -8,10 +8,6 @@
// This file was contributed by Martin Thelian
// ([MC] removed all multithreading and thread pools, this is not necessary here; complete renovation 2007)
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -31,7 +27,6 @@ package net.yacy.crawler;
import java.io.IOException;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
@ -76,7 +71,7 @@ public final class CrawlStacker {
private final Log log = new Log("STACKCRAWL");
private final RobotsTxt robots;
private final WorkflowProcessor<Request> fastQueue, slowQueue;
private final WorkflowProcessor<Request> requestQueue;
private final CrawlQueues nextQueue;
private final CrawlSwitchboard crawler;
private final Segment indexSegment;
@ -103,56 +98,37 @@ public final class CrawlStacker {
this.acceptLocalURLs = acceptLocalURLs;
this.acceptGlobalURLs = acceptGlobalURLs;
this.domainList = domainList;
this.fastQueue = new WorkflowProcessor<Request>("CrawlStackerFast", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU);
this.slowQueue = new WorkflowProcessor<Request>("CrawlStackerSlow", "This is like CrawlStackerFast, but does additionaly a DNS lookup. The CrawlStackerFast does not need this because it can use the DNS cache.", new String[]{"Balancer"}, this, "job", 1000, null, 2);
this.requestQueue = new WorkflowProcessor<Request>("CrawlStacker", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU);
this.log.logInfo("STACKCRAWL thread initialized.");
}
public int size() {
return this.fastQueue.queueSize() + this.slowQueue.queueSize();
return this.requestQueue.queueSize();
}
public boolean isEmpty() {
if (!this.fastQueue.queueIsEmpty()) return false;
if (!this.slowQueue.queueIsEmpty()) return false;
if (!this.requestQueue.queueIsEmpty()) return false;
return true;
}
public void clear() {
this.fastQueue.clear();
this.slowQueue.clear();
this.requestQueue.clear();
}
public void announceClose() {
this.log.logInfo("Flushing remaining " + size() + " crawl stacker job entries.");
this.fastQueue.shutdown();
this.slowQueue.shutdown();
this.requestQueue.shutdown();
}
public synchronized void close() {
this.log.logInfo("Shutdown. waiting for remaining " + size() + " crawl stacker job entries. please wait.");
this.fastQueue.shutdown();
this.slowQueue.shutdown();
this.requestQueue.shutdown();
this.log.logInfo("Shutdown. Closing stackCrawl queue.");
clear();
}
private static boolean prefetchHost(final String host) {
// returns true when the host was known in the dns cache.
// If not, the host is stacked on the fetch stack and false is returned
try {
if (Domains.dnsResolveFromCache(host) != null) return true; // found entry
} catch (final UnknownHostException e) {
// we know that this is unknown
return false;
}
// we just don't know anything about that host
return false;
}
public Request job(final Request entry) {
// this is the method that is called by the busy thread from outside
if (entry == null) return null;
@ -175,20 +151,9 @@ public final class CrawlStacker {
// DEBUG
if (this.log.isFinest()) this.log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth());
if (prefetchHost(entry.url().getHost())) {
try {
this.fastQueue.enQueue(entry);
//this.dnsHit++;
} catch (final InterruptedException e) {
Log.logException(e);
}
} else {
try {
this.slowQueue.enQueue(entry);
} catch (final InterruptedException e) {
Log.logException(e);
}
try {
this.requestQueue.enQueue(entry);
} catch (InterruptedException e) {
}
}
public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final Map<DigestURI, Properties> hyperlinks) {

Loading…
Cancel
Save