diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index c57378a20..f24293bf2 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -8,10 +8,6 @@ // This file was contributed by Martin Thelian // ([MC] removed all multithreading and thread pools, this is not necessary here; complete renovation 2007) // -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -31,7 +27,6 @@ package net.yacy.crawler; import java.io.IOException; import java.net.InetAddress; import java.net.MalformedURLException; -import java.net.UnknownHostException; import java.util.Date; import java.util.Locale; import java.util.Map; @@ -76,7 +71,7 @@ public final class CrawlStacker { private final Log log = new Log("STACKCRAWL"); private final RobotsTxt robots; - private final WorkflowProcessor fastQueue, slowQueue; + private final WorkflowProcessor requestQueue; private final CrawlQueues nextQueue; private final CrawlSwitchboard crawler; private final Segment indexSegment; @@ -103,56 +98,37 @@ public final class CrawlStacker { this.acceptLocalURLs = acceptLocalURLs; this.acceptGlobalURLs = acceptGlobalURLs; this.domainList = domainList; - - this.fastQueue = new WorkflowProcessor("CrawlStackerFast", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU); - this.slowQueue = new WorkflowProcessor("CrawlStackerSlow", "This is like CrawlStackerFast, but does additionaly a DNS lookup. The CrawlStackerFast does not need this because it can use the DNS cache.", new String[]{"Balancer"}, this, "job", 1000, null, 2); + this.requestQueue = new WorkflowProcessor("CrawlStacker", "This process checks new urls before they are enqueued into the balancer (proper, double-check, correct domain, filter)", new String[]{"Balancer"}, this, "job", 10000, null, WorkflowProcessor.availableCPU); this.log.logInfo("STACKCRAWL thread initialized."); } public int size() { - return this.fastQueue.queueSize() + this.slowQueue.queueSize(); + return this.requestQueue.queueSize(); } public boolean isEmpty() { - if (!this.fastQueue.queueIsEmpty()) return false; - if (!this.slowQueue.queueIsEmpty()) return false; + if (!this.requestQueue.queueIsEmpty()) return false; return true; } public void clear() { - this.fastQueue.clear(); - this.slowQueue.clear(); + this.requestQueue.clear(); } public void announceClose() { this.log.logInfo("Flushing remaining " + size() + " crawl stacker job entries."); - this.fastQueue.shutdown(); - this.slowQueue.shutdown(); + this.requestQueue.shutdown(); } public synchronized void close() { this.log.logInfo("Shutdown. waiting for remaining " + size() + " crawl stacker job entries. please wait."); - this.fastQueue.shutdown(); - this.slowQueue.shutdown(); + this.requestQueue.shutdown(); this.log.logInfo("Shutdown. Closing stackCrawl queue."); clear(); } - private static boolean prefetchHost(final String host) { - // returns true when the host was known in the dns cache. - // If not, the host is stacked on the fetch stack and false is returned - try { - if (Domains.dnsResolveFromCache(host) != null) return true; // found entry - } catch (final UnknownHostException e) { - // we know that this is unknown - return false; - } - // we just don't know anything about that host - return false; - } - public Request job(final Request entry) { // this is the method that is called by the busy thread from outside if (entry == null) return null; @@ -175,20 +151,9 @@ public final class CrawlStacker { // DEBUG if (this.log.isFinest()) this.log.logFinest("ENQUEUE " + entry.url() + ", referer=" + entry.referrerhash() + ", initiator=" + ((entry.initiator() == null) ? "" : ASCII.String(entry.initiator())) + ", name=" + entry.name() + ", appdate=" + entry.appdate() + ", depth=" + entry.depth()); - - if (prefetchHost(entry.url().getHost())) { - try { - this.fastQueue.enQueue(entry); - //this.dnsHit++; - } catch (final InterruptedException e) { - Log.logException(e); - } - } else { - try { - this.slowQueue.enQueue(entry); - } catch (final InterruptedException e) { - Log.logException(e); - } + try { + this.requestQueue.enQueue(entry); + } catch (InterruptedException e) { } } public void enqueueEntriesAsynchronous(final byte[] initiator, final String profileHandle, final Map hyperlinks) {