diff --git a/htroot/IndexCreateLoaderQueue_p.java b/htroot/IndexCreateLoaderQueue_p.java index 8f0b54915..0b835dec6 100644 --- a/htroot/IndexCreateLoaderQueue_p.java +++ b/htroot/IndexCreateLoaderQueue_p.java @@ -47,7 +47,7 @@ import de.anomic.data.wikiCode; import de.anomic.http.httpHeader; import de.anomic.plasma.plasmaCrawlLoaderMessage; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.crawler.http.CrawlWorker; +import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacyCore; @@ -74,8 +74,8 @@ public class IndexCreateLoaderQueue_p { yacySeed initiator; int i, count = 0; for (i = 0; i < threadCount; i++) { - CrawlWorker theWorker = (CrawlWorker)threadList[i]; - plasmaCrawlLoaderMessage theMsg = theWorker.theMsg; + plasmaCrawlWorker theWorker = (plasmaCrawlWorker)threadList[i]; + plasmaCrawlLoaderMessage theMsg = theWorker.getMessage(); if (theMsg == null) continue; initiator = yacyCore.seedDB.getConnected(theMsg.initiator); diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index c13e7252f..fa827de07 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -197,7 +197,7 @@ public class PerformanceQueues_p { GenericKeyedObjectPool.Config crawlerPoolConfig = switchboard.cacheLoader.getPoolConfig(); int maxActive = Integer.parseInt(post.get("Crawler Pool_maxActive","8")); int maxIdle = Integer.parseInt(post.get("Crawler Pool_maxIdle","4")); - int minIdle = Integer.parseInt(post.get("Crawler Pool_minIdle","0")); + int minIdle = 0; // Integer.parseInt(post.get("Crawler Pool_minIdle","0")); //crawlerPoolConfig.minIdle = (minIdle > maxIdle) ? maxIdle/2 : minIdle; crawlerPoolConfig.maxIdle = (maxIdle > maxActive) ? maxActive/2 : maxIdle; diff --git a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java index cd77a4c65..8ef2932ba 100644 --- a/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/AbstractCrawlWorker.java @@ -27,6 +27,9 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW public boolean destroyed = false; protected boolean running = false; protected boolean stopped = false; + /** + * Specifies that the execution of the current crawl job has finished + */ protected boolean done = false; /* ============================================================ @@ -86,6 +89,14 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW this.log = theLog; } + public void setNameTrailer(String trailer) { + this.setName(plasmaCrawlWorker.threadBaseName + trailer); + } + + public plasmaCrawlLoaderMessage getMessage() { + return this.theMsg; + } + public abstract void close(); public void run() { @@ -173,6 +184,10 @@ public abstract class AbstractCrawlWorker extends Thread implements plasmaCrawlW public void setStopped(boolean isStopped) { this.stopped = isStopped; } + + public void setDestroyed(boolean isDestroyed) { + this.destroyed = isDestroyed; + } public boolean isRunning() { return this.running; diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlWorker.java b/source/de/anomic/plasma/crawler/plasmaCrawlWorker.java index 1fa2753f5..349d3d7ea 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlWorker.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlWorker.java @@ -10,6 +10,13 @@ public interface plasmaCrawlWorker { public static final String threadBaseName = "CrawlerWorker"; + public void setNameTrailer(String trailer); + + public void setStopped(boolean isStopped); + public void setDestroyed(boolean isDestroyed); + + public plasmaCrawlLoaderMessage getMessage(); + public void reset(); public void execute(); public void execute(plasmaCrawlLoaderMessage theNewMsg); diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java b/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java index dfd29d5e5..549cf8a87 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlerFactory.java @@ -6,7 +6,6 @@ import org.apache.commons.pool.KeyedPoolableObjectFactory; import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.plasma.crawler.http.CrawlWorker; import de.anomic.server.logging.serverLog; public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { @@ -62,7 +61,7 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { } ); // instantiating class - CrawlWorker theCrawlWorker = (CrawlWorker) classConstructor.newInstance(new Object[] { + plasmaCrawlWorker theCrawlWorker = (plasmaCrawlWorker) classConstructor.newInstance(new Object[] { this.theThreadGroup, this.thePool, this.sb, @@ -86,13 +85,13 @@ public final class plasmaCrawlerFactory implements KeyedPoolableObjectFactory { */ public void destroyObject(Object key, Object obj) { if (obj == null) return; - if (obj instanceof CrawlWorker) { - CrawlWorker theWorker = (CrawlWorker) obj; + if (obj instanceof plasmaCrawlWorker) { + plasmaCrawlWorker theWorker = (plasmaCrawlWorker) obj; synchronized(theWorker) { - theWorker.destroyed = true; - theWorker.setName(plasmaCrawlWorker.threadBaseName + "_destroyed"); + theWorker.setDestroyed(true); + theWorker.setNameTrailer("_destroyed"); theWorker.setStopped(true); - theWorker.interrupt(); + ((Thread)theWorker).interrupt(); } } } diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlerPool.java b/source/de/anomic/plasma/crawler/plasmaCrawlerPool.java index 2c0772552..11accfae8 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlerPool.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlerPool.java @@ -1,8 +1,6 @@ package de.anomic.plasma.crawler; import org.apache.commons.pool.impl.GenericKeyedObjectPool; - -import de.anomic.plasma.crawler.http.CrawlWorker; import de.anomic.server.logging.serverLog; public final class plasmaCrawlerPool extends GenericKeyedObjectPool { @@ -21,12 +19,12 @@ public final class plasmaCrawlerPool extends GenericKeyedObjectPool { public void returnObject(Object key,Object obj) { if (obj == null) return; - if (obj instanceof CrawlWorker) { + if (obj instanceof plasmaCrawlWorker) { try { - ((CrawlWorker)obj).setName(plasmaCrawlWorker.threadBaseName + "_inPool"); + ((plasmaCrawlWorker)obj).setNameTrailer("_inPool"); super.returnObject(key,obj); } catch (Exception e) { - ((CrawlWorker)obj).setStopped(true); + ((plasmaCrawlWorker)obj).setStopped(true); serverLog.logSevere("CRAWLER-POOL","Unable to return crawler thread to pool.",e); } } else { @@ -38,10 +36,10 @@ public final class plasmaCrawlerPool extends GenericKeyedObjectPool { public void invalidateObject(Object key,Object obj) { if (obj == null) return; if (this.isClosed) return; - if (obj instanceof CrawlWorker) { + if (obj instanceof plasmaCrawlWorker) { try { - ((CrawlWorker)obj).setName(plasmaCrawlWorker.threadBaseName + "_invalidated"); - ((CrawlWorker)obj).setStopped(true); + ((plasmaCrawlWorker)obj).setNameTrailer("_invalidated"); + ((plasmaCrawlWorker)obj).setStopped(true); super.invalidateObject(key,obj); } catch (Exception e) { serverLog.logSevere("CRAWLER-POOL","Unable to invalidate crawling thread.",e); @@ -64,7 +62,7 @@ public final class plasmaCrawlerPool extends GenericKeyedObjectPool { // signaling shutdown to all still running or pooled threads ... serverLog.logInfo("CRAWLER","Signaling shutdown to " + threadCount + " remaining crawler threads ..."); for ( int currentThreadIdx = 0; currentThreadIdx < threadCount; currentThreadIdx++ ) { - ((CrawlWorker)threadList[currentThreadIdx]).setStopped(true); + ((plasmaCrawlWorker)threadList[currentThreadIdx]).setStopped(true); } // giving the crawlers some time to finish shutdown @@ -80,7 +78,7 @@ public final class plasmaCrawlerPool extends GenericKeyedObjectPool { Thread currentThread = threadList[currentThreadIdx]; if (currentThread.isAlive()) { serverLog.logInfo("CRAWLER","Trying to shutdown crawler thread '" + currentThread.getName() + "' [" + currentThreadIdx + "]."); - ((CrawlWorker)currentThread).close(); + ((plasmaCrawlWorker)currentThread).close(); } } diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index 1604d1ebf..63d10555d 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -48,10 +48,10 @@ import org.apache.commons.pool.impl.GenericKeyedObjectPool; import org.apache.commons.pool.impl.GenericObjectPool; import de.anomic.net.URL; +import de.anomic.plasma.crawler.plasmaCrawlWorker; import de.anomic.plasma.crawler.plasmaCrawlerFactory; import de.anomic.plasma.crawler.plasmaCrawlerMsgQueue; import de.anomic.plasma.crawler.plasmaCrawlerPool; -import de.anomic.plasma.crawler.http.CrawlWorker; import de.anomic.server.logging.serverLog; public final class plasmaCrawlLoader extends Thread { @@ -147,8 +147,12 @@ public final class plasmaCrawlLoader extends Thread { String protocol = theMsg.url.getProtocol(); // getting a new crawler from the crawler pool - CrawlWorker theWorker = (CrawlWorker) this.crawlwerPool.borrowObject(protocol); - if (theWorker != null) theWorker.execute(theMsg); + plasmaCrawlWorker theWorker = (plasmaCrawlWorker) this.crawlwerPool.borrowObject(protocol); + if (theWorker == null) { + this.log.logWarning("Unsupported protocol '" + protocol + "' in url " + theMsg.url); + } else { + theWorker.execute(theMsg); + } } public void run() {