diff --git a/htroot/Performance_p.java b/htroot/Performance_p.java index 5daa79d33..f5d8a5f26 100644 --- a/htroot/Performance_p.java +++ b/htroot/Performance_p.java @@ -178,6 +178,11 @@ public class Performance_p { } if ((post != null) && (post.containsKey("poolConfig"))) { + + /* + * configuring the crawler pool + */ + // getting the current crawler pool configuration GenericObjectPool.Config crawlerPoolConfig = switchboard.cacheLoader.getPoolConfig(); int maxActive = Integer.parseInt(post.get("Crawler Pool_maxActive","8")); int maxIdle = Integer.parseInt(post.get("Crawler Pool_maxIdle","4")); @@ -187,9 +192,18 @@ public class Performance_p { crawlerPoolConfig.maxIdle = (maxIdle > maxActive) ? maxActive/2 : maxIdle; crawlerPoolConfig.maxActive = maxActive; + // accept new crawler pool settings plasmaSwitchboard.crawlSlots = maxActive; switchboard.cacheLoader.setPoolConfig(crawlerPoolConfig); + // storing the new values into configfile + switchboard.setConfig("crawlerMaxActiveThreads",maxActive); + switchboard.setConfig("crawlerMaxIdleThreads",maxIdle); + switchboard.setConfig("crawlerMinIdleThreads",minIdle); + + /* + * configuring the http pool + */ serverThread httpd = switchboard.getThread("10_httpd"); GenericObjectPool.Config httpdPoolConfig = ((serverCore)httpd).getPoolConfig(); maxActive = Integer.parseInt(post.get("httpd Session Pool_maxActive","8")); @@ -200,8 +214,12 @@ public class Performance_p { httpdPoolConfig.maxIdle = (maxIdle > maxActive) ? 
maxActive/2 : maxIdle; httpdPoolConfig.maxActive = maxActive; - ((serverCore)httpd).maxSessions = maxActive; - ((serverCore)httpd).setPoolConfig(httpdPoolConfig); + ((serverCore)httpd).setPoolConfig(httpdPoolConfig); + + // storing the new values into configfile + switchboard.setConfig("httpdMaxActiveSessions",maxActive); + switchboard.setConfig("httpdMaxIdleSessions",maxIdle); + switchboard.setConfig("httpdMinIdleSessions",minIdle); } if ((post != null) && (post.containsKey("proxyControlSubmit"))) { @@ -236,14 +254,14 @@ public class Performance_p { prop.put("pool_0_name","Crawler Pool"); prop.put("pool_0_maxActive",crawlerPoolConfig.maxActive); prop.put("pool_0_maxIdle",crawlerPoolConfig.maxIdle); - prop.put("pool_0_minIdle",crawlerPoolConfig.maxIdle); + prop.put("pool_0_minIdle",crawlerPoolConfig.minIdle); serverThread httpd = switchboard.getThread("10_httpd"); GenericObjectPool.Config httpdPoolConfig = ((serverCore)httpd).getPoolConfig(); prop.put("pool_1_name","httpd Session Pool"); prop.put("pool_1_maxActive",httpdPoolConfig.maxActive); prop.put("pool_1_maxIdle",httpdPoolConfig.maxIdle); - prop.put("pool_1_minIdle",httpdPoolConfig.maxIdle); + prop.put("pool_1_minIdle",httpdPoolConfig.minIdle); prop.put("pool",2); diff --git a/source/de/anomic/plasma/plasmaCrawlLoader.java b/source/de/anomic/plasma/plasmaCrawlLoader.java index bae139794..6bc5f52e8 100644 --- a/source/de/anomic/plasma/plasmaCrawlLoader.java +++ b/source/de/anomic/plasma/plasmaCrawlLoader.java @@ -57,8 +57,6 @@ public final class plasmaCrawlLoader extends Thread { private final plasmaHTCache cacheManager; private final int socketTimeout; - private final int loadTimeout; - private final int maxSlots; private final serverLog log; private final CrawlerMessageQueue theQueue; @@ -68,21 +66,16 @@ public final class plasmaCrawlLoader extends Thread { private boolean stopped = false; public plasmaCrawlLoader( + plasmaSwitchboard sb, plasmaHTCache cacheManager, - serverLog log, - int socketTimeout, - int 
loadTimeout, - int mslots, - boolean proxyUse, - String proxyHost, - int proxyPort) { + serverLog log) { + this.setName("plasmaCrawlLoader"); this.cacheManager = cacheManager; this.log = log; - this.socketTimeout = socketTimeout; - this.loadTimeout = loadTimeout; - this.maxSlots = mslots; + + this.socketTimeout = Integer.parseInt(sb.getConfig("clientTimeout", "10000")); // configuring the crawler messagequeue this.theQueue = new CrawlerMessageQueue(); @@ -93,12 +86,12 @@ public final class plasmaCrawlLoader extends Thread { // The maximum number of active connections that can be allocated from pool at the same time, // 0 for no limit - this.cralwerPoolConfig.maxActive = this.maxSlots; + this.cralwerPoolConfig.maxActive = Integer.parseInt(sb.getConfig("crawlerMaxActiveThreads","10")); // The maximum number of idle connections connections in the pool // 0 = no limit. - this.cralwerPoolConfig.maxIdle = this.maxSlots / 2; - this.cralwerPoolConfig.minIdle = this.maxSlots / 4; + this.cralwerPoolConfig.maxIdle = Integer.parseInt(sb.getConfig("crawlerMaxIdleThreads","7")); + this.cralwerPoolConfig.minIdle = Integer.parseInt(sb.getConfig("crawlerMinIdleThreads","5")); // block undefinitely this.cralwerPoolConfig.maxWait = -1; @@ -113,9 +106,9 @@ public final class plasmaCrawlLoader extends Thread { this.theThreadGroup, cacheManager, socketTimeout, - proxyUse, - proxyHost, - proxyPort, + sb.getConfig("remoteProxyUse","false").equals("true"), + sb.getConfig("remoteProxyHost",""), + Integer.parseInt(sb.getConfig("remoteProxyPort","3128")), log); this.crawlwerPool = new CrawlerPool(theFactory,this.cralwerPoolConfig,this.theThreadGroup); diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java index b6dadbf5f..fc8873c29 100644 --- a/source/de/anomic/plasma/plasmaParser.java +++ b/source/de/anomic/plasma/plasmaParser.java @@ -588,14 +588,14 @@ public final class plasmaParser { } // fetching a new parser object from pool - Parser theParser 
= (Parser) this.theParserPool.borrowObject(parserClassName); + Parser theParser = (Parser) theParserPool.borrowObject(parserClassName); // checking if the created parser really supports the given mimetype Hashtable supportedMimeTypes = theParser.getSupportedMimeTypes(); if ((supportedMimeTypes != null) && (supportedMimeTypes.containsKey(mimeType))) { return theParser; } - this.theParserPool.returnObject(parserClassName,theParser); + theParserPool.returnObject(parserClassName,theParser); } catch (Exception e) { System.err.println("ERROR: Unable to load the correct parser for type " + mimeType); @@ -673,11 +673,11 @@ public final class plasmaParser { //File out = new File(args[1]); plasmaParser theParser = new plasmaParser(); theParser.initRealtimeParsableMimeTypes("application/xhtml+xml,text/html,text/plain"); - theParser.initParseableMimeTypes("application/atom+xml,application/gzip,application/java-archive,application/msword,application/octet-stream,application/pdf,application/rdf+xml,application/rss+xml,application/rtf,application/x-gzip,application/x-tar,application/xml,application/zip,text/rss,text/rtf,text/xml,application/x-bzip2"); + theParser.initParseableMimeTypes("application/atom+xml,application/gzip,application/java-archive,application/msword,application/octet-stream,application/pdf,application/rdf+xml,application/rss+xml,application/rtf,application/x-gzip,application/x-tar,application/xml,application/zip,text/rss,text/rtf,text/xml,application/x-bzip2,application/postscript"); FileInputStream theInput = new FileInputStream(in); ByteArrayOutputStream theOutput = new ByteArrayOutputStream(); serverFileUtils.copy(theInput, theOutput); - plasmaParserDocument document = theParser.parseSource(new URL("http://brain/~theli/test.pdf"), null, theOutput.toByteArray()); + plasmaParserDocument document = theParser.parseSource(new URL("http://brain/~theli/test.ps"), null, theOutput.toByteArray()); //plasmaParserDocument document = theParser.parseSource(new 
URL("http://brain.yacy"), "application/pdf", theOutput.toByteArray()); //byte[] theText = document.getText(); //serverFileUtils.write(theText, out); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 86ecb414c..967b893d9 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -294,12 +294,12 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int remoteport; try { remoteport = Integer.parseInt(getConfig("remoteProxyPort","3128")); } catch (NumberFormatException e) { remoteport = 3128; } - this.cacheLoader = new plasmaCrawlLoader(this.cacheManager, this.log, - Integer.parseInt(getConfig("clientTimeout", "10000")), - 5000, crawlSlots, - getConfig("remoteProxyUse","false").equals("true"), - getConfig("remoteProxyHost",""), - remoteport); + + crawlSlots = Integer.parseInt(getConfig("crawlerMaxActiveThreads", "10")); + this.cacheLoader = new plasmaCrawlLoader( + this, + this.cacheManager, + this.log); // init boards log.logSystem("Starting Message Board"); diff --git a/yacy.init b/yacy.init index 4e695fde5..85c796a51 100644 --- a/yacy.init +++ b/yacy.init @@ -19,9 +19,16 @@ port = 8080 clientTimeout = 8000 # maximal number of httpd sessions -# a client may open several connections at one, and the maxSessions value sets +# a client may open several connections at once, and the httpdMaxActiveSessions value sets # a limit on the number of concurrent connections -httpdMaxSessions = 150 +httpdMaxActiveSessions = 150 +httpdMaxIdleSessions = 75 +httpdMinIdleSessions = 5 + +# maximum number of crawler threads +crawlerMaxActiveThreads = 10 +crawlerMaxIdleThreads = 7 +crawlerMinIdleThreads = 5 # default root path for the file server # may be overridden by the htdocs parameter