diff --git a/defaults/yacy.init b/defaults/yacy.init index 98a786ec5..8aad8cd88 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -772,6 +772,14 @@ crawler.defaultAverageLatency = 500 # The result is the minimum remote server access delay time crawler.latencyFactor = 0.5 +# The onDemandLimit is the maximum number of crawl queues that are kept open +# at the same time. If the number of hosts exceeds this number, onDemand queues are used, +# which are opened each time a queue is accessed; this creates high IO load. On the other +# hand, setting onDemandLimit too high may exceed the maximum number of file +# pointers. You can increase that maximum in /proc/sys/fs/file-max and adapt it to the number +# defined here. +crawler.onDemandLimit = 1000 + # maximum size of indexing queue indexer.slots = 100 diff --git a/htroot/BlogComments.java b/htroot/BlogComments.java index 7e21a1528..51849b73f 100644 --- a/htroot/BlogComments.java +++ b/htroot/BlogComments.java @@ -48,6 +48,7 @@ import net.yacy.data.BlogBoard.BlogEntry; import net.yacy.peers.Network; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -143,8 +144,8 @@ public class BlogComments { messageForwardingViaEmail(sb, msgEntry); // finally write notification - final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot") + "/env/grafics/message.gif"); - final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif"); + final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/message.gif"); + final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif"); try { Files.copy(notifierSource, notifierDest); } catch (final 
IOException e) { diff --git a/htroot/ConfigAppearance_p.java b/htroot/ConfigAppearance_p.java index e7495bb6b..8635e90a3 100644 --- a/htroot/ConfigAppearance_p.java +++ b/htroot/ConfigAppearance_p.java @@ -162,7 +162,7 @@ public class ConfigAppearance_p { } private static boolean changeSkin(final Switchboard sb, final String skinPath, final String skin) { - final File htdocsDir = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "env"); + final File htdocsDir = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "env"); final File styleFile = new File(htdocsDir, "style.css"); final File skinFile = new File(skinPath, skin); diff --git a/htroot/Messages_p.java b/htroot/Messages_p.java index 80aeb46ef..45c59a749 100644 --- a/htroot/Messages_p.java +++ b/htroot/Messages_p.java @@ -38,6 +38,7 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.data.MessageBoard; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -96,8 +97,8 @@ public class Messages_p { MessageBoard.entry message; // first reset notification - final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath", "htroot") + "/env/grafics/empty.gif"); - final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif"); + final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/empty.gif"); + final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif"); try { Files.copy(notifierSource, notifierDest); } catch (final IOException e) { diff --git a/htroot/www/welcome.java b/htroot/www/welcome.java index 86aed58b3..e131383df 100644 --- a/htroot/www/welcome.java +++ b/htroot/www/welcome.java 
@@ -35,6 +35,7 @@ import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -61,8 +62,8 @@ public class welcome { final boolean senior = (peertype.equals(Seed.PEERTYPE_SENIOR)) || (peertype.equals(Seed.PEERTYPE_PRINCIPAL)); if (senior) { prop.put("couldcan", "can"); } else { prop.put("couldcan", "could"); } if (senior) { prop.put("seniorinfo", "This peer runs in senior mode which means that your peer can be accessed using the addresses shown above."); } else { prop.putHTML("seniorinfo", "Nobody can access your peer from the outside of your intranet. You must open your firewall and/or set a 'virtual server' in the settings of your router to enable access to the addresses as shown below."); } - final File wwwpath = env.getDataPath("htDocsPath", "DATA/HTDOCS"); - prop.putHTML("wwwpath", wwwpath.isAbsolute() ? wwwpath.getAbsolutePath() : "/" + env.getConfig("htDocsPath", "DATA/HTDOCS")); + final File wwwpath = env.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT); + prop.putHTML("wwwpath", wwwpath.isAbsolute() ? 
wwwpath.getAbsolutePath() : "/" + env.getConfig(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT)); // return rewrite properties return prop; diff --git a/htroot/yacy/message.java b/htroot/yacy/message.java index 3544c4186..41bb6dde4 100644 --- a/htroot/yacy/message.java +++ b/htroot/yacy/message.java @@ -45,6 +45,7 @@ import net.yacy.peers.Network; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.search.Switchboard; +import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; import net.yacy.utils.crypt; @@ -148,8 +149,8 @@ public final class message { messageForwardingViaEmail(sb, msgEntry); // finally write notification - final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot") + "/env/grafics/message.gif"); - final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif"); + final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH,SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/message.gif"); + final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif"); try { Files.copy(notifierSource, notifierDest); } catch (final IOException e) { diff --git a/source/net/yacy/crawler/HostBalancer.java b/source/net/yacy/crawler/HostBalancer.java index 9a48f7396..1404ddec4 100644 --- a/source/net/yacy/crawler/HostBalancer.java +++ b/source/net/yacy/crawler/HostBalancer.java @@ -68,11 +68,14 @@ public class HostBalancer implements Balancer { private final boolean exceed134217727; private final Map queues; private final Set roundRobinHostHashes; + private final int onDemandLimit; public HostBalancer( final File hostsPath, + final int onDemandLimit, final boolean exceed134217727) { this.hostsPath = hostsPath; + this.onDemandLimit = onDemandLimit; this.exceed134217727 = 
exceed134217727; // create a stack for newly entered entries @@ -81,7 +84,7 @@ public class HostBalancer implements Balancer { String[] list = this.hostsPath.list(); for (String address: list) try { File queuePath = new File(this.hostsPath, address); - HostQueue queue = new HostQueue(queuePath, this.queues.size() > 100, this.exceed134217727); + HostQueue queue = new HostQueue(queuePath, this.queues.size() > this.onDemandLimit, this.exceed134217727); if (queue.size() == 0) { queue.close(); queuePath.delete(); @@ -210,7 +213,7 @@ public class HostBalancer implements Balancer { synchronized (this) { HostQueue queue = this.queues.get(hosthash); if (queue == null) { - queue = new HostQueue(this.hostsPath, entry.url().getHost(), entry.url().getPort(), this.queues.size() > 100, this.exceed134217727); + queue = new HostQueue(this.hostsPath, entry.url().getHost(), entry.url().getPort(), this.queues.size() > this.onDemandLimit, this.exceed134217727); this.queues.put(hosthash, queue); // profile might be null when continue crawls after YaCy restart robots.ensureExist(entry.url(), profile == null ? 
ClientIdentification.yacyInternetCrawlerAgent : profile.getAgent(), true); // concurrently load all robots.txt diff --git a/source/net/yacy/crawler/data/CrawlQueues.java b/source/net/yacy/crawler/data/CrawlQueues.java index 34dcf0521..3430bdfc9 100644 --- a/source/net/yacy/crawler/data/CrawlQueues.java +++ b/source/net/yacy/crawler/data/CrawlQueues.java @@ -87,7 +87,7 @@ public class CrawlQueues { // start crawling management log.config("Starting Crawling Management"); - this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727); + this.noticeURL = new NoticedURL(queuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), sb.exceed134217727); this.errorURL = new ErrorCache(sb.index.fulltext()); this.delegatedURL = new ConcurrentHashMap(); } @@ -104,7 +104,7 @@ public class CrawlQueues { this.errorURL.clearCache(); this.remoteCrawlProviderHashes.clear(); this.noticeURL.close(); - this.noticeURL = new NoticedURL(newQueuePath, this.sb.useTailCache, this.sb.exceed134217727); + this.noticeURL = new NoticedURL(newQueuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), this.sb.exceed134217727); this.delegatedURL.clear(); } diff --git a/source/net/yacy/crawler/data/NoticedURL.java b/source/net/yacy/crawler/data/NoticedURL.java index 5754e902d..c49f6aa85 100644 --- a/source/net/yacy/crawler/data/NoticedURL.java +++ b/source/net/yacy/crawler/data/NoticedURL.java @@ -60,13 +60,13 @@ public class NoticedURL { protected NoticedURL( final File cachePath, - @SuppressWarnings("unused") final boolean useTailCache, + final int onDemandLimit, final boolean exceed134217727) { ConcurrentLog.info("NoticedURL", "CREATING STACKS at " + cachePath.toString()); - this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), exceed134217727); - this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), exceed134217727); - this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), exceed134217727); - this.noloadStack 
= new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), exceed134217727); + this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), onDemandLimit, exceed134217727); + this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), onDemandLimit, exceed134217727); + this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), onDemandLimit, exceed134217727); + this.noloadStack = new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), onDemandLimit, exceed134217727); } public void clear() { diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index 6ad91b7a1..dd8098a5b 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -356,7 +356,7 @@ public class YaCyDefaultServlet extends HttpServlet { } } finally { if (resource != null) { - resource.release(); + resource.close(); } } }