Merge branch 'master' of ssh://git@gitorious.org/yacy/rc1.git

pull/1/head
Michael Peter Christen 11 years ago
commit 698f053658

@ -772,6 +772,14 @@ crawler.defaultAverageLatency = 500
# The result is the minimum remote server access delay time
crawler.latencyFactor = 0.5
# The onDemandLimit is the maximum number of crawl queues that are concurrently opened
# at the same time. If the number of hosts exceeds this number, onDemand queues are opened
# which are opened each time a queue is accessed which creates high IO load. On the other
# hand, having too many entries in onDemandLimit may exceed the maximum number of file
# pointers. You can increase this number in /proc/sys/fs/file-max and adopt it to the number
# defined here
crawler.onDemandLimit = 1000
# maximum size of indexing queue
indexer.slots = 100

@ -48,6 +48,7 @@ import net.yacy.data.BlogBoard.BlogEntry;
import net.yacy.peers.Network;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -143,8 +144,8 @@ public class BlogComments {
messageForwardingViaEmail(sb, msgEntry);
// finally write notification
final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot") + "/env/grafics/message.gif");
final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif");
final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/message.gif");
final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif");
try {
Files.copy(notifierSource, notifierDest);
} catch (final IOException e) {

@ -162,7 +162,7 @@ public class ConfigAppearance_p {
}
private static boolean changeSkin(final Switchboard sb, final String skinPath, final String skin) {
final File htdocsDir = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "env");
final File htdocsDir = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "env");
final File styleFile = new File(htdocsDir, "style.css");
final File skinFile = new File(skinPath, skin);

@ -38,6 +38,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.data.MessageBoard;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -96,8 +97,8 @@ public class Messages_p {
MessageBoard.entry message;
// first reset notification
final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath", "htroot") + "/env/grafics/empty.gif");
final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif");
final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/empty.gif");
final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif");
try {
Files.copy(notifierSource, notifierDest);
} catch (final IOException e) {

@ -35,6 +35,7 @@ import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
@ -61,8 +62,8 @@ public class welcome {
final boolean senior = (peertype.equals(Seed.PEERTYPE_SENIOR)) || (peertype.equals(Seed.PEERTYPE_PRINCIPAL));
if (senior) { prop.put("couldcan", "can"); } else { prop.put("couldcan", "could"); }
if (senior) { prop.put("seniorinfo", "This peer runs in senior mode which means that your peer can be accessed using the addresses shown above."); } else { prop.putHTML("seniorinfo", "<b>Nobody can access your peer from the outside of your intranet. You must open your firewall and/or set a 'virtual server' in the settings of your router to enable access to the addresses as shown below.</b>"); }
final File wwwpath = env.getDataPath("htDocsPath", "DATA/HTDOCS");
prop.putHTML("wwwpath", wwwpath.isAbsolute() ? wwwpath.getAbsolutePath() : "<application_root_path>/" + env.getConfig("htDocsPath", "DATA/HTDOCS"));
final File wwwpath = env.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT);
prop.putHTML("wwwpath", wwwpath.isAbsolute() ? wwwpath.getAbsolutePath() : "<application_root_path>/" + env.getConfig(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT));
// return rewrite properties
return prop;

@ -45,6 +45,7 @@ import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.server.serverObjects;
import net.yacy.server.serverSwitch;
import net.yacy.utils.crypt;
@ -148,8 +149,8 @@ public final class message {
messageForwardingViaEmail(sb, msgEntry);
// finally write notification
final File notifierSource = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot") + "/env/grafics/message.gif");
final File notifierDest = new File(sb.getDataPath("htDocsPath", "DATA/HTDOCS"), "notifier.gif");
final File notifierSource = new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.HTROOT_PATH,SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/message.gif");
final File notifierDest = new File(sb.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT), "notifier.gif");
try {
Files.copy(notifierSource, notifierDest);
} catch (final IOException e) {

@ -68,11 +68,14 @@ public class HostBalancer implements Balancer {
private final boolean exceed134217727;
private final Map<String, HostQueue> queues;
private final Set<String> roundRobinHostHashes;
private final int onDemandLimit;
public HostBalancer(
final File hostsPath,
final int onDemandLimit,
final boolean exceed134217727) {
this.hostsPath = hostsPath;
this.onDemandLimit = onDemandLimit;
this.exceed134217727 = exceed134217727;
// create a stack for newly entered entries
@ -81,7 +84,7 @@ public class HostBalancer implements Balancer {
String[] list = this.hostsPath.list();
for (String address: list) try {
File queuePath = new File(this.hostsPath, address);
HostQueue queue = new HostQueue(queuePath, this.queues.size() > 100, this.exceed134217727);
HostQueue queue = new HostQueue(queuePath, this.queues.size() > this.onDemandLimit, this.exceed134217727);
if (queue.size() == 0) {
queue.close();
queuePath.delete();
@ -210,7 +213,7 @@ public class HostBalancer implements Balancer {
synchronized (this) {
HostQueue queue = this.queues.get(hosthash);
if (queue == null) {
queue = new HostQueue(this.hostsPath, entry.url().getHost(), entry.url().getPort(), this.queues.size() > 100, this.exceed134217727);
queue = new HostQueue(this.hostsPath, entry.url().getHost(), entry.url().getPort(), this.queues.size() > this.onDemandLimit, this.exceed134217727);
this.queues.put(hosthash, queue);
// profile might be null when continue crawls after YaCy restart
robots.ensureExist(entry.url(), profile == null ? ClientIdentification.yacyInternetCrawlerAgent : profile.getAgent(), true); // concurrently load all robots.txt

@ -87,7 +87,7 @@ public class CrawlQueues {
// start crawling management
log.config("Starting Crawling Management");
this.noticeURL = new NoticedURL(queuePath, sb.useTailCache, sb.exceed134217727);
this.noticeURL = new NoticedURL(queuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), sb.exceed134217727);
this.errorURL = new ErrorCache(sb.index.fulltext());
this.delegatedURL = new ConcurrentHashMap<String, DigestURL>();
}
@ -104,7 +104,7 @@ public class CrawlQueues {
this.errorURL.clearCache();
this.remoteCrawlProviderHashes.clear();
this.noticeURL.close();
this.noticeURL = new NoticedURL(newQueuePath, this.sb.useTailCache, this.sb.exceed134217727);
this.noticeURL = new NoticedURL(newQueuePath, sb.getConfigInt("crawler.onDemandLimit", 1000), this.sb.exceed134217727);
this.delegatedURL.clear();
}

@ -60,13 +60,13 @@ public class NoticedURL {
protected NoticedURL(
final File cachePath,
@SuppressWarnings("unused") final boolean useTailCache,
final int onDemandLimit,
final boolean exceed134217727) {
ConcurrentLog.info("NoticedURL", "CREATING STACKS at " + cachePath.toString());
this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), exceed134217727);
this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), exceed134217727);
this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), exceed134217727);
this.noloadStack = new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), exceed134217727);
this.coreStack = new HostBalancer(new File(cachePath, "CrawlerCoreStacks"), onDemandLimit, exceed134217727);
this.limitStack = new HostBalancer(new File(cachePath, "CrawlerLimitStacks"), onDemandLimit, exceed134217727);
this.remoteStack = new HostBalancer(new File(cachePath, "CrawlerRemoteStacks"), onDemandLimit, exceed134217727);
this.noloadStack = new HostBalancer(new File(cachePath, "CrawlerNoLoadStacks"), onDemandLimit, exceed134217727);
}
public void clear() {

@ -356,7 +356,7 @@ public class YaCyDefaultServlet extends HttpServlet {
}
} finally {
if (resource != null) {
resource.release();
resource.close();
}
}
}

Loading…
Cancel
Save