From 89eb9a2292e109ae7ea3b93d1195afc0707be335 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 10 Apr 2005 23:51:42 +0000 Subject: [PATCH] fixed bug with crawl profiles git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@12 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/plasma/plasmaSwitchboard.java | 28 +++++++----- source/yacy.java | 44 ------------------- yacy.init | 5 +++ 3 files changed, 21 insertions(+), 56 deletions(-) diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index bb09b4736..a05bca134 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -204,6 +204,9 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi // make crawl profiles database and default profiles profiles = new plasmaCrawlProfile(new File(plasmaPath, "crawlProfiles0.db")); + //System.out.println("profiles.size=" + profiles.size()); + //System.out.println("profile-config=" + getConfig("defaultProxyProfile", "").length()); + //System.out.println("profile-entry=" + profiles.getEntry(getConfig("defaultProxyProfile", "")).toString()); if ((profiles.size() == 0) || (getConfig("defaultProxyProfile", "").length() == 0) || (profiles.getEntry(getConfig("defaultProxyProfile", "")) == null)) { @@ -278,24 +281,25 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi long[] testresult = facilityDB.selectLong("statistik", "yyyyMMddHHm"); testresult = facilityDB.selectLong("statistik", (new serverDate()).toShortString(false).substring(0, 11)); + // start yacy core + yacyCore yc = new yacyCore(this); + serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000); + // deploy threads - deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", - new serverInstantThread(cacheManager, "job", "size"), log, 10000); - deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue", - new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), log, 20000); - deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling", - new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), log, 30000); deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information" , new serverInstantThread(this, "cleanupJob", "cleanupJobSize"), log, 10000); // all 5 Minutes deployThread("80_dequeue", "Indexing Dequeue", "thread that creates database entries from scraped web content and performes indexing" , new serverInstantThread(this, "deQueue", "queueSize"), log, 10000); - // start yacy core - yacyCore yc = new yacyCore(this); - serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000); - deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", - new serverInstantThread(yc, "peerPing", null), yc.log, 6000); + deployThread("70_cachemanager", "Proxy Cache Enqueue", "job takes new proxy files from RAM stack, stores them, and hands over to the Indexing Stack", + new serverInstantThread(cacheManager, "job", "size"), log, 10000); + deployThread("60_globalcrawl", "Global Crawl", "thread that performes a single crawl/indexing step of a web page for global crawling", + new serverInstantThread(this, "globalCrawlJob", "globalCrawlJobSize"), log, 30000); + deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue", + new serverInstantThread(this, "localCrawlJob", "localCrawlJobSize"), log, 20000); deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account", new serverInstantThread(yc, "publishSeedList", null), yc.log, 180000); + deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task", + new serverInstantThread(yc, "peerPing", null), yc.log, 4000); indexDistribution = new distributeIndex(100 /*indexCount*/, 8000, 1 /*peerCount*/); deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", new serverInstantThread(indexDistribution, "job", null), log, 120000); @@ -680,7 +684,7 @@ public class plasmaSwitchboard extends serverAbstractSwitch implements serverSwi } log.logDebug("plasmaSwitchboard.processCrawling: url=" + urlEntry.url() + ", initiator=" + urlEntry.initiator() + ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false") + ", depth=" + urlEntry.depth() + ", crawlDepth=" + profile.generalDepth() + ", filter=" + profile.generalFilter() + - ", permission=" + (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false")); + ", permission=" + ((yacyCore.seedDB == null) ? "undefined" : (((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())) ? "true" : "false"))); boolean tryRemote = (profile.remoteIndexing()) /* granted */ && diff --git a/source/yacy.java b/source/yacy.java index 5aa5ea1f4..51e2d4c15 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -525,47 +525,3 @@ public class yacy { } } - -/* - -package de; -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.Writer; -import java.net.Socket; - -import javax.net.ssl.SSLSocketFactory; - -public class ssltest { - - public static final String TARGET_HTTPS_SERVER = "www.verisign.com"; - public static final int TARGET_HTTPS_PORT = 443; - - public static void main(String[] args) throws Exception { - - Socket socket = SSLSocketFactory.getDefault(). - createSocket(TARGET_HTTPS_SERVER, TARGET_HTTPS_PORT); - try { - Writer out = new OutputStreamWriter( - socket.getOutputStream(), "ISO-8859-1"); - out.write("GET / HTTP/1.1\r\n"); - out.write("Host: " + TARGET_HTTPS_SERVER + ":" + - TARGET_HTTPS_PORT + "\r\n"); - out.write("Agent: SSL-TEST\r\n"); - out.write("\r\n"); - out.flush(); - BufferedReader in = new BufferedReader( - new InputStreamReader(socket.getInputStream(), "ISO-8859-1")); - String line = null; - while ((line = in.readLine()) != null) { - System.out.println(line); - } - } finally { - socket.close(); - } - } -} - -*/ - diff --git a/yacy.init b/yacy.init index a2874c3c5..f0f62fe49 100644 --- a/yacy.init +++ b/yacy.init @@ -359,6 +359,11 @@ crawlingQ=false storeHTCache=false storeTXCache=true +# default crawl profile entries +# if these entries are empty, then a new entry will be generated +defaultProxyProfile= +defaultRemoteProfile= + # peers may initiate remote crawling tasks. # every peer may allow or disallow to be used as crawling-peer; # you can also set a maximum crawl depth that can be requested or accepted