From fbbbf5f411c6e587009dbc8676de542242eb4d25 Mon Sep 17 00:00:00 2001 From: theli Date: Fri, 24 Feb 2006 09:35:54 +0000 Subject: [PATCH] *) remote trigger for proxy-crawl - remote crawling can now be enabled for the proxy crawling profile See: http://www.yacy-forum.de/viewtopic.php?p=17753#17753 git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1758 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ProxyIndexingMonitor_p.html | 9 +++++++++ htroot/ProxyIndexingMonitor_p.java | 8 +++++++- source/de/anomic/plasma/plasmaCrawlStacker.java | 8 +++++--- source/de/anomic/plasma/plasmaSwitchboard.java | 12 ++++++------ yacy.init | 1 + 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/htroot/ProxyIndexingMonitor_p.html b/htroot/ProxyIndexingMonitor_p.html index 2f7d559a0..3f3f45a44 100644 --- a/htroot/ProxyIndexingMonitor_p.html +++ b/htroot/ProxyIndexingMonitor_p.html @@ -39,6 +39,14 @@ URLs as crawling start points for crawling. It is almost always recommended to set this on. The only exception is that you have another caching proxy running as secondary proxy and YaCy is configured to used that proxy in proxy-proxy - mode. + + Do Remote Indexing + + If checked, the crawler will contact other peers and use them as remote indexers for your crawl. + If you need your crawling results locally, you should switch this off. + Only senior and principal peers can initiate or receive remote crawls. + Please note that this setting only takes effect for a prefetch depth greater than 0. +
Proxy generally
@@ -73,6 +81,7 @@ Please delete that file and restart.
Pre-fetch is now set to depth-#[message]#.
Caching is now #(caching)#off::on#(/caching)#.
+Remote Indexing is now #(crawlOrder)#off::on#(/crawlOrder)#.
#(path)#::Cachepath is now set to '#[return]#'. Please move the old data in the new directory.
#(/path)# #(size)#::Cachesize is now set to #[return]#MB.
#(/size)# #(restart)#::
Changes will take effect after restart only.
#(/restart)# diff --git a/htroot/ProxyIndexingMonitor_p.java b/htroot/ProxyIndexingMonitor_p.java index 96c83a79b..68ef2e483 100644 --- a/htroot/ProxyIndexingMonitor_p.java +++ b/htroot/ProxyIndexingMonitor_p.java @@ -90,7 +90,9 @@ public class ProxyIndexingMonitor_p { env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth)); boolean proxyStoreHTCache = (post.get("proxyStoreHTCache", "")).equals("on"); env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false"); - + boolean proxyCrawlOrder = post.containsKey("proxyCrawlOrder"); + env.setConfig("proxyCrawlOrder", proxyCrawlOrder ? "true" : "false"); + // added proxyCache, proxyCacheSize - Borg-0300 // proxyCache - check and create the directory oldProxyCachePath = env.getConfig("proxyCache", "DATA/HTCACHE"); @@ -118,9 +120,12 @@ public class ProxyIndexingMonitor_p { try { profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth)); profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false"); + profile.changeEntry("remoteIndexing",proxyCrawlOrder ? "true":"false"); + prop.put("info", 2);//new proxyPrefetchdepth prop.put("info_message", newProxyPrefetchDepth); prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0); + prop.put("info_crawlOrder", (proxyCrawlOrder) ? 1 : 0); // proxyCache - only display on change if (oldProxyCachePath.equals(newProxyCachePath)) { @@ -159,6 +164,7 @@ public class ProxyIndexingMonitor_p { prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0); + prop.put("proxyCrawlOrder", env.getConfig("proxyCrawlOrder", "").equals("true") ? 
1 : 0); prop.put("proxyCache", env.getConfig("proxyCache", "DATA/HTCACHE")); prop.put("proxyCacheSize", env.getConfig("proxyCacheSize", "64")); // return rewrite properties diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index efa2bf42f..48d7acade 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -343,9 +343,11 @@ public final class plasmaCrawlStacker { (profile != null) && (profile.remoteIndexing()) /* granted */ && (currentdepth == profile.generalDepth()) /* leaf node */ && - (initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ && - ((yacyCore.seedDB.mySeed.isSenior()) || - (yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */; + //(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ && + ( + (yacyCore.seedDB.mySeed.isSenior()) || + (yacyCore.seedDB.mySeed.isPrincipal()) + ) /* qualified */; if ((!local)&&(!global)&&(!profile.handle().equals(this.sb.defaultRemoteProfile.handle()))) { this.log.logSevere("URL '" + nexturlString + "' can neither be crawled local nor global."); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index a9cd6b65d..c72d57ff7 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -665,14 +665,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } private void initProfiles() { - if ((profiles.size() == 0) || + if ((this.profiles.size() == 0) || (getConfig(STR_PROXYPROFILE, "").length() == 0) || - (profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) { + (this.profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) { // generate new default entry for proxy crawling - defaultProxyProfile = profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), 
Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, false, true, true, true); - setConfig(STR_PROXYPROFILE, defaultProxyProfile.handle()); + this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true); + setConfig(STR_PROXYPROFILE, this.defaultProxyProfile.handle()); } else { - defaultProxyProfile = profiles.getEntry(getConfig(STR_PROXYPROFILE, "")); + this.defaultProxyProfile = this.profiles.getEntry(getConfig(STR_PROXYPROFILE, "")); } if ((profiles.size() == 1) || (getConfig(STR_REMOTEPROFILE, "").length() == 0) || @@ -1156,7 +1156,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser boolean tryRemote = ((urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) != 0) || (sbQueue.size() != 0)) && (profile.remoteIndexing()) && (urlEntry.initiator() != null) && - (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) && + // (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) && ((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())); if (tryRemote) { boolean success = processRemoteCrawlTrigger(urlEntry); diff --git a/yacy.init b/yacy.init index 7e0b8de69..56136aad8 100644 --- a/yacy.init +++ b/yacy.init @@ -365,6 +365,7 @@ defaultLinkReceiveFrequency=30 # of 2 would result in hundreds of prefetched URLs for each single proxy fill. proxyPrefetchDepth=0 proxyStoreHTCache=true +proxyCrawlOrder=false # From the 'IndexCreate' menu point you can also define a crawling start point. # The crawling works the same way as the prefetch, but it is possible to