*) remote trigger for proxy-crawl

- remote crawling can now be enabled for the proxy crawling profile
   See: http://www.yacy-forum.de/viewtopic.php?p=17753#17753
   

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1758 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent ff34377ac5
commit fbbbf5f411

@ -39,6 +39,14 @@ URLs as crawling start points for crawling.</div></td>
<td class=small><input type="checkbox" name="proxyStoreHTCache" align="top" #(proxyStoreHTCacheChecked)#::checked#(/proxyStoreHTCacheChecked)#></td>
<td class=small>It is almost always recommended to set this on. The only exception is if you have another caching proxy running as a secondary proxy and YaCy is configured to use that proxy in proxy-proxy mode.</td>
</tr>
<tr valign="top" class="TableCellLight">
<td class=small>Do Remote Indexing</td>
<td class=small><input type="checkbox" name="proxyCrawlOrder" align="top" #(proxyCrawlOrder)#::checked#(/proxyCrawlOrder)#></td>
<td class=small>If checked, the crawler will contact other peers and use them as remote indexers for your crawl.
If you need your crawling results locally, you should switch this off.
Only senior and principal peers can initiate or receive remote crawls.
Please note that this setting only takes effect for a prefetch depth greater than 0.</td>
</tr>
<tr class="TableCellDark">
<td colspan="3"><div class=small><b>Proxy generally</b></div></td>
@ -73,6 +81,7 @@ Please delete that file and restart.</b><br>
<!-- info 2 -->
<b>Pre-fetch is now set to depth-#[message]#.</b><br>
<b>Caching is now #(caching)#off::on#(/caching)#.</b><br>
<b>Remote Indexing is now #(crawlOrder)#off::on#(/crawlOrder)#.</b><br>
#(path)#::<b>Cachepath is now set to '#[return]#'.</b> Please move the old data into the new directory.<br>#(/path)#
#(size)#::<b>Cachesize is now set to #[return]#MB.</b><br>#(/size)#
#(restart)#::<br><font color="red"><b>Changes will take effect after restart only.</b></font><br>#(/restart)#

@ -90,7 +90,9 @@ public class ProxyIndexingMonitor_p {
env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth));
boolean proxyStoreHTCache = (post.get("proxyStoreHTCache", "")).equals("on");
env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false");
boolean proxyCrawlOrder = post.containsKey("proxyCrawlOrder");
env.setConfig("proxyCrawlOrder", proxyCrawlOrder ? "true" : "false");
// added proxyCache, proxyCacheSize - Borg-0300
// proxyCache - check and create the directory
oldProxyCachePath = env.getConfig("proxyCache", "DATA/HTCACHE");
@ -118,9 +120,12 @@ public class ProxyIndexingMonitor_p {
try {
profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth));
profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false");
profile.changeEntry("remoteIndexing",proxyCrawlOrder ? "true":"false");
prop.put("info", 2);//new proxyPrefetchdepth
prop.put("info_message", newProxyPrefetchDepth);
prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0);
prop.put("info_crawlOrder", (proxyCrawlOrder) ? 1 : 0);
// proxyCache - only display on change
if (oldProxyCachePath.equals(newProxyCachePath)) {
@ -159,6 +164,7 @@ public class ProxyIndexingMonitor_p {
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0);
prop.put("proxyCrawlOrder", env.getConfig("proxyCrawlOrder", "").equals("true") ? 1 : 0);
prop.put("proxyCache", env.getConfig("proxyCache", "DATA/HTCACHE"));
prop.put("proxyCacheSize", env.getConfig("proxyCacheSize", "64"));
// return rewrite properties

@ -343,9 +343,11 @@ public final class plasmaCrawlStacker {
(profile != null) &&
(profile.remoteIndexing()) /* granted */ &&
(currentdepth == profile.generalDepth()) /* leaf node */ &&
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
((yacyCore.seedDB.mySeed.isSenior()) ||
(yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */;
//(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
(
(yacyCore.seedDB.mySeed.isSenior()) ||
(yacyCore.seedDB.mySeed.isPrincipal())
) /* qualified */;
if ((!local)&&(!global)&&(!profile.handle().equals(this.sb.defaultRemoteProfile.handle()))) {
this.log.logSevere("URL '" + nexturlString + "' can neither be crawled local nor global.");

@ -665,14 +665,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
private void initProfiles() {
if ((profiles.size() == 0) ||
if ((this.profiles.size() == 0) ||
(getConfig(STR_PROXYPROFILE, "").length() == 0) ||
(profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) {
(this.profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) {
// generate new default entry for proxy crawling
defaultProxyProfile = profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, false, true, true, true);
setConfig(STR_PROXYPROFILE, defaultProxyProfile.handle());
this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
setConfig(STR_PROXYPROFILE, this.defaultProxyProfile.handle());
} else {
defaultProxyProfile = profiles.getEntry(getConfig(STR_PROXYPROFILE, ""));
this.defaultProxyProfile = this.profiles.getEntry(getConfig(STR_PROXYPROFILE, ""));
}
if ((profiles.size() == 1) ||
(getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
@ -1156,7 +1156,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
boolean tryRemote = ((urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) != 0) || (sbQueue.size() != 0)) &&
(profile.remoteIndexing()) &&
(urlEntry.initiator() != null) &&
(!(urlEntry.initiator().equals(plasmaURL.dummyHash))) &&
// (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) &&
((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal()));
if (tryRemote) {
boolean success = processRemoteCrawlTrigger(urlEntry);

@ -365,6 +365,7 @@ defaultLinkReceiveFrequency=30
# of 2 would result in hundreds of prefetched URLs for each single proxy fill.
proxyPrefetchDepth=0
proxyStoreHTCache=true
proxyCrawlOrder=false
# From the 'IndexCreate' menu point you can also define a crawling start point.
# The crawling works the same way as the prefetch, but it is possible to

Loading…
Cancel
Save