*) remote trigger for proxy-crawl

- remote crawling can now be enabled for the proxy crawling profile
   See: http://www.yacy-forum.de/viewtopic.php?p=17753#17753
   

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1758 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent ff34377ac5
commit fbbbf5f411

@ -39,6 +39,14 @@ URLs as crawling start points for crawling.</div></td>
<td class=small><input type="checkbox" name="proxyStoreHTCache" align="top" #(proxyStoreHTCacheChecked)#::checked#(/proxyStoreHTCacheChecked)#></td> <td class=small><input type="checkbox" name="proxyStoreHTCache" align="top" #(proxyStoreHTCacheChecked)#::checked#(/proxyStoreHTCacheChecked)#></td>
<td class=small>It is almost always recommended to set this on. The only exception is that you have another caching proxy running as secondary proxy and YaCy is configured to use that proxy in proxy-proxy - mode.</td> <td class=small>It is almost always recommended to set this on. The only exception is that you have another caching proxy running as secondary proxy and YaCy is configured to use that proxy in proxy-proxy - mode.</td>
</tr> </tr>
<tr valign="top" class="TableCellLight">
<td class=small>Do Remote Indexing</td>
<td class=small><input type="checkbox" name="proxyCrawlOrder" align="top" #(proxyCrawlOrder)#::checked#(/proxyCrawlOrder)#></td>
<td class=small>If checked, the crawler will contact other peers and use them as remote indexers for your crawl.
If you need your crawling results locally, you should switch this off.
Only senior and principal peers can initiate or receive remote crawls.
Please note that this setting only takes effect for a prefetch depth greater than 0.</td>
</tr>
<tr class="TableCellDark"> <tr class="TableCellDark">
<td colspan="3"><div class=small><b>Proxy generally</b></div></td> <td colspan="3"><div class=small><b>Proxy generally</b></div></td>
@ -73,6 +81,7 @@ Please delete that file and restart.</b><br>
<!-- info 2 --> <!-- info 2 -->
<b>Pre-fetch is now set to depth-#[message]#.</b><br> <b>Pre-fetch is now set to depth-#[message]#.</b><br>
<b>Caching is now #(caching)#off::on#(/caching)#.</b><br> <b>Caching is now #(caching)#off::on#(/caching)#.</b><br>
<b>Remote Indexing is now #(crawlOrder)#off::on#(/crawlOrder)#.</b><br>
#(path)#::<b>Cachepath is now set to '#[return]#'.</b> Please move the old data in the new directory.<br>#(/path)# #(path)#::<b>Cachepath is now set to '#[return]#'.</b> Please move the old data in the new directory.<br>#(/path)#
#(size)#::<b>Cachesize is now set to #[return]#MB.</b><br>#(/size)# #(size)#::<b>Cachesize is now set to #[return]#MB.</b><br>#(/size)#
#(restart)#::<br><font color="red"><b>Changes will take effect after restart only.</b></font><br>#(/restart)# #(restart)#::<br><font color="red"><b>Changes will take effect after restart only.</b></font><br>#(/restart)#

@ -90,7 +90,9 @@ public class ProxyIndexingMonitor_p {
env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth)); env.setConfig("proxyPrefetchDepth", Integer.toString(newProxyPrefetchDepth));
boolean proxyStoreHTCache = (post.get("proxyStoreHTCache", "")).equals("on"); boolean proxyStoreHTCache = (post.get("proxyStoreHTCache", "")).equals("on");
env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false"); env.setConfig("proxyStoreHTCache", (proxyStoreHTCache) ? "true" : "false");
boolean proxyCrawlOrder = post.containsKey("proxyCrawlOrder");
env.setConfig("proxyCrawlOrder", proxyCrawlOrder ? "true" : "false");
// added proxyCache, proxyCacheSize - Borg-0300 // added proxyCache, proxyCacheSize - Borg-0300
// proxyCache - check and create the directory // proxyCache - check and create the directory
oldProxyCachePath = env.getConfig("proxyCache", "DATA/HTCACHE"); oldProxyCachePath = env.getConfig("proxyCache", "DATA/HTCACHE");
@ -118,9 +120,12 @@ public class ProxyIndexingMonitor_p {
try { try {
profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth)); profile.changeEntry("generalDepth", Integer.toString(newProxyPrefetchDepth));
profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false"); profile.changeEntry("storeHTCache", (proxyStoreHTCache) ? "true": "false");
profile.changeEntry("remoteIndexing",proxyCrawlOrder ? "true":"false");
prop.put("info", 2);//new proxyPrefetchdepth prop.put("info", 2);//new proxyPrefetchdepth
prop.put("info_message", newProxyPrefetchDepth); prop.put("info_message", newProxyPrefetchDepth);
prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0); prop.put("info_caching", (proxyStoreHTCache) ? 1 : 0);
prop.put("info_crawlOrder", (proxyCrawlOrder) ? 1 : 0);
// proxyCache - only display on change // proxyCache - only display on change
if (oldProxyCachePath.equals(newProxyCachePath)) { if (oldProxyCachePath.equals(newProxyCachePath)) {
@ -159,6 +164,7 @@ public class ProxyIndexingMonitor_p {
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0")); prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0); prop.put("proxyStoreHTCacheChecked", env.getConfig("proxyStoreHTCache", "").equals("true") ? 1 : 0);
prop.put("proxyCrawlOrder", env.getConfig("proxyCrawlOrder", "").equals("true") ? 1 : 0);
prop.put("proxyCache", env.getConfig("proxyCache", "DATA/HTCACHE")); prop.put("proxyCache", env.getConfig("proxyCache", "DATA/HTCACHE"));
prop.put("proxyCacheSize", env.getConfig("proxyCacheSize", "64")); prop.put("proxyCacheSize", env.getConfig("proxyCacheSize", "64"));
// return rewrite properties // return rewrite properties

@ -343,9 +343,11 @@ public final class plasmaCrawlStacker {
(profile != null) && (profile != null) &&
(profile.remoteIndexing()) /* granted */ && (profile.remoteIndexing()) /* granted */ &&
(currentdepth == profile.generalDepth()) /* leaf node */ && (currentdepth == profile.generalDepth()) /* leaf node */ &&
(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ && //(initiatorHash.equals(yacyCore.seedDB.mySeed.hash)) /* not proxy */ &&
((yacyCore.seedDB.mySeed.isSenior()) || (
(yacyCore.seedDB.mySeed.isPrincipal())) /* qualified */; (yacyCore.seedDB.mySeed.isSenior()) ||
(yacyCore.seedDB.mySeed.isPrincipal())
) /* qualified */;
if ((!local)&&(!global)&&(!profile.handle().equals(this.sb.defaultRemoteProfile.handle()))) { if ((!local)&&(!global)&&(!profile.handle().equals(this.sb.defaultRemoteProfile.handle()))) {
this.log.logSevere("URL '" + nexturlString + "' can neither be crawled local nor global."); this.log.logSevere("URL '" + nexturlString + "' can neither be crawled local nor global.");

@ -665,14 +665,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
} }
private void initProfiles() { private void initProfiles() {
if ((profiles.size() == 0) || if ((this.profiles.size() == 0) ||
(getConfig(STR_PROXYPROFILE, "").length() == 0) || (getConfig(STR_PROXYPROFILE, "").length() == 0) ||
(profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) { (this.profiles.getEntry(getConfig(STR_PROXYPROFILE, "")) == null)) {
// generate new default entry for proxy crawling // generate new default entry for proxy crawling
defaultProxyProfile = profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, false, true, true, true); this.defaultProxyProfile = this.profiles.newEntry("proxy", "", ".*", ".*", Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), Integer.parseInt(getConfig("proxyPrefetchDepth", "0")), false, true, true, true, getConfigBool("proxyCrawlOrder", false), true, true, true);
setConfig(STR_PROXYPROFILE, defaultProxyProfile.handle()); setConfig(STR_PROXYPROFILE, this.defaultProxyProfile.handle());
} else { } else {
defaultProxyProfile = profiles.getEntry(getConfig(STR_PROXYPROFILE, "")); this.defaultProxyProfile = this.profiles.getEntry(getConfig(STR_PROXYPROFILE, ""));
} }
if ((profiles.size() == 1) || if ((profiles.size() == 1) ||
(getConfig(STR_REMOTEPROFILE, "").length() == 0) || (getConfig(STR_REMOTEPROFILE, "").length() == 0) ||
@ -1156,7 +1156,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
boolean tryRemote = ((urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) != 0) || (sbQueue.size() != 0)) && boolean tryRemote = ((urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) != 0) || (sbQueue.size() != 0)) &&
(profile.remoteIndexing()) && (profile.remoteIndexing()) &&
(urlEntry.initiator() != null) && (urlEntry.initiator() != null) &&
(!(urlEntry.initiator().equals(plasmaURL.dummyHash))) && // (!(urlEntry.initiator().equals(plasmaURL.dummyHash))) &&
((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal())); ((yacyCore.seedDB.mySeed.isSenior()) || (yacyCore.seedDB.mySeed.isPrincipal()));
if (tryRemote) { if (tryRemote) {
boolean success = processRemoteCrawlTrigger(urlEntry); boolean success = processRemoteCrawlTrigger(urlEntry);

@ -365,6 +365,7 @@ defaultLinkReceiveFrequency=30
# of 2 would result in hundreds of prefetched URLs for each single proxy fill. # of 2 would result in hundreds of prefetched URLs for each single proxy fill.
proxyPrefetchDepth=0 proxyPrefetchDepth=0
proxyStoreHTCache=true proxyStoreHTCache=true
proxyCrawlOrder=false
# From the 'IndexCreate' menu point you can also define a crawling start point. # From the 'IndexCreate' menu point you can also define a crawling start point.
# The crawling works the same way as the prefetch, but it is possible to # The crawling works the same way as the prefetch, but it is possible to

Loading…
Cancel
Save