diff --git a/htroot/ConfigNetwork_p.html b/htroot/ConfigNetwork_p.html new file mode 100644 index 000000000..fa80cb446 --- /dev/null +++ b/htroot/ConfigNetwork_p.html @@ -0,0 +1,98 @@ + + +
++ You can configure whether you want to participate in the global YaCy network or whether you want to have your + own separate search cluster with or without a connection to the global network. You may also define + a completely independent search engine instance, without any data exchange between your peer and other + peers, which we call a 'Robinson' peer. +
+ + #%env/templates/footer.template%# + + \ No newline at end of file diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java new file mode 100644 index 000000000..9f03d7874 --- /dev/null +++ b/htroot/ConfigNetwork_p.java @@ -0,0 +1,191 @@ +// ConfigNetwork_p.java +// -------------------- +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 20.04.2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import de.anomic.http.httpHeader; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.server.serverCodings; +import de.anomic.server.serverObjects; +import de.anomic.server.serverSwitch; +import de.anomic.server.serverThread; +import de.anomic.yacy.yacyCore; + +public class ConfigNetwork_p { + + public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { + + plasmaSwitchboard sb = (plasmaSwitchboard) env; + serverObjects prop = new serverObjects(); + int commit = 0; + + if (post != null) { + + boolean crawlResponse = post.get("crawlResponse", "off").equals("on"); + + // DHT control + boolean indexDistribute = post.get("indexDistribute", "").equals("on"); + boolean indexReceive = post.get("indexReceive", "").equals("on"); + boolean robinsonmode = post.get("network", "").equals("robinson"); + String clustermode = post.get("cluster.mode", "publicpeer"); + if (robinsonmode) { + indexDistribute = false; + indexReceive = false; + if ((clustermode.equals("privatepeer")) || (clustermode.equals("publicpeer"))) { + prop.put("commitRobinsonWithoutRemoteIndexing", 1); + crawlResponse = false; + } + if ((clustermode.equals("privatecluster")) || (clustermode.equals("publiccluster"))) { + prop.put("commitRobinsonWithRemoteIndexing", 1); + crawlResponse = true; + } + commit = 1; + } else { + if (!indexDistribute && !indexReceive) { + prop.put("commitDHTIsRobinson", 1); + commit = 2; + } else if (indexDistribute && indexReceive) { + commit = 1; + } else { + prop.put("commitDHTNoGlobalSearch", 1); + commit = 1; + } + if (!crawlResponse) { + prop.put("commitCrawlPlea", 1); + } + } + + if (indexDistribute) { + sb.setConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true"); + } else { + sb.setConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, 
"false"); + } + + if (post.get("indexDistributeWhileCrawling","").equals("on")) { + sb.setConfig(plasmaSwitchboard.INDEX_DIST_ALLOW_WHILE_CRAWLING, "true"); + } else { + sb.setConfig(plasmaSwitchboard.INDEX_DIST_ALLOW_WHILE_CRAWLING, "false"); + } + + if (indexReceive) { + sb.setConfig("allowReceiveIndex", "true"); + yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(true); + } else { + sb.setConfig("allowReceiveIndex", "false"); + yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(false); + } + + if (post.get("indexReceiveBlockBlacklist", "").equals("on")) { + sb.setConfig("indexReceiveBlockBlacklist", "true"); + } else { + sb.setConfig("indexReceiveBlockBlacklist", "false"); + } + + if (post.containsKey("peertags")) { + yacyCore.seedDB.mySeed.setPeerTags(serverCodings.string2set(normalizedList((String) post.get("peertags")), ",")); + } + + sb.setConfig("cluster.mode", post.get("cluster.mode", "publicpeer")); + + // read remote crawl request settings + sb.setConfig("crawlResponse", (crawlResponse) ? "true" : "false"); + int newppm = Math.max(1, Integer.parseInt(post.get("acceptCrawlLimit", "1"))); + long newBusySleep = Math.max(100, 60000 / newppm); + serverThread rct = sb.getThread(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); + rct.setBusySleep(newBusySleep); + sb.setConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, Long.toString(newBusySleep)); + + sb.setConfig("cluster.peers.ipport", checkIPPortList(post.get("cluster.peers.ipport", ""))); + sb.setConfig("cluster.peers.yacydomain", checkYaCyDomainList(post.get("cluster.peers.yacydomain", ""))); + + } + + // write answer code + prop.put("commit", commit); + + // write remote crawl request settings + prop.put("crawlResponse", sb.getConfigBool("crawlResponse", false) ? 
1 : 0); + long RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100")); + int RTCppm = (int) (60000L / RTCbusySleep); + prop.put("acceptCrawlLimit", RTCppm); + + boolean indexDistribute = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true"); + boolean indexReceive = sb.getConfig("allowReceiveIndex", "true").equals("true"); + prop.put("indexDistributeChecked", (indexDistribute) ? 1 : 0); + prop.put("indexDistributeWhileCrawling.on", (sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW_WHILE_CRAWLING, "true").equals("true")) ? 1 : 0); + prop.put("indexDistributeWhileCrawling.off", (sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW_WHILE_CRAWLING, "true").equals("true")) ? 0 : 1); + prop.put("indexReceiveChecked", (indexReceive) ? 1 : 0); + prop.put("indexReceiveBlockBlacklistChecked.on", (sb.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? 1 : 0); + prop.put("indexReceiveBlockBlacklistChecked.off", (sb.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? 0 : 1); + prop.put("peertags", serverCodings.set2string(yacyCore.seedDB.mySeed.getPeerTags(), ",", false)); + + // set seed information directly + yacyCore.seedDB.mySeed.setFlagAcceptRemoteCrawl(sb.getConfigBool("crawlResponse", false)); + yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(indexReceive); + + // set p2p/robinson mode flags and values + prop.put("p2p.checked", (indexDistribute || indexReceive) ? 1 : 0); + prop.put("robinson.checked", (indexDistribute || indexReceive) ? 0 : 1); + prop.put("cluster.peers.ipport", sb.getConfig("cluster.peers.ipport", "")); + prop.put("cluster.peers.yacydomain", sb.getConfig("cluster.peers.yacydomain", "")); + + // set p2p mode flags + prop.put("privatepeerChecked", (sb.getConfig("cluster.mode", "").equals("privatepeer")) ? 1 : 0); + prop.put("privateclusterChecked", (sb.getConfig("cluster.mode", "").equals("privatecluster")) ? 
1 : 0); + prop.put("publicclusterChecked", (sb.getConfig("cluster.mode", "").equals("publiccluster")) ? 1 : 0); + prop.put("publicpeerChecked", (sb.getConfig("cluster.mode", "").equals("publicpeer")) ? 1 : 0); + + return prop; + } + + public static String normalizedList(String input) { + input = input.replace(' ', ','); + input = input.replace(' ', ';'); + input = input.replaceAll(",,", ","); + if (input.startsWith(",")) input = input.substring(1); + if (input.endsWith(",")) input = input.substring(0, input.length() - 1); + return input; + } + + public static String checkYaCyDomainList(String input) { + input = normalizedList(input); + String[] s = input.split(","); + input = ""; + for (int i = 0; i < s.length; i++) { + if ((s[i].endsWith(".yacyh")) || (s[i].endsWith(".yacy"))) input += "," + s[i]; + } + if (input.length() == 0) return input; else return input.substring(1); + } + + public static String checkIPPortList(String input) { + input = normalizedList(input); + String[] s = input.split(","); + input = ""; + for (int i = 0; i < s.length; i++) { + if (s[i].indexOf(':') >= 9) input += "," + s[i]; + } + if (input.length() == 0) return input; else return input.substring(1); + } +} diff --git a/htroot/ConfigRobotsTxt_p.html b/htroot/ConfigRobotsTxt_p.html index 899f9cbd7..478574176 100644 --- a/htroot/ConfigRobotsTxt_p.html +++ b/htroot/ConfigRobotsTxt_p.html @@ -4,7 +4,7 @@
- DHT Transmission control:
- The transmission is necessary for the functionality of global search on other peers.
- If you switch off distribution or receipt of RWIs you will be banned from global search.
-
Index Distribution: | -- | - | This enables automated, DHT-ruled Index Transmission to other peers. - | -
- | - | - | If checked, DHT-Transmission is enabled even during crawling. | -
Index Receive: | -- | - | Accept remote Index Transmissions. This works only if you are a senior peer. - The DHT-rules do not work without this function. | -
- | - | - | If checked, your peer silently ignores transmitted URLs that match your blacklist | -
Peer Tags: | -- | If your peer runs in 'Robinson Mode' (Distribution and Receive off), you probably run YaCy as a search engine - for your own search portal. Please describe your search portal with some keywords (comma-separated). - This will help to use your peer as search target even if you do not distribute your web index by - DHT distribution. | -|
- | - | - | Changes will take effect immediately | -
#(info)# diff --git a/htroot/IndexCreate_p.java b/htroot/IndexCreate_p.java index ef4c04812..3ff278828 100644 --- a/htroot/IndexCreate_p.java +++ b/htroot/IndexCreate_p.java @@ -52,7 +52,6 @@ import de.anomic.plasma.plasmaURL; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import de.anomic.server.serverThread; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyNewsPool; import de.anomic.yacy.yacyNewsRecord; @@ -69,26 +68,6 @@ public class IndexCreate_p { prop.put("refreshbutton", 0); if (post != null) { - if (post.containsKey("distributedcrawling")) { - long newBusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100")); - if (post.get("dcr", "").equals("acceptCrawlMax")) { - env.setConfig("crawlResponse", "true"); - newBusySleep = 100; - } else if (post.get("dcr", "").equals("acceptCrawlLimited")) { - env.setConfig("crawlResponse", "true"); - int newppm = Integer.parseInt(post.get("acceptCrawlLimit", "1")); - if (newppm < 1) newppm = 1; - newBusySleep = 60000 / newppm; - if (newBusySleep < 100) newBusySleep = 100; - } else if (post.get("dcr", "").equals("acceptCrawlDenied")) { - env.setConfig("crawlResponse", "false"); - } - serverThread rct = switchboard.getThread(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); - rct.setBusySleep(newBusySleep); - env.setConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, Long.toString(newBusySleep)); - //boolean crawlResponse = ((String) post.get("acceptCrawlMax", "")).equals("on"); - //env.setConfig("crawlResponse", (crawlResponse) ? "true" : "false"); - } if (post.containsKey("pausecrawlqueue")) { switchboard.pauseCrawlJob(plasmaSwitchboard.CRAWLJOB_LOCAL_CRAWL); @@ -150,31 +129,7 @@ public class IndexCreate_p { prop.put("crawlingSpeedMaxChecked", (LCppm >= 1000) ? 1 : 0); prop.put("crawlingSpeedCustChecked", ((LCppm > 10) && (LCppm < 1000)) ? 
1 : 0); prop.put("crawlingSpeedMinChecked", (LCppm <= 10) ? 1 : 0); - prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : ""); - - long RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100")); - if (RTCbusySleep < 100) { - RTCbusySleep = 100; - env.setConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, Long.toString(RTCbusySleep)); - } - if (env.getConfig("crawlResponse", "").equals("true")) { - if (RTCbusySleep <= 100) { - prop.put("acceptCrawlMaxChecked", 1); - prop.put("acceptCrawlLimitedChecked", 0); - prop.put("acceptCrawlDeniedChecked", 0); - } else { - prop.put("acceptCrawlMaxChecked", 0); - prop.put("acceptCrawlLimitedChecked", 1); - prop.put("acceptCrawlDeniedChecked", 0); - } - } else { - prop.put("acceptCrawlMaxChecked", 0); - prop.put("acceptCrawlLimitedChecked", 0); - prop.put("acceptCrawlDeniedChecked", 1); - } - int RTCppm = (RTCbusySleep == 0) ? 60 : (int) (60000L / RTCbusySleep); - if (RTCppm > 60) RTCppm = 60; - prop.put("PPM", RTCppm); + prop.put("customPPMdefault", ((LCppm > 10) && (LCppm < 1000)) ? Integer.toString(LCppm) : ""); prop.put("xsstopwChecked", env.getConfig("xsstopw", "").equals("true") ? 1 : 0); prop.put("xdstopwChecked", env.getConfig("xdstopw", "").equals("true") ? 1 : 0); diff --git a/htroot/IndexShare_p.java b/htroot/IndexShare_p.java index eb6dc95c8..26e60efe0 100644 --- a/htroot/IndexShare_p.java +++ b/htroot/IndexShare_p.java @@ -71,7 +71,7 @@ public class IndexShare_p { } if (post.containsKey("indexsharesetting")) { - switchboard.setConfig("allowDistributeIndex", (post.containsKey("distribute")) ? "true" : "false"); + switchboard.setConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, (post.containsKey("distribute")) ? "true" : "false"); switchboard.setConfig("allowReceiveIndex", (post.containsKey("receive")) ? 
"true" : "false"); switchboard.setConfig("defaultLinkReceiveFrequency", post.get("linkfreq", "30")); switchboard.setConfig("defaultWordReceiveFrequency", post.get("wordfreq", "10")); diff --git a/htroot/env/templates/submenuConfig.template b/htroot/env/templates/submenuConfig.template index 02144e747..717450126 100644 --- a/htroot/env/templates/submenuConfig.template +++ b/htroot/env/templates/submenuConfig.template @@ -3,6 +3,7 @@
public static final String INDEX_DIST_ALLOW_WHILE_CRAWLING = "allowDistributeIndexWhileCrawling"
Name of the setting whether Index Distribution shall be allowed while crawling is in progress, i.e. @@ -1324,9 +1325,71 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser } public boolean isRobinsonMode() { - return (yacyCore.seedDB.sizeConnected() == 0) && (yacyCore.seedDB.mySeed.isVirgin()); + // we are in robinson mode, if we do not exchange index by dht distribution + // we need to take care that search requests and remote indexing requests go only + // to the peers in the same cluster, if we run a robinson cluster. + return getConfigBool(plasmaSwitchboard.INDEX_DIST_ALLOW, false) && !getConfigBool(plasmaSwitchboard.INDEX_RECEIVE_ALLOW, false); } + public boolean isClosedRobinsonCluster() { + // robinson peers may be member of robinson clusters, which can be public or private + // this does not check the robinson attribute, only the specific subtype of the cluster + String clustermode = getConfig("cluster.mode", "publicpeer"); + return (clustermode.equals("privatecluster")) || (clustermode.equals("privatepeer")); + } + + public boolean isInMyCluster(String peer) { + // check if the given peer is in the own network, if this is a robinson cluster + // depending on the robinson cluster type, the peer String may be a peerhash (b64-hash) + // or a ip:port String or simply a ip String + if (!isRobinsonMode()) return false; + String clustermode = getConfig("cluster.mode", "publicpeer"); + if (clustermode.equals("privatecluster")) { + // check if we got the request from a peer in the private cluster + String network = getConfig("cluster.peers.ipport", ""); + return network.indexOf(peer) >= 0; + } else if (clustermode.equals("publiccluster")) { + // check if we got the request from a peer in the public cluster + String network = getConfig("cluster.peers.yacydomain", ""); + // check for .yacyh hexhash-domain + String hexhash = yacySeed.b64Hash2hexHash(peer); + if (hexhash == null) return false; + if (network.indexOf(hexhash + ".yacyh") >= 
0) return true; + // resolve seed + yacySeed seed = yacyCore.seedDB.get(peer); + if (seed == null) return false; + // check for .yacy (name) - Domain + if (network.indexOf(seed.getName() + ".yacy") >= 0) return true; + return false; + } else { + return false; + } + } + + public boolean isInMyCluster(yacySeed seed) { + // check if the given peer is in the own network, if this is a robinson cluster + if (seed == null) return false; + if (!isRobinsonMode()) return false; + String clustermode = getConfig("cluster.mode", "publicpeer"); + if (clustermode.equals("privatecluster")) { + // check if we got the request from a peer in the private cluster + String network = getConfig("cluster.peers.ipport", ""); + return network.indexOf(seed.getAddress()) >= 0; + } else if (clustermode.equals("publiccluster")) { + // check if we got the request from a peer in the public cluster + String network = getConfig("cluster.peers.yacydomain", ""); + // check for .yacyh hexhash-domain + String hexhash = yacySeed.b64Hash2hexHash(seed.hash); + if (hexhash == null) return false; + if (network.indexOf(hexhash + ".yacyh") >= 0) return true; + // check for .yacy (name) - Domain + if (network.indexOf(seed.getName() + ".yacy") >= 0) return true; + return false; + } else { + return false; + } + } + public String urlExists(String hash) { // tests if hash occurrs in any database // if it exists, the name of the database is returned, diff --git a/yacy.init b/yacy.init index 8879d4e61..25be7f9c6 100644 --- a/yacy.init +++ b/yacy.init @@ -83,8 +83,6 @@ pkcs12ImportPwd = superseedFile=superseed.txt superseedLocation=http://www.yacy.net/superseed.txt - - # network definition # we distiguish local and global networks. 
Each network type can have different user groups # groups can be uncontrolled, moderated or controlled @@ -98,8 +96,22 @@ superseedLocation=http://www.yacy.net/superseed.txt # network = all:world:global:uncontrolled:http://yacy.net/ # the network-uri must have a sub-path yacy/seed.txt containing a list of urls pointing to the # peer-address of peers within the group of that network -# several network definition strings can be listed in a single - +# several network definition strings can be listed + +# clusters within a network: +# every network can have an unlimited number of clusters. Clusters may also be completely +# sealed and have no connection to other peers. When a cluster does not use the +# p2p protocol and the bootstrapping mechanism to contact other peers, we call its members +# Robinson peers. They can appear in different 'visibilities': +# - privatepeer: no connection and no data exchange to any other peer +# - privatecluster: connections only to self-defined addresses (other peers in same mode) +# - publiccluster: like privatecluster, but visible and searchable by public p2p nodes +# - publicpeer: a single peer without cluster connection, but visible for p2p nodes +# all public robinson peers should use a peer tag string so that they can be found when +# these tags appear in a search request +cluster.mode=publicpeer +cluster.peers.yacydomain=localpeer.yacy +cluster.peers.ipport=localhost:8080 # bootstrapLoadTimeout # this is the time-out for loading of the seedlist files during bootstrapping @@ -541,7 +553,7 @@ filterOutStopwordsFromTopwords=true # ram cache for database files -# ram cache for assortment cache cluster (for all 64 files) +# ram cache for collection index ramCacheRWI_time = 30000 # ram cache for responseHeader.db