diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html index 7618f0548..ade51b3a7 100644 --- a/htroot/IndexControl_p.html +++ b/htroot/IndexControl_p.html @@ -59,13 +59,19 @@ The transmission is necessary for the functionality of global search on other pe If you switch off distribution or receipt of RWIs you will be banned from global search. - + + + + + + + diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java index 870529478..6c3bc806a 100644 --- a/htroot/IndexControl_p.java +++ b/htroot/IndexControl_p.java @@ -80,6 +80,7 @@ public class IndexControl_p { prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size())); prop.put("otherHosts", ""); prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); + prop.put("indexDistributeWhileCrawling", (switchboard.getConfig("allowDistributeIndexWhileCrawling", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : ""); return prop; // be save @@ -108,10 +109,16 @@ public class IndexControl_p { if (post.containsKey("setIndexTransmission")) { boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on"); switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false"); - if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable(); + if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable(); + + boolean allowDistributeIndexWhileCrawling = post.containsKey("indexDistributeWhileCrawling"); + switchboard.setConfig("allowDistributeIndexWhileCrawling", (allowDistributeIndexWhileCrawling) ? 
"true" : "false"); + if (allowDistributeIndexWhileCrawling) switchboard.indexDistribution.enableWhileCrawling(); else switchboard.indexDistribution.disableWhileCrawling(); + boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on"); switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? "true" : "false"); yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex); + boolean indexReceiveBlockBlacklist = ((String) post.get("indexReceiveBlockBlacklist", "")).equals("on"); switchboard.setConfig("indexReceiveBlockBlacklist", (indexReceiveBlockBlacklist) ? "true" : "false"); } @@ -204,6 +211,7 @@ public class IndexControl_p { try {indexes[0].close();} catch (IOException e) {} } + if (post.containsKey("keyhashsimilar")) { Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, true, true); String result = "Sequential List of Word-Hashes:
"; @@ -293,6 +301,7 @@ public class IndexControl_p { prop.put("wcount", Integer.toString(switchboard.wordIndex.size())); prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size())); prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); + prop.put("indexDistributeWhileCrawling", (switchboard.getConfig("allowDistributeIndexWhileCrawling", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : ""); // return rewrite properties diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 3a218164e..bf903f95e 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -67,7 +67,7 @@ public class transferURL { String iam = (String) post.get("iam", ""); // seed hash of requester String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability String key = (String) post.get("key", ""); // transmission key - int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls + int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true"); boolean blockBlacklist = switchboard.getConfig("indexReceiveBlockBlacklist", "false").equals("true"); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index b81ff55eb..f57e6e9bf 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -381,7 +381,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser peerPing.setSyncObject(new Object()); 
indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log, - getConfig("allowDistributeIndex", "false").equals("true")); + getConfig("allowDistributeIndex", "false").equals("true"), + getConfig("allowDistributeIndexWhileCrawling","false").equals("true")); indexDistribution.setCounts(100, 1, 3, 8000); deployThread("20_dhtdistribution", "DHT Distribution", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", null, new serverInstantThread(indexDistribution, "job", null), 12000); diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java index a53ca3b98..8fe5e1add 100644 --- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java +++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java @@ -28,13 +28,15 @@ public class plasmaWordIndexDistribution { private plasmaWordIndex wordIndex; private serverLog log; private boolean enabled; + private boolean enabledWhileCrawling; private boolean closed; public plasmaWordIndexDistribution(plasmaURLPool urlPool, plasmaWordIndex wordIndex, serverLog log, - boolean enable) { + boolean enable, boolean enabledWhileCrawling) { this.urlPool = urlPool; this.wordIndex = wordIndex; this.enabled = enable; + this.enabledWhileCrawling = enabledWhileCrawling; this.log = log; this.closed = false; setCounts(100 /*indexCount*/, 1 /*juniorPeerCount*/, 3 /*seniorPeerCount*/, 8000); @@ -48,6 +50,14 @@ public class plasmaWordIndexDistribution { enabled = false; } + public void enableWhileCrawling() { + this.enabledWhileCrawling = true; + } + + public void disableWhileCrawling() { + this.enabledWhileCrawling = false; + } + public void close() { closed = true; } @@ -82,7 +92,7 @@ public class plasmaWordIndexDistribution { log.logDebug("no word distribution: not enough words - wordIndex.size() = " + wordIndex.size()); return false; } - if (urlPool.noticeURL.stackSize() > 0) { + if ((!enabledWhileCrawling) && 
(urlPool.noticeURL.stackSize() > 0)) { log.logDebug("no word distribution: crawl in progress - noticeURL.stackSize() = " + urlPool.noticeURL.stackSize()); return false; } @@ -117,6 +127,68 @@ public class plasmaWordIndexDistribution { this.seniorPeerCount = seniorPeerCount; } +// For testing purposes only ... +// public int performTransferWholeIndex() { +// +// boolean success = true; +// int indexCount = 1000; +// int totalCount = 0; +// String peerHash = "Z-X31fMiBs9h"; +// yacySeed seed = (yacySeed) yacyCore.seedDB.getConnected(peerHash); +// String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength); +// +// if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1; +// +// while (success) { +// // collect index +// //String startPointHash = yacyCore.seedCache.mySeed.hash; +// +// plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount); +// if ((indexEntities == null) || (indexEntities.length == 0)) { +// log.logDebug("No index available for index transfer, hash start-point " + startPointHash); +// return -1; +// } +// // count the indexes again, can be smaller as expected +// indexCount = 0; for (int i = 0; i < indexEntities.length; i++) indexCount += indexEntities[i].size(); +// +// // iterate over DHT-peers and send away the indexes +// String error; +// String peerNames = ""; +// +// +// if ((seed != null) && (indexCount > 0)) { +// error = yacyClient.transferIndex(seed,indexEntities, urlPool.loadedURL); +// if (error == null) { +// log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. 
" + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull"); +// peerNames += ", " + seed.getName(); +// +// } else { +// log.logWarning("Index transfer to peer " + seed.getName() + ":" + seed.hash + " failed:'" + error + "', disconnecting peer"); +// yacyCore.peerActions.peerDeparture(seed); +// success = false; +// } +// } else { +// success = false; +// } +// +// try { +// if (deleteTransferIndexes(indexEntities)) { +// log.logDebug("Deleted all transferred whole-word indexes locally"); +// totalCount += indexCount;; +// startPointHash = indexEntities[indexEntities.length - 1].wordHash(); +// } else { +// log.logError("Deleted not all transferred whole-word indexes"); +// return -1; +// } +// } catch (IOException ee) { +// log.logError("Deletion of indexes not possible:" + ee.getMessage()); +// ee.printStackTrace(); +// return -1; +// } +// } +// return totalCount; +// } + public int performTransferIndex(int indexCount, int peerCount, boolean delete) { if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1; diff --git a/yacy.init b/yacy.init index 85c796a51..58cdcc2a8 100644 --- a/yacy.init +++ b/yacy.init @@ -331,6 +331,7 @@ yacyDB=DATA/YACYDB # by default, sharing is on. If you want to use the proxy only for # local indexing, you may switch this off allowDistributeIndex=true +allowDistributeIndexWhileCrawling=false allowReceiveIndex=true indexReceiveBlockBlacklist=false
Index Distribution: This enables automated, DHT-ruled Index Transmission to other peers.
  If checked, DHT-Transmission is enabled even during crawling.
Index Receive: