*) Making DHT Transfer while Crawling configurable

See: http://www.yacy-forum.de/viewtopic.php?p=6904

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@496 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 0610e83468
commit 865b9490a2

@ -59,13 +59,19 @@ The transmission is necessary for the functionality of global search on other pe
If you switch off distribution or receipt of RWIs you will be banned from global search. If you switch off distribution or receipt of RWIs you will be banned from global search.
<table border="0" cellpadding="5" cellspacing="0"> <table border="0" cellpadding="5" cellspacing="0">
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td width="100">Index Distribution:</td> <td width="100">Index&nbsp;Distribution:</td>
<td><input type="checkbox" name="indexDistribute" align="top" #[indexDistributeChecked]#></td> <td><input type="checkbox" name="indexDistribute" align="top" #[indexDistributeChecked]#></td>
<td></td> <td></td>
<td>This enables automated, DHT-ruled Index Transmission to other peers. <td>This enables automated, DHT-ruled Index Transmission to other peers.
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td width="100"></td>
<td>&nbsp;&nbsp;</td>
<td><input type="checkbox" name="indexDistributeWhileCrawling" align="top" #[indexDistributeWhileCrawling]#></td>
<td>If checked, DHT-Transmission is enabled even during crawling.</td>
</tr>
<tr valign="top" class="TableCellDark">
<td width="100">Index Receive:</td> <td width="100">Index Receive:</td>
<td><input type="checkbox" name="indexReceive" align="top" #[indexReceiveChecked]#></td> <td><input type="checkbox" name="indexReceive" align="top" #[indexReceiveChecked]#></td>
<td></td> <td></td>

@ -80,6 +80,7 @@ public class IndexControl_p {
prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size())); prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size()));
prop.put("otherHosts", ""); prop.put("otherHosts", "");
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
// The fallback must be "false", matching the default plasmaSwitchboard uses when it
// constructs the distribution job; otherwise a missing config key would render the
// checkbox as checked although distribution during crawling is actually off.
prop.put("indexDistributeWhileCrawling", (switchboard.getConfig("allowDistributeIndexWhileCrawling", "false").equals("true")) ? "checked" : "");
prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");
return prop; // be save return prop; // be save
@ -108,10 +109,16 @@ public class IndexControl_p {
if (post.containsKey("setIndexTransmission")) { if (post.containsKey("setIndexTransmission")) {
boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on"); boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on");
switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false"); switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false");
if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable(); if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable();
// Read the checkbox the same way as the other flags in this form: a checked
// box without an explicit value attribute is submitted as "on".
boolean allowDistributeIndexWhileCrawling = ((String) post.get("indexDistributeWhileCrawling", "")).equals("on");
// Store the choice in the configuration ...
switchboard.setConfig("allowDistributeIndexWhileCrawling", (allowDistributeIndexWhileCrawling) ? "true" : "false");
// ... and apply it to the live distribution instance.
if (allowDistributeIndexWhileCrawling) {
    switchboard.indexDistribution.enableWhileCrawling();
} else {
    switchboard.indexDistribution.disableWhileCrawling();
}
boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on"); boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on");
switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? "true" : "false"); switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? "true" : "false");
yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex); yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex);
boolean indexReceiveBlockBlacklist = ((String) post.get("indexReceiveBlockBlacklist", "")).equals("on"); boolean indexReceiveBlockBlacklist = ((String) post.get("indexReceiveBlockBlacklist", "")).equals("on");
switchboard.setConfig("indexReceiveBlockBlacklist", (indexReceiveBlockBlacklist) ? "true" : "false"); switchboard.setConfig("indexReceiveBlockBlacklist", (indexReceiveBlockBlacklist) ? "true" : "false");
} }
@ -204,6 +211,7 @@ public class IndexControl_p {
try {indexes[0].close();} catch (IOException e) {} try {indexes[0].close();} catch (IOException e) {}
} }
if (post.containsKey("keyhashsimilar")) { if (post.containsKey("keyhashsimilar")) {
Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, true, true); Iterator hashIt = switchboard.wordIndex.wordHashes(keyhash, true, true);
String result = "Sequential List of Word-Hashes:<br>"; String result = "Sequential List of Word-Hashes:<br>";
@ -293,6 +301,7 @@ public class IndexControl_p {
prop.put("wcount", Integer.toString(switchboard.wordIndex.size())); prop.put("wcount", Integer.toString(switchboard.wordIndex.size()));
prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size())); prop.put("ucount", Integer.toString(switchboard.urlPool.loadedURL.size()));
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
// The fallback must be "false", matching the default plasmaSwitchboard uses when it
// constructs the distribution job; otherwise a missing config key would render the
// checkbox as checked although distribution during crawling is actually off.
prop.put("indexDistributeWhileCrawling", (switchboard.getConfig("allowDistributeIndexWhileCrawling", "false").equals("true")) ? "checked" : "");
prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : ""); prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");
// return rewrite properties // return rewrite properties

@ -67,7 +67,7 @@ public class transferURL {
String iam = (String) post.get("iam", ""); // seed hash of requester String iam = (String) post.get("iam", ""); // seed hash of requester
String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability String youare = (String) post.get("youare", ""); // seed hash of the target peer, needed for network stability
String key = (String) post.get("key", ""); // transmission key String key = (String) post.get("key", ""); // transmission key
int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls
boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true"); boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true");
boolean blockBlacklist = switchboard.getConfig("indexReceiveBlockBlacklist", "false").equals("true"); boolean blockBlacklist = switchboard.getConfig("indexReceiveBlockBlacklist", "false").equals("true");

@ -381,7 +381,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
peerPing.setSyncObject(new Object()); peerPing.setSyncObject(new Object());
indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log, indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log,
getConfig("allowDistributeIndex", "false").equals("true")); getConfig("allowDistributeIndex", "false").equals("true"),
getConfig("allowDistributeIndexWhileCrawling","false").equals("true"));
indexDistribution.setCounts(100, 1, 3, 8000); indexDistribution.setCounts(100, 1, 3, 8000);
deployThread("20_dhtdistribution", "DHT Distribution", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", null, deployThread("20_dhtdistribution", "DHT Distribution", "selection, transfer and deletion of index entries that are not searched on your peer, but on others", null,
new serverInstantThread(indexDistribution, "job", null), 12000); new serverInstantThread(indexDistribution, "job", null), 12000);

@ -28,13 +28,15 @@ public class plasmaWordIndexDistribution {
private plasmaWordIndex wordIndex; private plasmaWordIndex wordIndex;
private serverLog log; private serverLog log;
private boolean enabled; private boolean enabled;
private boolean enabledWhileCrawling;
private boolean closed; private boolean closed;
public plasmaWordIndexDistribution(plasmaURLPool urlPool, plasmaWordIndex wordIndex, serverLog log, public plasmaWordIndexDistribution(plasmaURLPool urlPool, plasmaWordIndex wordIndex, serverLog log,
boolean enable) { boolean enable, boolean enabledWhileCrawling) {
this.urlPool = urlPool; this.urlPool = urlPool;
this.wordIndex = wordIndex; this.wordIndex = wordIndex;
this.enabled = enable; this.enabled = enable;
this.enabledWhileCrawling = enabledWhileCrawling;
this.log = log; this.log = log;
this.closed = false; this.closed = false;
setCounts(100 /*indexCount*/, 1 /*juniorPeerCount*/, 3 /*seniorPeerCount*/, 8000); setCounts(100 /*indexCount*/, 1 /*juniorPeerCount*/, 3 /*seniorPeerCount*/, 8000);
@ -48,6 +50,14 @@ public class plasmaWordIndexDistribution {
enabled = false; enabled = false;
} }
/**
 * Allows word distribution to proceed even when the notice-URL stack is
 * non-empty, i.e. the "crawl in progress" check no longer blocks it.
 */
public void enableWhileCrawling() {
enabledWhileCrawling = true;
}
/**
 * Restores the "crawl in progress" check: word distribution is skipped
 * whenever the notice-URL stack is non-empty.
 */
public void disableWhileCrawling() {
enabledWhileCrawling = false;
}
public void close() { public void close() {
closed = true; closed = true;
} }
@ -82,7 +92,7 @@ public class plasmaWordIndexDistribution {
log.logDebug("no word distribution: not enough words - wordIndex.size() = " + wordIndex.size()); log.logDebug("no word distribution: not enough words - wordIndex.size() = " + wordIndex.size());
return false; return false;
} }
if (urlPool.noticeURL.stackSize() > 0) { if ((!enabledWhileCrawling) && (urlPool.noticeURL.stackSize() > 0)) {
log.logDebug("no word distribution: crawl in progress - noticeURL.stackSize() = " + urlPool.noticeURL.stackSize()); log.logDebug("no word distribution: crawl in progress - noticeURL.stackSize() = " + urlPool.noticeURL.stackSize());
return false; return false;
} }
@ -117,6 +127,68 @@ public class plasmaWordIndexDistribution {
this.seniorPeerCount = seniorPeerCount; this.seniorPeerCount = seniorPeerCount;
} }
// For testing purposes only (disabled): repeatedly transfers index chunks to a single hard-coded peer and deletes them locally.
// public int performTransferWholeIndex() {
//
// boolean success = true;
// int indexCount = 1000;
// int totalCount = 0;
// String peerHash = "Z-X31fMiBs9h";
// yacySeed seed = (yacySeed) yacyCore.seedDB.getConnected(peerHash);
// String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
//
// if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
//
// while (success) {
// // collect index
// //String startPointHash = yacyCore.seedCache.mySeed.hash;
//
// plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
// if ((indexEntities == null) || (indexEntities.length == 0)) {
// log.logDebug("No index available for index transfer, hash start-point " + startPointHash);
// return -1;
// }
// // count the indexes again, can be smaller as expected
// indexCount = 0; for (int i = 0; i < indexEntities.length; i++) indexCount += indexEntities[i].size();
//
// // iterate over DHT-peers and send away the indexes
// String error;
// String peerNames = "";
//
//
// if ((seed != null) && (indexCount > 0)) {
// error = yacyClient.transferIndex(seed,indexEntities, urlPool.loadedURL);
// if (error == null) {
// log.logInfo("Index transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
// peerNames += ", " + seed.getName();
//
// } else {
// log.logWarning("Index transfer to peer " + seed.getName() + ":" + seed.hash + " failed:'" + error + "', disconnecting peer");
// yacyCore.peerActions.peerDeparture(seed);
// success = false;
// }
// } else {
// success = false;
// }
//
// try {
// if (deleteTransferIndexes(indexEntities)) {
// log.logDebug("Deleted all transferred whole-word indexes locally");
// totalCount += indexCount;;
// startPointHash = indexEntities[indexEntities.length - 1].wordHash();
// } else {
// log.logError("Deleted not all transferred whole-word indexes");
// return -1;
// }
// } catch (IOException ee) {
// log.logError("Deletion of indexes not possible:" + ee.getMessage());
// ee.printStackTrace();
// return -1;
// }
// }
// return totalCount;
// }
public int performTransferIndex(int indexCount, int peerCount, boolean delete) { public int performTransferIndex(int indexCount, int peerCount, boolean delete) {
if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1; if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;

@ -331,6 +331,7 @@ yacyDB=DATA/YACYDB
# by default, sharing is on. If you want to use the proxy only for # by default, sharing is on. If you want to use the proxy only for
# local indexing, you may switch this off # local indexing, you may switch this off
allowDistributeIndex=true allowDistributeIndex=true
# if true, DHT index distribution is also performed while a crawl is in progress
allowDistributeIndexWhileCrawling=false
allowReceiveIndex=true allowReceiveIndex=true
indexReceiveBlockBlacklist=false indexReceiveBlockBlacklist=false

Loading…
Cancel
Save