- Cache known URLs during indexReceive so that repeated URL hashes skip the loadedURL.exists() lookup, which can block, whenever possible

- Small logging updates



git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2359 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
hermens 19 years ago
parent c09f734d06
commit d56f06401e

@ -136,8 +136,11 @@ public final class transferRWI {
indexEntry iEntry;
int wordhashesSize = v.size();
final HashSet unknownURL = new HashSet();
final HashSet knownURL = new HashSet();
String[] wordhashes = new String[v.size()];
int received = 0;
int blocked = 0;
int receivedURL = 0;
for (int i = 0; i < wordhashesSize; i++) {
serverCore.checkInterruption();
@ -147,31 +150,33 @@ public final class transferRWI {
wordHash = estring.substring(0, p);
wordhashes[received] = wordHash;
iEntry = new indexURLEntry(estring.substring(p));
urlHash = iEntry.urlHash();
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
//int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs");
blocked++;
} else {
sb.wordIndex.addEntry(wordHash, iEntry, System.currentTimeMillis(), true);
serverCore.checkInterruption();
urlHash = iEntry.urlHash();
if (!(knownURL.contains(urlHash)||unknownURL.contains(urlHash))) {
try {
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
//TODO: set to logFine if it works.
}
else {
if (sb.urlPool.loadedURL.exists(urlHash)) {
knownURL.add(urlHash);
} else {
unknownURL.add(urlHash);
}
}
} catch (Exception ex) {
sb.getLog().logWarning(
"transferRWI: DB-Error while trying to determine if URL with hash '" +
urlHash + "' is known.", ex);
unknownURL.add(urlHash);
}
receivedURL++;
}
received++;
}
}
}
yacyCore.seedDB.mySeed.incRI(received);
// finally compose the unknownURL hash list
@ -185,7 +190,7 @@ public final class transferRWI {
sb.getLog().logInfo("Received 0 RWIs from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs");
} else {
final double avdist = (yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[0]) + yacyDHTAction.dhtDistance(yacyCore.seedDB.mySeed.hash, wordhashes[received - 1])) / 2.0;
sb.getLog().logInfo("Received " + received + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + " URLs");
sb.getLog().logInfo("Received " + received + " Entries " + wordc + " Words [" + wordhashes[0] + " .. " + wordhashes[received - 1] + "]/" + avdist + " from " + otherPeerName + ", processed in " + (System.currentTimeMillis() - startProcess) + " milliseconds, requesting " + unknownURL.size() + "/" + receivedURL + " URLs, blocked " + blocked + " RWIs");
}
result = "ok";

@ -83,6 +83,7 @@ public final class transferURL {
if (granted) {
int received = 0;
int blocked = 0;
final int sizeBefore = sb.urlPool.loadedURL.size();
// read the urls from the other properties and store
String urls;
@ -100,6 +101,7 @@ public final class transferURL {
int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash());
yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs");
lEntry = null;
blocked++;
} else {
lEntry.store();
sb.urlPool.loadedURL.stackEntry(lEntry, iam, iam, 3);
@ -121,7 +123,7 @@ public final class transferURL {
// return rewrite properties
final int more = sb.urlPool.loadedURL.size() - sizeBefore;
doublevalues = Integer.toString(received - more);
sb.getLog().logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms.");
sb.getLog().logInfo("Received " + received + " URLs from peer " + otherPeerName + " in " + (System.currentTimeMillis() - start) + " ms, Blocked " + blocked + " URLs");
if ((received - more) > 0) sb.getLog().logSevere("Received " + doublevalues + " double URLs from peer " + otherPeerName);
result = "ok";
} else {

@ -160,7 +160,9 @@ public class plasmaDHTTransfer extends Thread {
this.payloadSize = ((Integer)result.get("payloadSize")).intValue();
this.log.logInfo("Index transfer of " + this.dhtChunk.indexCount() +
" entries " + this.dhtChunk.containerSize() +
" words [" + this.dhtChunk.firstContainer().getWordHash() + " .. " + this.dhtChunk.lastContainer().getWordHash() + "]" +
" and " + this.dhtChunk.urlCacheMap().size() + " URLs" +
" to peer " + this.seed.getName() + ":" + this.seed.hash +
" in " + (this.transferTime / 1000) +
" seconds successful (" + (1000 * this.dhtChunk.indexCount() / (this.transferTime + 1)) +

Loading…
Cancel
Save