From 8ba8e2b7d9c24e6de9df7c64a00452fa98958516 Mon Sep 17 00:00:00 2001 From: hydrox Date: Wed, 28 Jun 2006 08:51:34 +0000 Subject: [PATCH] *) added cache for blacklisted URL hashes received by DHT. DHT does not request URLs listed in this cache. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2251 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacy/transferRWI.java | 10 ++++++++- htroot/yacy/transferURL.java | 2 +- source/de/anomic/plasma/plasmaURLPattern.java | 21 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index 52cbe032f..012c9724e 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -78,6 +78,7 @@ public final class transferRWI { final int entryc = Integer.parseInt(post.get("entryc", "")); // number of entries in indexes byte[] indexes = post.get("indexes", "").getBytes(); // the indexes, as list of word entries boolean granted = sb.getConfig("allowReceiveIndex", "false").equals("true"); + boolean blockBlacklist = sb.getConfig("indexReceiveBlockBlacklist", "false").equals("true"); boolean checkLimit = sb.getConfigBool("indexDistribution.dhtReceiptLimitEnabled", true); final long cachelimit = sb.getConfigLong("indexDistribution.dhtReceiptLimit", 1000); final yacySeed otherPeer = yacyCore.seedDB.get(iam); @@ -152,7 +153,14 @@ public final class transferRWI { try { if ((!(unknownURL.contains(urlHash))) && (!(sb.urlPool.loadedURL.exists(urlHash)))) { - unknownURL.add(urlHash); + if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) { + int deleted = sb.wordIndex.tryRemoveURLs(urlHash); + yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs"); + //TODO: set to logFine if it works. 
+ } + else { + unknownURL.add(urlHash); + } } } catch (Exception ex) { sb.getLog().logWarning( diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index b27261fc9..6ab05512a 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -96,7 +96,7 @@ public final class transferURL { lEntry = sb.urlPool.loadedURL.newEntry(urls, true); if ((lEntry != null) && (lEntry.url() != null)) { if ((blockBlacklist) && - (plasmaSwitchboard.urlBlacklist.isListed(lEntry.url()))) { + (plasmaSwitchboard.urlBlacklist.isListed(lEntry.hash(), lEntry.url()))) { int deleted = sb.wordIndex.tryRemoveURLs(lEntry.hash()); yacyCore.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url() + "' from peer " + otherPeerName + "; deleted " + deleted + " URL entries from RWIs"); lEntry = null; diff --git a/source/de/anomic/plasma/plasmaURLPattern.java b/source/de/anomic/plasma/plasmaURLPattern.java index 515db7340..644081c71 100644 --- a/source/de/anomic/plasma/plasmaURLPattern.java +++ b/source/de/anomic/plasma/plasmaURLPattern.java @@ -43,11 +43,16 @@ package de.anomic.plasma; import java.io.File; import java.net.URL; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + import de.anomic.kelondro.kelondroMSetTools; public class plasmaURLPattern { + private Set cachedUrlHashs = Collections.synchronizedSet(new HashSet()); private File rootPath = null; private HashMap hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here @@ -85,6 +90,22 @@ public class plasmaURLPattern { hostpaths.put(host.toLowerCase(), path); } + public boolean hashInBlacklistedCache(String urlHash) { + return cachedUrlHashs.contains(urlHash); + } + + public boolean isListed(String urlHash, URL url) { + if (!cachedUrlHashs.contains(urlHash)) { + boolean temp = isListed(url.getHost().toLowerCase(), url.getFile()); + if (temp) + { + cachedUrlHashs.add(urlHash); + } + return 
temp; + } + return true; + } + public boolean isListed(URL url) { return isListed(url.getHost().toLowerCase(), url.getFile()); }