From edaa09b9b12651ef3a211ff653040aab5becf68e Mon Sep 17 00:00:00 2001 From: Roland 'Quix0r' Haeder Date: Mon, 11 Jun 2012 00:17:30 +0200 Subject: [PATCH] Rewrote all String blacklist types to enum 'BlacklistType', closes bug #143 Conflicts: htroot/Supporter.java htroot/yacy/crawlReceipt.java htroot/yacy/transferRWI.java htroot/yacy/transferURL.java source/de/anomic/crawler/CrawlStacker.java source/de/anomic/data/ListManager.java source/net/yacy/peers/Protocol.java source/net/yacy/repository/Blacklist.java source/net/yacy/repository/LoaderDispatcher.java source/net/yacy/search/Switchboard.java source/net/yacy/search/index/MetadataRepository.java source/net/yacy/search/index/Segment.java source/net/yacy/search/query/RWIProcess.java source/net/yacy/search/snippet/MediaSnippet.java --- htroot/BlacklistCleaner_p.java | 17 +-- htroot/BlacklistTest_p.java | 18 +-- htroot/Blacklist_p.java | 63 ++++---- htroot/IndexControlRWIs_p.java | 9 +- htroot/Supporter.java | 8 +- htroot/Surftips.java | 4 +- htroot/api/blacklists_p.java | 9 +- htroot/sharedBlacklist_p.java | 7 +- htroot/yacy/crawlReceipt.java | 13 +- htroot/yacy/transferRWI.java | 8 +- htroot/yacy/transferURL.java | 8 +- source/de/anomic/crawler/CrawlStacker.java | 8 +- .../anomic/crawler/retrieval/HTTPLoader.java | 6 +- source/de/anomic/data/ListManager.java | 62 ++++---- .../anomic/http/server/HTTPDProxyHandler.java | 8 +- source/net/yacy/peers/NewsPool.java | 6 +- source/net/yacy/peers/Protocol.java | 7 +- source/net/yacy/repository/Blacklist.java | 136 +++++++++++------- source/net/yacy/repository/BlacklistFile.java | 8 +- .../net/yacy/repository/LoaderDispatcher.java | 5 +- source/net/yacy/search/Switchboard.java | 2 +- .../yacy/search/index/MetadataRepository.java | 5 +- source/net/yacy/search/index/Segment.java | 4 +- source/net/yacy/search/query/RWIProcess.java | 9 +- .../net/yacy/search/snippet/MediaSnippet.java | 14 +- 25 files changed, 241 insertions(+), 203 deletions(-) diff --git a/htroot/BlacklistCleaner_p.java b/htroot/BlacklistCleaner_p.java index 5e92b3782..3f2d45387 100644 --- a/htroot/BlacklistCleaner_p.java +++ b/htroot/BlacklistCleaner_p.java @@ -49,6 +49,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistError; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; import de.anomic.data.ListManager; @@ -76,10 +77,6 @@ public class BlacklistCleaner_p { ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS")); String blacklistToUse = null; - // get the list of supported blacklist types - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - prop.put(DISABLED+"checked", "1"); if (post != null) { @@ -102,10 +99,10 @@ public class BlacklistCleaner_p { if (post.containsKey("delete")) { prop.put(RESULTS + "modified", "1"); - prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", true))); + prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true))); } else if (post.containsKey("alter")) { prop.put(RESULTS + "modified", "2"); - prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); + prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); } // list illegal entries @@ -275,7 +272,7 @@ public class BlacklistCleaner_p { * @param entries Array of entries to be deleted. * @return Length of the list of entries to be removed. */ - private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) { + private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) { // load blacklist data from file final List list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); @@ -299,7 +296,7 @@ public class BlacklistCleaner_p { } // remove the entry from the running blacklist engine - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0)); final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1); @@ -328,7 +325,7 @@ public class BlacklistCleaner_p { */ private static int alterEntries( final String blacklistToUse, - final String[] supportedBlacklistTypes, + final BlacklistType[] supportedBlacklistTypes, final String[] oldEntry, final String[] newEntry) { removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry); @@ -346,7 +343,7 @@ public class BlacklistCleaner_p { path = n.substring(pos + 1); } pw.println(host + "/" + path); - for (final String s : supportedBlacklistTypes) { + for (final BlacklistType s : supportedBlacklistTypes) { if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) { Switchboard.urlBlacklist.add( s, diff --git a/htroot/BlacklistTest_p.java b/htroot/BlacklistTest_p.java index 584754b78..915a657eb 100644 --- a/htroot/BlacklistTest_p.java +++ b/htroot/BlacklistTest_p.java @@ -34,7 +34,7 @@ import java.net.MalformedURLException; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.DigestURI; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.data.ListManager; @@ -64,21 +64,23 @@ public class BlacklistTest_p { DigestURI testurl = null; try { testurl = new DigestURI(urlstring); - } catch (final MalformedURLException e) { testurl = null; } + } catch (final MalformedURLException e) { + testurl = null; + } if(testurl != null) { prop.putHTML("url",testurl.toString()); prop.putHTML("testlist_url",testurl.toString()); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) prop.put("testlist_listedincrawler", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) prop.put("testlist_listedindht", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) prop.put("testlist_listedinnews", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) prop.put("testlist_listedinproxy", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) prop.put("testlist_listedinsearch", "1"); - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) prop.put("testlist_listedinsurftips", "1"); } else { diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java index 0476e00d3..862372117 100644 --- a/htroot/Blacklist_p.java +++ b/htroot/Blacklist_p.java @@ -43,6 +43,7 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; import de.anomic.data.ListManager; @@ -64,10 +65,6 @@ public class Blacklist_p { ListManager.switchboard = (Switchboard) env; ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); - // get the list of supported blacklist types - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - // load all blacklist files located in the directory List dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER); @@ -98,22 +95,22 @@ public class Blacklist_p { } if(testurl != null) { prop.putHTML("testlist_url",testurl.toString()); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) { prop.put("testlist_listedincrawler", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) { prop.put("testlist_listedindht", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) { prop.put("testlist_listedinnews", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) { prop.put("testlist_listedinproxy", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) { prop.put("testlist_listedinsearch", "1"); } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) { prop.put("testlist_listedinsurftips", "1"); } } else { @@ -159,7 +156,7 @@ public class Blacklist_p { ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse); // activate it for all known blacklist types - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse); } } catch (final IOException e) {/* */} @@ -189,7 +186,7 @@ public class Blacklist_p { Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!"); } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); } @@ -212,7 +209,7 @@ public class Blacklist_p { return prop; } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (post.containsKey("activateList4" + supportedBlacklistType)) { ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); } else { @@ -253,7 +250,7 @@ public class Blacklist_p { if (selectedBlacklistEntries.length > 0) { String temp = null; for (final String selectedBlacklistEntry : selectedBlacklistEntries) { - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } @@ -273,7 +270,7 @@ public class Blacklist_p { // store this call as api call ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry); - final String temp = addBlacklistEntry(blacklistToUse, blentry, header, supportedBlacklistTypes); + final String temp = addBlacklistEntry(blacklistToUse, blentry, header, BlacklistType.values()); if (temp != null) { prop.put("LOCATION", temp); return prop; @@ -298,12 +295,12 @@ public class Blacklist_p { !targetBlacklist.equals(blacklistToUse)) { String temp; for (final String selectedBlacklistEntry : selectedBlacklistEntries) { - if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; @@ -338,12 +335,12 @@ public class Blacklist_p { if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) { - if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { + if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } - if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { + if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, BlacklistType.values())) != null) { prop.put("LOCATION", temp); return prop; } @@ -475,12 +472,12 @@ public class Blacklist_p { if (element.equals(blacklistToUse)) { //current List prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1"); - for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { - prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]); + for (int blTypes=0; blTypes < BlacklistType.values().length; blTypes++) { + prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",BlacklistType.values()[blTypes].toString()); prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked", - ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", element) ? "0" : "1"); + ListManager.listSetContains(BlacklistType.values()[blTypes] + ".BlackLists", element) ? "0" : "1"); } - prop.put(DISABLED + "currentActiveFor", supportedBlacklistTypes.length); + prop.put(DISABLED + "currentActiveFor", BlacklistType.values().length); } else { prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element); @@ -494,9 +491,9 @@ public class Blacklist_p { } int activeCount = 0; - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) { - prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType); + prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType.toString()); activeCount++; } } @@ -521,13 +518,13 @@ public class Blacklist_p { * @param newEntry the entry that is to be added * @param header * @param supportedBlacklistTypes - * @return null if no error occured, else a String to put into LOCATION + * @return null if no error occurred, else a String to put into LOCATION */ private static String addBlacklistEntry( final String blacklistToUse, final String newEntry, final RequestHeader header, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (blacklistToUse == null || blacklistToUse.length() == 0) { return ""; @@ -555,7 +552,7 @@ public class Blacklist_p { final String blacklistToUse, final String oldEntry, final RequestHeader header, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (blacklistToUse == null || blacklistToUse.length() == 0) { return ""; @@ -580,7 +577,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String oldEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { // load blacklist data from file final List list = FileUtils.getListArray(new File(listsPath, blacklistToUse)); @@ -603,7 +600,7 @@ public class Blacklist_p { pos = oldEntry.length(); oldEntry = oldEntry + "/.*"; } - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) { Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1)); } @@ -622,7 +619,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String newEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { // ignore empty entries if(newEntry == null || newEntry.isEmpty()) { @@ -659,7 +656,7 @@ public class Blacklist_p { final File listsPath, final String blacklistToUse, String newEntry, - final String[] supportedBlacklistTypes) { + final BlacklistType[] supportedBlacklistTypes) { if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) { // append the line to the file @@ -683,7 +680,7 @@ public class Blacklist_p { // add to blacklist int pos = newEntry.indexOf('/',0); - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1)); } diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java index 19b05b2b0..19288a0df 100644 --- a/htroot/IndexControlRWIs_p.java +++ b/htroot/IndexControlRWIs_p.java @@ -58,7 +58,7 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.peers.dht.PeerSelection; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segment; @@ -441,7 +441,7 @@ public class IndexControlRWIs_p supportedBlacklistType + ".BlackLists", blacklist) ) { Switchboard.urlBlacklist.add( - supportedBlacklistType, + BlacklistType.valueOf(supportedBlacklistType), url.getHost(), url.getFile()); } @@ -457,7 +457,6 @@ public class IndexControlRWIs_p if ( post.containsKey("blacklistdomains") ) { PrintWriter pw; try { - final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(","); pw = new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true)); DigestURI url; @@ -472,7 +471,7 @@ public class IndexControlRWIs_p if ( e != null ) { url = e.url(); pw.println(url.getHost() + "/.*"); - for ( final String supportedBlacklistType : supportedBlacklistTypes ) { + for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) { if ( ListManager.listSetContains( supportedBlacklistType + ".BlackLists", blacklist) ) { @@ -623,7 +622,7 @@ public class IndexControlRWIs_p ? "appears emphasized, " : "") + ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : "")); - if ( Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url) ) { + if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) { prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1"); } i++; diff --git a/htroot/Supporter.java b/htroot/Supporter.java index 767bdadcc..2d75ee4c9 100644 --- a/htroot/Supporter.java +++ b/htroot/Supporter.java @@ -43,7 +43,7 @@ import net.yacy.kelondro.order.NaturalOrder; import net.yacy.peers.NewsDB; import net.yacy.peers.NewsPool; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -129,8 +129,10 @@ public class Supporter { url = row.getPrimaryKeyUTF8().trim(); try { - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; - } catch(final MalformedURLException e) {continue;} + if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; + } catch (final MalformedURLException e) { + continue; + } title = row.getColUTF8(1); description = row.getColUTF8(2); if ((url == null) || (title == null) || (description == null)) continue; diff --git a/htroot/Surftips.java b/htroot/Surftips.java index 682f66ee4..63d4efa1a 100644 --- a/htroot/Surftips.java +++ b/htroot/Surftips.java @@ -42,7 +42,7 @@ import net.yacy.kelondro.order.NaturalOrder; import net.yacy.peers.NewsDB; import net.yacy.peers.NewsPool; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -136,7 +136,7 @@ public class Surftips { url = row.getPrimaryKeyUTF8().trim(); try{ - if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url))) + if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS ,new DigestURI(url))) continue; }catch(final MalformedURLException e){continue;}; title = row.getColUTF8(1); diff --git a/htroot/api/blacklists_p.java b/htroot/api/blacklists_p.java index 180c3d4e8..6f907ef79 100644 --- a/htroot/api/blacklists_p.java +++ b/htroot/api/blacklists_p.java @@ -4,7 +4,7 @@ import java.util.List; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import de.anomic.data.ListManager; import de.anomic.server.serverObjects; @@ -35,15 +35,14 @@ public class blacklists_p { prop.put("lists_" + blacklistCount + "_shared", "0"); } - final String[] types = Blacklist.BLACKLIST_TYPES_STRING.split(","); int j = 0; - for (final String type : types) { - prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type); + for (final BlacklistType type : BlacklistType.values()) { + prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type.toString()); prop.put("lists_" + blacklistCount + "_types_" + j + "_value", ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0); j++; } - prop.put("lists_" + blacklistCount + "_types", types.length); + prop.put("lists_" + blacklistCount + "_types", BlacklistType.values().length); if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) { final List list = FileUtils.getListArray(new File(ListManager.listsPath, element)); diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index 2704cc9d5..1bbf28b55 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -47,7 +47,7 @@ import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.util.FileUtils; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; @@ -238,10 +238,7 @@ public class sharedBlacklist_p { pw.println(newItem); if (Switchboard.urlBlacklist != null) { - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - - for (final String supportedBlacklistType : supportedBlacklistTypes) { + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) { Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1)); } diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java index fabcabe00..58dbea76a 100644 --- a/htroot/yacy/crawlReceipt.java +++ b/htroot/yacy/crawlReceipt.java @@ -35,6 +35,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.logging.Log; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; @@ -133,8 +134,16 @@ public final class crawlReceipt { // check if the entry is in our network domain final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url()); - if (urlRejectReason != null) { - if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); + if (urlRejectReason != null) { + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); + prop.put("delay", "9999"); + return prop; + } + + // Check URL against DHT blacklist + if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) { + // URL is blacklisted + log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam); prop.put("delay", "9999"); return prop; } diff --git a/htroot/yacy/transferRWI.java b/htroot/yacy/transferRWI.java index e20418823..b29b732b2 100644 --- a/htroot/yacy/transferRWI.java +++ b/htroot/yacy/transferRWI.java @@ -46,7 +46,7 @@ import net.yacy.peers.Network; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; import net.yacy.peers.dht.FlatWordPartitionScheme; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segments; @@ -171,9 +171,9 @@ public final class transferRWI { iEntry = new WordReferenceRow(estring.substring(p)); urlHash = iEntry.urlhash(); - // block blacklisted entries - if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) { - if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); + // block blacklisted entries + if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(BlacklistType.DHT, urlHash))) { + Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); blocked++; continue; } diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java index 944c96d00..a9b43c7a2 100644 --- a/htroot/yacy/transferURL.java +++ b/htroot/yacy/transferURL.java @@ -38,7 +38,7 @@ import net.yacy.peers.EventChannel; import net.yacy.peers.Network; import net.yacy.peers.Protocol; import net.yacy.peers.Seed; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.ResultURLs; @@ -121,9 +121,9 @@ public final class transferURL { continue; } - // check if the entry is blacklisted - if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) { - if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); + // check if the entry is blacklisted + if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) { + Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); lEntry = null; blocked++; continue; diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 966ecabac..35d1a3f11 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.peers.SeedDB; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.FilterEngine; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; @@ -404,9 +404,9 @@ public final class CrawlStacker { return "denied_(" + urlRejectReason + ")"; } - // check blacklist - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { - if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist."); + // check blacklist + if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) { + this.log.logFine("URL '" + urlstring + "' is in blacklist."); return "url in blacklist"; } diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index a3547feda..2a349a601 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -36,7 +36,7 @@ import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.io.ByteCount; import net.yacy.kelondro.logging.Log; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.search.index.Segments; @@ -95,7 +95,7 @@ public final class HTTPLoader { // check if url is in blacklist final String hostlow = host.toLowerCase(); - if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { + if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) { this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } @@ -236,7 +236,7 @@ public final class HTTPLoader { // check if url is in blacklist final String hostlow = host.toLowerCase(); - if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { + if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) { throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } diff --git a/source/de/anomic/data/ListManager.java b/source/de/anomic/data/ListManager.java index 47a38ff67..051580a8c 100644 --- a/source/de/anomic/data/ListManager.java +++ b/source/de/anomic/data/ListManager.java @@ -1,10 +1,10 @@ // listManager.java // ------------------------------------- // part of YACY -// +// // (C) 2005, 2006 by Alexander Schier // (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com -// +// // last change: $LastChangedDate$ by $LastChangedBy$ // $LastChangedRevision$ // @@ -30,35 +30,34 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import java.util.Vector; +import java.util.regex.Pattern; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.BlacklistFile; import net.yacy.search.Switchboard; import net.yacy.search.query.SearchEventCache; -import java.util.List; -import java.util.regex.Pattern; - // The Naming of the functions is a bit strange... public class ListManager { - + private final static Pattern commaPattern = Pattern.compile(","); - + public static Switchboard switchboard = null; public static File listsPath = null; /** * Get ListSet from configuration file and return it as a unified Set. - * + * * Meaning of ListSet: There are various "lists" in YaCy which are * actually disjunct (pairwise unequal) sets which themselves can be seperated * into different subsets. E.g., there can be more than one blacklist of a type. - * A ListSet is the set of all those "lists" (subsets) of an equal type. - * + * A ListSet is the set of all those "lists" (subsets) of an equal type. + * * @param setName name of the ListSet * @return a ListSet from configuration file */ @@ -69,13 +68,13 @@ public class ListManager { /** * Removes an element from a ListSet and updates the configuration file * accordingly. If the element doesn't exist, then nothing will be changed. - * + * * @param setName name of the ListSet. * @param listName name of the element to remove from the ListSet. */ public static void removeFromListSet(final String setName, final String listName) { final Set listSet = getListSet(setName); - + if (!listSet.isEmpty()) { listSet.remove(listName); switchboard.setConfig(setName, collection2string(listSet)); @@ -86,9 +85,9 @@ public class ListManager { * Adds an element to an existing ListSet. If the ListSet doesn't exist yet, * a new one will be added. If the ListSet already contains an identical element, * then nothing happens. - * + * * The new list will be written to the configuartion file. - * + * * @param setName * @param newListName */ @@ -101,7 +100,7 @@ public class ListManager { /** * @param setName ListSet in which to search for an element. - * @param listName the element to search for. + * @param listName the element to search for. * @return true if the ListSet "setName" contains an element * "listName", false otherwise. */ @@ -112,23 +111,23 @@ public class ListManager { //================general Lists================== - public static String getListString(final String filename, final boolean withcomments) { + public static String getListString(final String filename, final boolean withcomments) { return FileUtils.getListString(new File(listsPath ,filename), withcomments); } - + //================Helper functions for collection conversion================== - + /** * Simple conversion of a Collection of Strings to a comma separated String. * If the implementing Collection subclass guaranties an order of its elements, * the substrings of the result will have the same order. - * + * * @param col a Collection of Strings. * @return String with elements from set separated by comma. */ public static String collection2string(final Collection col){ final StringBuilder str = new StringBuilder(col.size() * 40); - + if (col != null && !col.isEmpty()) { final Iterator it = col.iterator(); str.append(it.next()); @@ -137,7 +136,7 @@ public class ListManager { str.append(it.next()); } } - + return str.toString(); } @@ -158,13 +157,13 @@ public class ListManager { /** * Simple conversion of a comma separated list to a unified Set. - * + * * @param string list of comma separated Strings * @return resulting Set or empty Set if string is null */ public static Set string2set(final String string){ HashSet set; - + if (string != null) { set = new HashSet(Arrays.asList(commaPattern.split(string, 0))); } else { @@ -177,7 +176,7 @@ public class ListManager { /** * Simple conversion of a comma separated list to a Vector containing * the order of the substrings. - * + * * @param string list of comma separated Strings * @return resulting Vector or empty Vector if string is null */ @@ -198,19 +197,16 @@ public class ListManager { /** * Load or reload all active Blacklists */ - public static void reloadBlacklists(){ - final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; - final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); - - final List blacklistFiles = new ArrayList(supportedBlacklistTypes.length); - for (String supportedBlacklistType : supportedBlacklistTypes) { + public static void reloadBlacklists(){ + final List blacklistFiles = new ArrayList(BlacklistType.values().length); + for (final BlacklistType supportedBlacklistType : BlacklistType.values()) { final BlacklistFile blFile = new BlacklistFile( switchboard.getConfig( - supportedBlacklistType + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")), + supportedBlacklistType.toString() + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")), supportedBlacklistType); blacklistFiles.add(blFile); } - + Switchboard.urlBlacklist.clear(); Switchboard.urlBlacklist.loadList( blacklistFiles.toArray(new BlacklistFile[blacklistFiles.size()]), diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index ad901bdb0..30e958cd8 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -84,7 +84,7 @@ import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.io.ByteCountOutputStream; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import de.anomic.crawler.Cache; @@ -350,7 +350,7 @@ public final class HTTPDProxyHandler { // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); if (args != null) { path = path + "?" + args; } - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { log.logInfo("AGIS blocking of host '" + hostlow + "'"); HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); @@ -814,7 +814,7 @@ public final class HTTPDProxyHandler { // re-calc the url path final String remotePath = (args == null) ? path : (path + "?" + args); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) { HTTPDemon.sendRespondError(conProp,respond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); @@ -1243,7 +1243,7 @@ public final class HTTPDProxyHandler { // blacklist idea inspired by [AS]: // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { + if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { HTTPDemon.sendRespondError(conProp,clientOut,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); diff --git a/source/net/yacy/peers/NewsPool.java b/source/net/yacy/peers/NewsPool.java index c75f10c14..33b87b676 100644 --- a/source/net/yacy/peers/NewsPool.java +++ b/source/net/yacy/peers/NewsPool.java @@ -56,7 +56,7 @@ import java.util.Set; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; public class NewsPool { @@ -343,13 +343,13 @@ public class NewsPool { if (record.created().getTime() == 0) return; final Map attributes = record.attributes(); if (attributes.containsKey("url")){ - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){ + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("url")))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); return; } } if (attributes.containsKey("startURL")){ - if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){ + if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("startURL")))){ System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); return; } diff --git a/source/net/yacy/peers/Protocol.java b/source/net/yacy/peers/Protocol.java index 298fa8052..b1681d722 100644 --- a/source/net/yacy/peers/Protocol.java +++ b/source/net/yacy/peers/Protocol.java @@ -96,6 +96,7 @@ import net.yacy.peers.graphics.WebStructureGraph; import net.yacy.peers.graphics.WebStructureGraph.HostReference; import net.yacy.peers.operation.yacyVersion; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; @@ -699,9 +700,9 @@ public final class Protocol assert (urlEntry.hash().length == 12) : "urlEntry.hash() = " + ASCII.String(urlEntry.hash()); if ( urlEntry.hash().length != 12 ) { continue; // bad url hash - } - if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) { - if ( Network.log.isInfo() ) { + } + if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry) ) { + if ( Network.log.isInfo() ) { Network.log.logInfo("remote search: filtered blacklisted url " + urlEntry.url() + " from peer " diff --git a/source/net/yacy/repository/Blacklist.java b/source/net/yacy/repository/Blacklist.java index 1f23b9eca..472921d84 100644 --- a/source/net/yacy/repository/Blacklist.java +++ b/source/net/yacy/repository/Blacklist.java @@ -26,9 +26,13 @@ package net.yacy.repository; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -50,12 +54,12 @@ import net.yacy.kelondro.util.SetTools; public class Blacklist { - public static final String BLACKLIST_DHT = "dht"; - public static final String BLACKLIST_CRAWLER = "crawler"; - public static final String BLACKLIST_PROXY = "proxy"; - public static final String BLACKLIST_SEARCH = "search"; - public static final String BLACKLIST_SURFTIPS = "surftips"; - public static final String BLACKLIST_NEWS = "news"; + private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser"); + + public enum BlacklistType { + DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS + } + public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$"; public static enum BlacklistError { @@ -82,33 +86,31 @@ public class Blacklist { return this.errorCode; } } - protected static final Set BLACKLIST_TYPES = new HashSet(Arrays.asList(new String[]{ - Blacklist.BLACKLIST_CRAWLER, - Blacklist.BLACKLIST_PROXY, - Blacklist.BLACKLIST_DHT, - Blacklist.BLACKLIST_SEARCH, - Blacklist.BLACKLIST_SURFTIPS, - Blacklist.BLACKLIST_NEWS - })); - public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news"; + private File blacklistRootPath = null; - private final ConcurrentMap cachedUrlHashs; - private final ConcurrentMap>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here - private final ConcurrentMap>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + private final ConcurrentMap cachedUrlHashs; + private final ConcurrentMap>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + private final ConcurrentMap>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here + public Blacklist(final File rootPath) { setRootPath(rootPath); // prepare the data structure - this.hostpaths_matchable = new ConcurrentHashMap>>(); - this.hostpaths_notmatchable = new ConcurrentHashMap>>(); - this.cachedUrlHashs = new ConcurrentHashMap(); + this.hostpaths_matchable = new ConcurrentHashMap>>(); + this.hostpaths_notmatchable = new ConcurrentHashMap>>(); + this.cachedUrlHashs = new ConcurrentHashMap(); - for (final String blacklistType : BLACKLIST_TYPES) { + for (final BlacklistType blacklistType : BlacklistType.values()) { this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap>()); this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap>()); - this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap>()); + if (blacklistType.equals(BlacklistType.DHT)) { + loadDHTCache(); + } else { + this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + } } } @@ -126,30 +128,16 @@ public class Blacklist { this.blacklistRootPath = rootPath; } - protected Map> getBlacklistMap(final String blacklistType, final boolean matchable) { - if (blacklistType == null) { - throw new IllegalArgumentException("Blacklist type not set."); - } - if (!BLACKLIST_TYPES.contains(blacklistType)) { - throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + "."); - } - + protected ConcurrentMap> getBlacklistMap(final BlacklistType blacklistType, final boolean matchable) { return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType); } - protected HandleSet getCacheUrlHashsSet(final String blacklistType) { - if (blacklistType == null) { - throw new IllegalArgumentException("Blacklist type not set."); - } - if (!BLACKLIST_TYPES.contains(blacklistType)) { - throw new IllegalArgumentException("Unknown backlist type."); - } - + protected HandleSet getCacheUrlHashsSet(final BlacklistType blacklistType) { return this.cachedUrlHashs.get(blacklistType); } public void clear() { - for (final Map> entry : this.hostpaths_matchable.values()) { + for (final ConcurrentMap> entry : this.hostpaths_matchable.values()) { entry.clear(); } for (final Map> entry : this.hostpaths_notmatchable.values()) { @@ -162,12 +150,12 @@ public class Blacklist { public int size() { int size = 0; - for (final String entry : this.hostpaths_matchable.keySet()) { + for (final BlacklistType entry : this.hostpaths_matchable.keySet()) { for (final List ientry : this.hostpaths_matchable.get(entry).values()) { size += ientry.size(); } } - for (final String entry : this.hostpaths_notmatchable.keySet()) { + for (final BlacklistType entry : this.hostpaths_notmatchable.keySet()) { for (final List ientry : this.hostpaths_notmatchable.get(entry).values()) { size += ientry.size(); } @@ -188,8 +176,8 @@ public class Blacklist { * @param sep */ private void loadList(final BlacklistFile blFile, final String sep) { - final Map> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); - final Map> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); + final ConcurrentMap> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); + final ConcurrentMap> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); Set>> loadedBlacklist; Map.Entry> loadedEntry; List paths; @@ -240,18 +228,18 @@ public class Blacklist { } } - public void loadList(final String blacklistType, final String fileNames, final String sep) { + public void loadList(final BlacklistType blacklistType, final String fileNames, final String sep) { // method for not breaking older plasmaURLPattern interface final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType); loadList(blFile, sep); } - public void removeAll(final String blacklistType, final String host) { + public void removeAll(final BlacklistType blacklistType, final String host) { getBlacklistMap(blacklistType, true).remove(host); getBlacklistMap(blacklistType, false).remove(host); } - public void remove(final String blacklistType, final String host, final String path) { + public void remove(final BlacklistType blacklistType, final String host, final String path) { final Map> blacklistMap = getBlacklistMap(blacklistType, true); List hostList = blacklistMap.get(host); @@ -272,7 +260,7 @@ public class Blacklist { } } - public void add(final String blacklistType, final String host, final String path) { + public void add(final BlacklistType blacklistType, final String host, final String path) { if (host == null) { throw new IllegalArgumentException("host may not be null"); } @@ -296,18 +284,18 @@ public class Blacklist { public int blacklistCacheSize() { int size = 0; - final Iterator iter = this.cachedUrlHashs.keySet().iterator(); + final Iterator iter = this.cachedUrlHashs.keySet().iterator(); while (iter.hasNext()) { size += this.cachedUrlHashs.get(iter.next()).size(); } return size; } - public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) { + public boolean hashInBlacklistedCache(final BlacklistType blacklistType, final byte[] urlHash) { return getCacheUrlHashsSet(blacklistType).has(urlHash); } - public boolean contains(final String blacklistType, final String host, final String path) { + public boolean contains(final BlacklistType blacklistType, final String host, final String path) { boolean ret = false; if (blacklistType != null && host != null && path != null) { @@ -324,7 +312,18 @@ public class Blacklist { return ret; } - public boolean isListed(final String blacklistType, final DigestURI url) { + /** + * Checks whether the given entry is listed in given blacklist type + * @param blacklistType The used blacklist + * @param entry Entry to be checked + * @return Whether the given entry is blacklisted + */ + public boolean isListed(final BlacklistType blacklistType, final URIMetadataRow entry) { + // Call inner method + return isListed(blacklistType, entry.url()); + } + + public boolean isListed(final BlacklistType blacklistType, final DigestURI url) { if (url == null) { throw new IllegalArgumentException("url may not be null"); } @@ -358,7 +357,7 @@ public class Blacklist { return "Default YaCy Blacklist Engine"; } - public boolean isListed(final String blacklistType, final String hostlow, final String path) { + public boolean isListed(final BlacklistType blacklistType, final String hostlow, final String path) { if (hostlow == null) { throw new IllegalArgumentException("hostlow may not be null"); } @@ -509,4 +508,33 @@ public class Blacklist { final Set blacklist = new HashSet(FileUtils.getListArray(new File(listsPath, blacklistToUse))); return blacklist != null && blacklist.contains(newEntry); } + + public final void saveDHTCache() { + try { + final ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(BLACKLIST_DHT_CACHEFILE)); + out.writeObject(getCacheUrlHashsSet(BlacklistType.DHT)); + out.close(); + + } catch (final IOException e) { + Log.logException(e); + } + } + + public final void loadDHTCache() { + try { + if (BLACKLIST_DHT_CACHEFILE.exists()) { + final ObjectInputStream in = new ObjectInputStream(new FileInputStream(BLACKLIST_DHT_CACHEFILE)); + this.cachedUrlHashs.put(BlacklistType.DHT, (HandleSet) in.readObject()); + in.close(); + } else { + this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); + } + } catch (final ClassNotFoundException e) { + Log.logException(e); + } catch (final FileNotFoundException e) { + Log.logException(e); + } catch (final IOException e) { + Log.logException(e); + } + } } diff --git a/source/net/yacy/repository/BlacklistFile.java b/source/net/yacy/repository/BlacklistFile.java index 5c516ae09..00493ddd8 100644 --- a/source/net/yacy/repository/BlacklistFile.java +++ b/source/net/yacy/repository/BlacklistFile.java @@ -30,12 +30,14 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import net.yacy.repository.Blacklist.BlacklistType; + public class BlacklistFile { private final String filename; - private final String type; + private final BlacklistType type; - public BlacklistFile(final String filename, final String type) { + public BlacklistFile(final String filename, final BlacklistType type) { this.filename = filename; this.type = type; } @@ -53,5 +55,5 @@ public class BlacklistFile { return new HashSet(Arrays.asList(this.filename.split(","))); } - public String getType() { return this.type; } + public BlacklistType getType() { return this.type; } } diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 82cef5fd4..24f8ebbcf 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -52,6 +52,7 @@ import net.yacy.document.TextParser; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.index.Segments; import de.anomic.crawler.Cache; @@ -188,8 +189,8 @@ public final class LoaderDispatcher { final String protocol = url.getProtocol(); final String host = url.getHost(); - // check if url is in blacklist - if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) { + // check if url is in blacklist + if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) { this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); } diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index b3452c5e4..195f38266 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2597,7 +2597,7 @@ public final class Switchboard extends serverSwitch "denied by profile rule, process case=" + processCase + ", profile name = " - + queueEntry.profile().name()); + + queueEntry.profile().name()); return; } diff --git a/source/net/yacy/search/index/MetadataRepository.java b/source/net/yacy/search/index/MetadataRepository.java index 61f6c82d6..0ce804a23 100644 --- a/source/net/yacy/search/index/MetadataRepository.java +++ b/source/net/yacy/search/index/MetadataRepository.java @@ -60,6 +60,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.util.MemoryControl; import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import de.anomic.crawler.CrawlStacker; public final class MetadataRepository implements /*Metadata,*/ Iterable { @@ -408,8 +409,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable remove(entry.hash()); continue; } - if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) || - this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) || + if (this.blacklist.isListed(BlacklistType.CRAWLER, entry) || + this.blacklist.isListed(BlacklistType.DHT, entry) || (this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) { this.lastBlacklistedUrl = entry.url().toNormalform(true, true); this.lastBlacklistedHash = ASCII.String(entry.hash()); diff --git a/source/net/yacy/search/index/Segment.java b/source/net/yacy/search/index/Segment.java index 221bf4ad8..fd90400c6 100644 --- a/source/net/yacy/search/index/Segment.java +++ b/source/net/yacy/search/index/Segment.java @@ -62,7 +62,7 @@ import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.util.ISO639; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; import net.yacy.search.query.RWIProcess; @@ -536,7 +536,7 @@ public class Segment { urlHashs.put(entry.urlhash()); } else { url = ue.url(); - if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { + if (url == null || Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) { urlHashs.put(entry.urlhash()); } } diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index 43b189c7e..d1046693e 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -64,6 +64,7 @@ import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.TermSearch; import net.yacy.peers.graphics.ProfilingGraph; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; @@ -624,7 +625,13 @@ public final class RWIProcess extends Thread this.sortout++; continue; } - + + // Check for blacklist + if ( Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page) ) { + this.sortout++; + continue; + } + final String pageurl = page.url().toNormalform(true, true); final String pageauthor = page.dc_creator(); final String pagetitle = page.dc_title().toLowerCase(); diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java index 7f6707601..16765e59f 100644 --- a/source/net/yacy/search/snippet/MediaSnippet.java +++ b/source/net/yacy/search/snippet/MediaSnippet.java @@ -51,7 +51,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.util.ByteArray; -import net.yacy.repository.Blacklist; +import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import de.anomic.crawler.ZURL.FailCategory; import de.anomic.crawler.retrieval.Request; @@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable, Comparator, Comparator= 0 || u.indexOf("favicon",0) >= 0) continue; if (ientry.height() > 0 && ientry.height() < 32) continue; if (ientry.width() > 0 && ientry.width() < 32) continue; @@ -251,8 +251,8 @@ public class MediaSnippet implements Comparable, Comparator