Rewrote all String blacklist types to enum 'BlacklistType', closes bug #143

Conflicts:
	htroot/Supporter.java
	htroot/yacy/crawlReceipt.java
	htroot/yacy/transferRWI.java
	htroot/yacy/transferURL.java
	source/de/anomic/crawler/CrawlStacker.java
	source/de/anomic/data/ListManager.java
	source/net/yacy/peers/Protocol.java
	source/net/yacy/repository/Blacklist.java
	source/net/yacy/repository/LoaderDispatcher.java
	source/net/yacy/search/Switchboard.java
	source/net/yacy/search/index/MetadataRepository.java
	source/net/yacy/search/index/Segment.java
	source/net/yacy/search/query/RWIProcess.java
	source/net/yacy/search/snippet/MediaSnippet.java
pull/1/head
Roland 'Quix0r' Haeder 13 years ago committed by Michael Peter Christen
parent 213f006bf1
commit edaa09b9b1

@ -49,6 +49,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistError; import net.yacy.repository.Blacklist.BlacklistError;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager; import de.anomic.data.ListManager;
@ -76,10 +77,6 @@ public class BlacklistCleaner_p {
ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS")); ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS"));
String blacklistToUse = null; String blacklistToUse = null;
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
prop.put(DISABLED+"checked", "1"); prop.put(DISABLED+"checked", "1");
if (post != null) { if (post != null) {
@ -102,10 +99,10 @@ public class BlacklistCleaner_p {
if (post.containsKey("delete")) { if (post.containsKey("delete")) {
prop.put(RESULTS + "modified", "1"); prop.put(RESULTS + "modified", "1");
prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", true))); prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true)));
} else if (post.containsKey("alter")) { } else if (post.containsKey("alter")) {
prop.put(RESULTS + "modified", "2"); prop.put(RESULTS + "modified", "2");
prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false))); prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false)));
} }
// list illegal entries // list illegal entries
@ -275,7 +272,7 @@ public class BlacklistCleaner_p {
* @param entries Array of entries to be deleted. * @param entries Array of entries to be deleted.
* @return Length of the list of entries to be removed. * @return Length of the list of entries to be removed.
*/ */
private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) { private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file // load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse)); final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
@ -299,7 +296,7 @@ public class BlacklistCleaner_p {
} }
// remove the entry from the running blacklist engine // remove the entry from the running blacklist engine
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0)); final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0));
final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1); final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1);
@ -328,7 +325,7 @@ public class BlacklistCleaner_p {
*/ */
private static int alterEntries( private static int alterEntries(
final String blacklistToUse, final String blacklistToUse,
final String[] supportedBlacklistTypes, final BlacklistType[] supportedBlacklistTypes,
final String[] oldEntry, final String[] oldEntry,
final String[] newEntry) { final String[] newEntry) {
removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry); removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
@ -346,7 +343,7 @@ public class BlacklistCleaner_p {
path = n.substring(pos + 1); path = n.substring(pos + 1);
} }
pw.println(host + "/" + path); pw.println(host + "/" + path);
for (final String s : supportedBlacklistTypes) { for (final BlacklistType s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) { if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add( Switchboard.urlBlacklist.add(
s, s,

@ -34,7 +34,7 @@ import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import de.anomic.data.ListManager; import de.anomic.data.ListManager;
@ -64,21 +64,23 @@ public class BlacklistTest_p {
DigestURI testurl = null; DigestURI testurl = null;
try { try {
testurl = new DigestURI(urlstring); testurl = new DigestURI(urlstring);
} catch (final MalformedURLException e) { testurl = null; } } catch (final MalformedURLException e) {
testurl = null;
}
if(testurl != null) { if(testurl != null) {
prop.putHTML("url",testurl.toString()); prop.putHTML("url",testurl.toString());
prop.putHTML("testlist_url",testurl.toString()); prop.putHTML("testlist_url",testurl.toString());
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl))
prop.put("testlist_listedincrawler", "1"); prop.put("testlist_listedincrawler", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl))
prop.put("testlist_listedindht", "1"); prop.put("testlist_listedindht", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl))
prop.put("testlist_listedinnews", "1"); prop.put("testlist_listedinnews", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl))
prop.put("testlist_listedinproxy", "1"); prop.put("testlist_listedinproxy", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl))
prop.put("testlist_listedinsearch", "1"); prop.put("testlist_listedinsearch", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl))
prop.put("testlist_listedinsurftips", "1"); prop.put("testlist_listedinsurftips", "1");
} }
else { else {

@ -43,6 +43,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager; import de.anomic.data.ListManager;
@ -64,10 +65,6 @@ public class Blacklist_p {
ListManager.switchboard = (Switchboard) env; ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
// load all blacklist files located in the directory // load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER); List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
@ -98,22 +95,22 @@ public class Blacklist_p {
} }
if(testurl != null) { if(testurl != null) {
prop.putHTML("testlist_url",testurl.toString()); prop.putHTML("testlist_url",testurl.toString());
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) {
prop.put("testlist_listedincrawler", "1"); prop.put("testlist_listedincrawler", "1");
} }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) {
prop.put("testlist_listedindht", "1"); prop.put("testlist_listedindht", "1");
} }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) {
prop.put("testlist_listedinnews", "1"); prop.put("testlist_listedinnews", "1");
} }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) {
prop.put("testlist_listedinproxy", "1"); prop.put("testlist_listedinproxy", "1");
} }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) {
prop.put("testlist_listedinsearch", "1"); prop.put("testlist_listedinsearch", "1");
} }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) {
prop.put("testlist_listedinsurftips", "1"); prop.put("testlist_listedinsurftips", "1");
} }
} else { } else {
@ -159,7 +156,7 @@ public class Blacklist_p {
ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse); ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse);
// activate it for all known blacklist types // activate it for all known blacklist types
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse); ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse);
} }
} catch (final IOException e) {/* */} } catch (final IOException e) {/* */}
@ -189,7 +186,7 @@ public class Blacklist_p {
Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!"); Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!");
} }
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
} }
@ -212,7 +209,7 @@ public class Blacklist_p {
return prop; return prop;
} }
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (post.containsKey("activateList4" + supportedBlacklistType)) { if (post.containsKey("activateList4" + supportedBlacklistType)) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse); ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
} else { } else {
@ -253,7 +250,7 @@ public class Blacklist_p {
if (selectedBlacklistEntries.length > 0) { if (selectedBlacklistEntries.length > 0) {
String temp = null; String temp = null;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) { for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
} }
@ -273,7 +270,7 @@ public class Blacklist_p {
// store this call as api call // store this call as api call
ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry); ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry);
final String temp = addBlacklistEntry(blacklistToUse, blentry, header, supportedBlacklistTypes); final String temp = addBlacklistEntry(blacklistToUse, blentry, header, BlacklistType.values());
if (temp != null) { if (temp != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
@ -298,12 +295,12 @@ public class Blacklist_p {
!targetBlacklist.equals(blacklistToUse)) { !targetBlacklist.equals(blacklistToUse)) {
String temp; String temp;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) { for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
} }
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) { if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
@ -338,12 +335,12 @@ public class Blacklist_p {
if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) { if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
} }
if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) { if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp); prop.put("LOCATION", temp);
return prop; return prop;
} }
@ -475,12 +472,12 @@ public class Blacklist_p {
if (element.equals(blacklistToUse)) { //current List if (element.equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1"); prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) { for (int blTypes=0; blTypes < BlacklistType.values().length; blTypes++) {
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]); prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",BlacklistType.values()[blTypes].toString());
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked", prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", element) ? "0" : "1"); ListManager.listSetContains(BlacklistType.values()[blTypes] + ".BlackLists", element) ? "0" : "1");
} }
prop.put(DISABLED + "currentActiveFor", supportedBlacklistTypes.length); prop.put(DISABLED + "currentActiveFor", BlacklistType.values().length);
} else { } else {
prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element); prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element);
@ -494,9 +491,9 @@ public class Blacklist_p {
} }
int activeCount = 0; int activeCount = 0;
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) {
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType); prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType.toString());
activeCount++; activeCount++;
} }
} }
@ -521,13 +518,13 @@ public class Blacklist_p {
* @param newEntry the entry that is to be added * @param newEntry the entry that is to be added
* @param header * @param header
* @param supportedBlacklistTypes * @param supportedBlacklistTypes
* @return null if no error occured, else a String to put into LOCATION * @return null if no error occurred, else a String to put into LOCATION
*/ */
private static String addBlacklistEntry( private static String addBlacklistEntry(
final String blacklistToUse, final String blacklistToUse,
final String newEntry, final String newEntry,
final RequestHeader header, final RequestHeader header,
final String[] supportedBlacklistTypes) { final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) { if (blacklistToUse == null || blacklistToUse.length() == 0) {
return ""; return "";
@ -555,7 +552,7 @@ public class Blacklist_p {
final String blacklistToUse, final String blacklistToUse,
final String oldEntry, final String oldEntry,
final RequestHeader header, final RequestHeader header,
final String[] supportedBlacklistTypes) { final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) { if (blacklistToUse == null || blacklistToUse.length() == 0) {
return ""; return "";
@ -580,7 +577,7 @@ public class Blacklist_p {
final File listsPath, final File listsPath,
final String blacklistToUse, final String blacklistToUse,
String oldEntry, String oldEntry,
final String[] supportedBlacklistTypes) { final BlacklistType[] supportedBlacklistTypes) {
// load blacklist data from file // load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(listsPath, blacklistToUse)); final List<String> list = FileUtils.getListArray(new File(listsPath, blacklistToUse));
@ -603,7 +600,7 @@ public class Blacklist_p {
pos = oldEntry.length(); pos = oldEntry.length();
oldEntry = oldEntry + "/.*"; oldEntry = oldEntry + "/.*";
} }
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1)); Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1));
} }
@ -622,7 +619,7 @@ public class Blacklist_p {
final File listsPath, final File listsPath,
final String blacklistToUse, final String blacklistToUse,
String newEntry, String newEntry,
final String[] supportedBlacklistTypes) { final BlacklistType[] supportedBlacklistTypes) {
// ignore empty entries // ignore empty entries
if(newEntry == null || newEntry.isEmpty()) { if(newEntry == null || newEntry.isEmpty()) {
@ -659,7 +656,7 @@ public class Blacklist_p {
final File listsPath, final File listsPath,
final String blacklistToUse, final String blacklistToUse,
String newEntry, String newEntry,
final String[] supportedBlacklistTypes) { final BlacklistType[] supportedBlacklistTypes) {
if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) { if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) {
// append the line to the file // append the line to the file
@ -683,7 +680,7 @@ public class Blacklist_p {
// add to blacklist // add to blacklist
int pos = newEntry.indexOf('/',0); int pos = newEntry.indexOf('/',0);
for (final String supportedBlacklistType : supportedBlacklistTypes) { for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1)); Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1));
} }

@ -58,7 +58,7 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Protocol; import net.yacy.peers.Protocol;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.peers.dht.PeerSelection; import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
@ -441,7 +441,7 @@ public class IndexControlRWIs_p
supportedBlacklistType + ".BlackLists", supportedBlacklistType + ".BlackLists",
blacklist) ) { blacklist) ) {
Switchboard.urlBlacklist.add( Switchboard.urlBlacklist.add(
supportedBlacklistType, BlacklistType.valueOf(supportedBlacklistType),
url.getHost(), url.getHost(),
url.getFile()); url.getFile());
} }
@ -457,7 +457,6 @@ public class IndexControlRWIs_p
if ( post.containsKey("blacklistdomains") ) { if ( post.containsKey("blacklistdomains") ) {
PrintWriter pw; PrintWriter pw;
try { try {
final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(",");
pw = pw =
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true)); new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
DigestURI url; DigestURI url;
@ -472,7 +471,7 @@ public class IndexControlRWIs_p
if ( e != null ) { if ( e != null ) {
url = e.url(); url = e.url();
pw.println(url.getHost() + "/.*"); pw.println(url.getHost() + "/.*");
for ( final String supportedBlacklistType : supportedBlacklistTypes ) { for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains( if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists", supportedBlacklistType + ".BlackLists",
blacklist) ) { blacklist) ) {
@ -623,7 +622,7 @@ public class IndexControlRWIs_p
? "appears emphasized, " ? "appears emphasized, "
: "") : "")
+ ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : "")); + ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : ""));
if ( Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url) ) { if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) {
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1"); prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1");
} }
i++; i++;

@ -43,7 +43,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB; import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool; import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -129,8 +129,10 @@ public class Supporter {
url = row.getPrimaryKeyUTF8().trim(); url = row.getPrimaryKeyUTF8().trim();
try { try {
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue; if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch(final MalformedURLException e) {continue;} } catch (final MalformedURLException e) {
continue;
}
title = row.getColUTF8(1); title = row.getColUTF8(1);
description = row.getColUTF8(2); description = row.getColUTF8(2);
if ((url == null) || (title == null) || (description == null)) continue; if ((url == null) || (title == null) || (description == null)) continue;

@ -42,7 +42,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB; import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool; import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -136,7 +136,7 @@ public class Surftips {
url = row.getPrimaryKeyUTF8().trim(); url = row.getPrimaryKeyUTF8().trim();
try{ try{
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url))) if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS ,new DigestURI(url)))
continue; continue;
}catch(final MalformedURLException e){continue;}; }catch(final MalformedURLException e){continue;};
title = row.getColUTF8(1); title = row.getColUTF8(1);

@ -4,7 +4,7 @@ import java.util.List;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.data.ListManager; import de.anomic.data.ListManager;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -35,15 +35,14 @@ public class blacklists_p {
prop.put("lists_" + blacklistCount + "_shared", "0"); prop.put("lists_" + blacklistCount + "_shared", "0");
} }
final String[] types = Blacklist.BLACKLIST_TYPES_STRING.split(",");
int j = 0; int j = 0;
for (final String type : types) { for (final BlacklistType type : BlacklistType.values()) {
prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type); prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type.toString());
prop.put("lists_" + blacklistCount + "_types_" + j + "_value", prop.put("lists_" + blacklistCount + "_types_" + j + "_value",
ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0); ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0);
j++; j++;
} }
prop.put("lists_" + blacklistCount + "_types", types.length); prop.put("lists_" + blacklistCount + "_types", BlacklistType.values().length);
if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) { if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) {
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, element)); final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, element));

@ -47,7 +47,7 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
@ -238,10 +238,7 @@ public class sharedBlacklist_p {
pw.println(newItem); pw.println(newItem);
if (Switchboard.urlBlacklist != null) { if (Switchboard.urlBlacklist != null) {
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (final String supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) { if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) {
Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1)); Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1));
} }

@ -35,6 +35,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Protocol; import net.yacy.peers.Protocol;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs;
@ -133,8 +134,16 @@ public final class crawlReceipt {
// check if the entry is in our network domain // check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url()); final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url());
if (urlRejectReason != null) { if (urlRejectReason != null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr); log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "9999");
return prop;
}
// Check URL against DHT blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
// URL is blacklisted
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam);
prop.put("delay", "9999"); prop.put("delay", "9999");
return prop; return prop;
} }

@ -46,7 +46,7 @@ import net.yacy.peers.Network;
import net.yacy.peers.Protocol; import net.yacy.peers.Protocol;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.peers.dht.FlatWordPartitionScheme; import net.yacy.peers.dht.FlatWordPartitionScheme;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
@ -171,9 +171,9 @@ public final class transferRWI {
iEntry = new WordReferenceRow(estring.substring(p)); iEntry = new WordReferenceRow(estring.substring(p));
urlHash = iEntry.urlhash(); urlHash = iEntry.urlhash();
// block blacklisted entries // block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) { if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(BlacklistType.DHT, urlHash))) {
if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName); Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
blocked++; blocked++;
continue; continue;
} }

@ -38,7 +38,7 @@ import net.yacy.peers.EventChannel;
import net.yacy.peers.Network; import net.yacy.peers.Network;
import net.yacy.peers.Protocol; import net.yacy.peers.Protocol;
import net.yacy.peers.Seed; import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs;
@ -121,9 +121,9 @@ public final class transferURL {
continue; continue;
} }
// check if the entry is blacklisted // check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) { if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName); Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null; lEntry = null;
blocked++; blocked++;
continue; continue;

@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB; import net.yacy.peers.SeedDB;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine; import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
@ -404,9 +404,9 @@ public final class CrawlStacker {
return "denied_(" + urlRejectReason + ")"; return "denied_(" + urlRejectReason + ")";
} }
// check blacklist // check blacklist
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist."); this.log.logFine("URL '" + urlstring + "' is in blacklist.");
return "url in blacklist"; return "url in blacklist";
} }

@ -36,7 +36,7 @@ import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCount; import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
@ -95,7 +95,7 @@ public final class HTTPLoader {
// check if url is in blacklist // check if url is in blacklist
final String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
} }
@ -236,7 +236,7 @@ public final class HTTPLoader {
// check if url is in blacklist // check if url is in blacklist
final String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) { if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
} }

@ -1,10 +1,10 @@
// listManager.java // listManager.java
// ------------------------------------- // -------------------------------------
// part of YACY // part of YACY
// //
// (C) 2005, 2006 by Alexander Schier // (C) 2005, 2006 by Alexander Schier
// (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com // (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
// //
// last change: $LastChangedDate$ by $LastChangedBy$ // last change: $LastChangedDate$ by $LastChangedBy$
// $LastChangedRevision$ // $LastChangedRevision$
// //
@ -30,35 +30,34 @@ import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.Vector; import java.util.Vector;
import java.util.regex.Pattern;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.BlacklistFile; import net.yacy.repository.BlacklistFile;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache; import net.yacy.search.query.SearchEventCache;
import java.util.List;
import java.util.regex.Pattern;
// The Naming of the functions is a bit strange... // The Naming of the functions is a bit strange...
public class ListManager { public class ListManager {
private final static Pattern commaPattern = Pattern.compile(","); private final static Pattern commaPattern = Pattern.compile(",");
public static Switchboard switchboard = null; public static Switchboard switchboard = null;
public static File listsPath = null; public static File listsPath = null;
/** /**
* Get ListSet from configuration file and return it as a unified Set. * Get ListSet from configuration file and return it as a unified Set.
* *
* <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are * <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are
* actually disjunct (pairwise unequal) sets which themselves can be separated * actually disjunct (pairwise unequal) sets which themselves can be separated
* into different subsets. E.g., there can be more than one blacklist of a type. * into different subsets. E.g., there can be more than one blacklist of a type.
* A ListSet is the set of all those "lists" (subsets) of an equal type. * A ListSet is the set of all those "lists" (subsets) of an equal type.
* *
* @param setName name of the ListSet * @param setName name of the ListSet
* @return a ListSet from configuration file * @return a ListSet from configuration file
*/ */
@ -69,13 +68,13 @@ public class ListManager {
/** /**
* Removes an element from a ListSet and updates the configuration file * Removes an element from a ListSet and updates the configuration file
* accordingly. If the element doesn't exist, then nothing will be changed. * accordingly. If the element doesn't exist, then nothing will be changed.
* *
* @param setName name of the ListSet. * @param setName name of the ListSet.
* @param listName name of the element to remove from the ListSet. * @param listName name of the element to remove from the ListSet.
*/ */
public static void removeFromListSet(final String setName, final String listName) { public static void removeFromListSet(final String setName, final String listName) {
final Set<String> listSet = getListSet(setName); final Set<String> listSet = getListSet(setName);
if (!listSet.isEmpty()) { if (!listSet.isEmpty()) {
listSet.remove(listName); listSet.remove(listName);
switchboard.setConfig(setName, collection2string(listSet)); switchboard.setConfig(setName, collection2string(listSet));
@ -86,9 +85,9 @@ public class ListManager {
* Adds an element to an existing ListSet. If the ListSet doesn't exist yet, * Adds an element to an existing ListSet. If the ListSet doesn't exist yet,
* a new one will be added. If the ListSet already contains an identical element, * a new one will be added. If the ListSet already contains an identical element,
* then nothing happens. * then nothing happens.
* *
* The new list will be written to the configuration file. * The new list will be written to the configuration file.
* *
* @param setName * @param setName
* @param newListName * @param newListName
*/ */
@ -101,7 +100,7 @@ public class ListManager {
/** /**
* @param setName ListSet in which to search for an element. * @param setName ListSet in which to search for an element.
* @param listName the element to search for. * @param listName the element to search for.
* @return <code>true</code> if the ListSet "setName" contains an element * @return <code>true</code> if the ListSet "setName" contains an element
* "listName", <code>false</code> otherwise. * "listName", <code>false</code> otherwise.
*/ */
@ -112,23 +111,23 @@ public class ListManager {
//================general Lists================== //================general Lists==================
public static String getListString(final String filename, final boolean withcomments) { public static String getListString(final String filename, final boolean withcomments) {
return FileUtils.getListString(new File(listsPath ,filename), withcomments); return FileUtils.getListString(new File(listsPath ,filename), withcomments);
} }
//================Helper functions for collection conversion================== //================Helper functions for collection conversion==================
/** /**
* Simple conversion of a Collection of Strings to a comma separated String. * Simple conversion of a Collection of Strings to a comma separated String.
* If the implementing Collection subclass guarantees an order of its elements, * If the implementing Collection subclass guarantees an order of its elements,
* the substrings of the result will have the same order. * the substrings of the result will have the same order.
* *
* @param col a Collection of Strings. * @param col a Collection of Strings.
* @return String with elements from set separated by comma. * @return String with elements from set separated by comma.
*/ */
public static String collection2string(final Collection<String> col){ public static String collection2string(final Collection<String> col){
final StringBuilder str = new StringBuilder(col.size() * 40); final StringBuilder str = new StringBuilder(col.size() * 40);
if (col != null && !col.isEmpty()) { if (col != null && !col.isEmpty()) {
final Iterator<String> it = col.iterator(); final Iterator<String> it = col.iterator();
str.append(it.next()); str.append(it.next());
@ -137,7 +136,7 @@ public class ListManager {
str.append(it.next()); str.append(it.next());
} }
} }
return str.toString(); return str.toString();
} }
@ -158,13 +157,13 @@ public class ListManager {
/** /**
* Simple conversion of a comma separated list to a unified Set. * Simple conversion of a comma separated list to a unified Set.
* *
* @param string list of comma separated Strings * @param string list of comma separated Strings
* @return resulting Set or empty Set if string is <code>null</code> * @return resulting Set or empty Set if string is <code>null</code>
*/ */
public static Set<String> string2set(final String string){ public static Set<String> string2set(final String string){
HashSet<String> set; HashSet<String> set;
if (string != null) { if (string != null) {
set = new HashSet<String>(Arrays.asList(commaPattern.split(string, 0))); set = new HashSet<String>(Arrays.asList(commaPattern.split(string, 0)));
} else { } else {
@ -177,7 +176,7 @@ public class ListManager {
/** /**
* Simple conversion of a comma separated list to a Vector containing * Simple conversion of a comma separated list to a Vector containing
* the order of the substrings. * the order of the substrings.
* *
* @param string list of comma separated Strings * @param string list of comma separated Strings
* @return resulting Vector or empty Vector if string is <code>null</code> * @return resulting Vector or empty Vector if string is <code>null</code>
*/ */
@ -198,19 +197,16 @@ public class ListManager {
/** /**
* Load or reload all active Blacklists * Load or reload all active Blacklists
*/ */
public static void reloadBlacklists(){ public static void reloadBlacklists(){
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING; final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(BlacklistType.values().length);
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(","); for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(supportedBlacklistTypes.length);
for (String supportedBlacklistType : supportedBlacklistTypes) {
final BlacklistFile blFile = new BlacklistFile( final BlacklistFile blFile = new BlacklistFile(
switchboard.getConfig( switchboard.getConfig(
supportedBlacklistType + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")), supportedBlacklistType.toString() + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistType); supportedBlacklistType);
blacklistFiles.add(blFile); blacklistFiles.add(blFile);
} }
Switchboard.urlBlacklist.clear(); Switchboard.urlBlacklist.clear();
Switchboard.urlBlacklist.loadList( Switchboard.urlBlacklist.loadList(
blacklistFiles.toArray(new BlacklistFile[blacklistFiles.size()]), blacklistFiles.toArray(new BlacklistFile[blacklistFiles.size()]),

@ -84,7 +84,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCountOutputStream; import net.yacy.kelondro.io.ByteCountOutputStream;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
@ -350,7 +350,7 @@ public final class HTTPDProxyHandler {
// respond a 404 for all AGIS ("all you get is shit") servers // respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; } if (args != null) { path = path + "?" + args; }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
log.logInfo("AGIS blocking of host '" + hostlow + "'"); log.logInfo("AGIS blocking of host '" + hostlow + "'");
HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null, HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
@ -814,7 +814,7 @@ public final class HTTPDProxyHandler {
// re-calc the url path // re-calc the url path
final String remotePath = (args == null) ? path : (path + "?" + args); final String remotePath = (args == null) ? path : (path + "?" + args);
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) {
HTTPDemon.sendRespondError(conProp,respond,4,403,null, HTTPDemon.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'"); log.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -1243,7 +1243,7 @@ public final class HTTPDProxyHandler {
// blacklist idea inspired by [AS]: // blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers // respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) { if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
HTTPDemon.sendRespondError(conProp,clientOut,4,403,null, HTTPDemon.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'"); log.logInfo("AGIS blocking of host '" + hostlow + "'");

@ -56,7 +56,7 @@ import java.util.Set;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
public class NewsPool { public class NewsPool {
@ -343,13 +343,13 @@ public class NewsPool {
if (record.created().getTime() == 0) return; if (record.created().getTime() == 0) return;
final Map<String, String> attributes = record.attributes(); final Map<String, String> attributes = record.attributes();
if (attributes.containsKey("url")){ if (attributes.containsKey("url")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){ if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("url")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url"));
return; return;
} }
} }
if (attributes.containsKey("startURL")){ if (attributes.containsKey("startURL")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){ if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("startURL")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL"));
return; return;
} }

@ -96,6 +96,7 @@ import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference; import net.yacy.peers.graphics.WebStructureGraph.HostReference;
import net.yacy.peers.operation.yacyVersion; import net.yacy.peers.operation.yacyVersion;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker; import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
@ -699,9 +700,9 @@ public final class Protocol
assert (urlEntry.hash().length == 12) : "urlEntry.hash() = " + ASCII.String(urlEntry.hash()); assert (urlEntry.hash().length == 12) : "urlEntry.hash() = " + ASCII.String(urlEntry.hash());
if ( urlEntry.hash().length != 12 ) { if ( urlEntry.hash().length != 12 ) {
continue; // bad url hash continue; // bad url hash
} }
if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) { if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry) ) {
if ( Network.log.isInfo() ) { if ( Network.log.isInfo() ) {
Network.log.logInfo("remote search: filtered blacklisted url " Network.log.logInfo("remote search: filtered blacklisted url "
+ urlEntry.url() + urlEntry.url()
+ " from peer " + " from peer "

@ -26,9 +26,13 @@
package net.yacy.repository; package net.yacy.repository;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
@ -50,12 +54,12 @@ import net.yacy.kelondro.util.SetTools;
public class Blacklist { public class Blacklist {
public static final String BLACKLIST_DHT = "dht"; private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser");
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy"; public enum BlacklistType {
public static final String BLACKLIST_SEARCH = "search"; DHT, CRAWLER, PROXY, SEARCH, SURFTIPS, NEWS
public static final String BLACKLIST_SURFTIPS = "surftips"; }
public static final String BLACKLIST_NEWS = "news";
public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$"; public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static enum BlacklistError { public static enum BlacklistError {
@ -82,33 +86,31 @@ public class Blacklist {
return this.errorCode; return this.errorCode;
} }
} }
protected static final Set<String> BLACKLIST_TYPES = new HashSet<String>(Arrays.asList(new String[]{
Blacklist.BLACKLIST_CRAWLER,
Blacklist.BLACKLIST_PROXY,
Blacklist.BLACKLIST_DHT,
Blacklist.BLACKLIST_SEARCH,
Blacklist.BLACKLIST_SURFTIPS,
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null; private File blacklistRootPath = null;
private final ConcurrentMap<String, HandleSet> cachedUrlHashs; private final ConcurrentMap<BlacklistType, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) { public Blacklist(final File rootPath) {
setRootPath(rootPath); setRootPath(rootPath);
// prepare the data structure // prepare the data structure
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>(); this.hostpaths_matchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>(); this.hostpaths_notmatchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>(); this.cachedUrlHashs = new ConcurrentHashMap<BlacklistType, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) { for (final BlacklistType blacklistType : BlacklistType.values()) {
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>()); this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>()); this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0)); this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
if (blacklistType.equals(BlacklistType.DHT)) {
loadDHTCache();
} else {
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
} }
} }
@ -126,30 +128,16 @@ public class Blacklist {
this.blacklistRootPath = rootPath; this.blacklistRootPath = rootPath;
} }
protected Map<String, List<Pattern>> getBlacklistMap(final String blacklistType, final boolean matchable) { protected ConcurrentMap<String, List<Pattern>> getBlacklistMap(final BlacklistType blacklistType, final boolean matchable) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
}
return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType); return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType);
} }
protected HandleSet getCacheUrlHashsSet(final String blacklistType) { protected HandleSet getCacheUrlHashsSet(final BlacklistType blacklistType) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown backlist type.");
}
return this.cachedUrlHashs.get(blacklistType); return this.cachedUrlHashs.get(blacklistType);
} }
public void clear() { public void clear() {
for (final Map<String, List<Pattern>> entry : this.hostpaths_matchable.values()) { for (final ConcurrentMap<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
entry.clear(); entry.clear();
} }
for (final Map<String, List<Pattern>> entry : this.hostpaths_notmatchable.values()) { for (final Map<String, List<Pattern>> entry : this.hostpaths_notmatchable.values()) {
@ -162,12 +150,12 @@ public class Blacklist {
public int size() { public int size() {
int size = 0; int size = 0;
for (final String entry : this.hostpaths_matchable.keySet()) { for (final BlacklistType entry : this.hostpaths_matchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_matchable.get(entry).values()) { for (final List<Pattern> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size(); size += ientry.size();
} }
} }
for (final String entry : this.hostpaths_notmatchable.keySet()) { for (final BlacklistType entry : this.hostpaths_notmatchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_notmatchable.get(entry).values()) { for (final List<Pattern> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size(); size += ientry.size();
} }
@ -188,8 +176,8 @@ public class Blacklist {
* @param sep * @param sep
*/ */
private void loadList(final BlacklistFile blFile, final String sep) { private void loadList(final BlacklistFile blFile, final String sep) {
final Map<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true); final ConcurrentMap<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false); final ConcurrentMap<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, List<String>>> loadedBlacklist; Set<Map.Entry<String, List<String>>> loadedBlacklist;
Map.Entry<String, List<String>> loadedEntry; Map.Entry<String, List<String>> loadedEntry;
List<Pattern> paths; List<Pattern> paths;
@ -240,18 +228,18 @@ public class Blacklist {
} }
} }
public void loadList(final String blacklistType, final String fileNames, final String sep) { public void loadList(final BlacklistType blacklistType, final String fileNames, final String sep) {
// method for not breaking older plasmaURLPattern interface // method for not breaking older plasmaURLPattern interface
final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType); final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType);
loadList(blFile, sep); loadList(blFile, sep);
} }
public void removeAll(final String blacklistType, final String host) { public void removeAll(final BlacklistType blacklistType, final String host) {
getBlacklistMap(blacklistType, true).remove(host); getBlacklistMap(blacklistType, true).remove(host);
getBlacklistMap(blacklistType, false).remove(host); getBlacklistMap(blacklistType, false).remove(host);
} }
public void remove(final String blacklistType, final String host, final String path) { public void remove(final BlacklistType blacklistType, final String host, final String path) {
final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true); final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
List<Pattern> hostList = blacklistMap.get(host); List<Pattern> hostList = blacklistMap.get(host);
@ -272,7 +260,7 @@ public class Blacklist {
} }
} }
public void add(final String blacklistType, final String host, final String path) { public void add(final BlacklistType blacklistType, final String host, final String path) {
if (host == null) { if (host == null) {
throw new IllegalArgumentException("host may not be null"); throw new IllegalArgumentException("host may not be null");
} }
@ -296,18 +284,18 @@ public class Blacklist {
public int blacklistCacheSize() { public int blacklistCacheSize() {
int size = 0; int size = 0;
final Iterator<String> iter = this.cachedUrlHashs.keySet().iterator(); final Iterator<BlacklistType> iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) { while (iter.hasNext()) {
size += this.cachedUrlHashs.get(iter.next()).size(); size += this.cachedUrlHashs.get(iter.next()).size();
} }
return size; return size;
} }
public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) { public boolean hashInBlacklistedCache(final BlacklistType blacklistType, final byte[] urlHash) {
return getCacheUrlHashsSet(blacklistType).has(urlHash); return getCacheUrlHashsSet(blacklistType).has(urlHash);
} }
public boolean contains(final String blacklistType, final String host, final String path) { public boolean contains(final BlacklistType blacklistType, final String host, final String path) {
boolean ret = false; boolean ret = false;
if (blacklistType != null && host != null && path != null) { if (blacklistType != null && host != null && path != null) {
@ -324,7 +312,18 @@ public class Blacklist {
return ret; return ret;
} }
public boolean isListed(final String blacklistType, final DigestURI url) { /**
* Checks whether the given entry is listed in given blacklist type
* @param blacklistType The used blacklist
* @param entry Entry to be checked
* @return Whether the given entry is blacklisted
*/
public boolean isListed(final BlacklistType blacklistType, final URIMetadataRow entry) {
    // Reject a null entry with the same exception type the other isListed
    // overloads use for null arguments, instead of a bare NullPointerException
    // raised by entry.url()
    if (entry == null) {
        throw new IllegalArgumentException("entry may not be null");
    }
    // Delegate to the URL-based check
    return isListed(blacklistType, entry.url());
}
public boolean isListed(final BlacklistType blacklistType, final DigestURI url) {
if (url == null) { if (url == null) {
throw new IllegalArgumentException("url may not be null"); throw new IllegalArgumentException("url may not be null");
} }
@ -358,7 +357,7 @@ public class Blacklist {
return "Default YaCy Blacklist Engine"; return "Default YaCy Blacklist Engine";
} }
public boolean isListed(final String blacklistType, final String hostlow, final String path) { public boolean isListed(final BlacklistType blacklistType, final String hostlow, final String path) {
if (hostlow == null) { if (hostlow == null) {
throw new IllegalArgumentException("hostlow may not be null"); throw new IllegalArgumentException("hostlow may not be null");
} }
@ -509,4 +508,33 @@ public class Blacklist {
final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse))); final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
return blacklist != null && blacklist.contains(newEntry); return blacklist != null && blacklist.contains(newEntry);
} }
/**
 * Serializes the cached URL hash set of the DHT blacklist type into the
 * file BLACKLIST_DHT_CACHEFILE. Any IOException is logged and not propagated.
 * The underlying stream is closed even if writing fails.
 */
public final void saveDHTCache() {
    FileOutputStream fileOut = null;
    ObjectOutputStream out = null;
    try {
        fileOut = new FileOutputStream(BLACKLIST_DHT_CACHEFILE);
        // the ObjectOutputStream constructor already writes a stream header and may throw
        out = new ObjectOutputStream(fileOut);
        out.writeObject(getCacheUrlHashsSet(BlacklistType.DHT));
    } catch (final IOException e) {
        Log.logException(e);
    } finally {
        try {
            // closing the object stream flushes it and closes the wrapped file stream;
            // fall back to the file stream if the wrapper could not be created
            if (out != null) {
                out.close();
            } else if (fileOut != null) {
                fileOut.close();
            }
        } catch (final IOException e) {
            Log.logException(e);
        }
    }
}
/**
 * Fills the cached URL hash set of the DHT blacklist type. If the file
 * BLACKLIST_DHT_CACHEFILE exists, the set is deserialized from it; otherwise
 * a new, empty HandleSet is registered. Exceptions raised while reading are
 * logged and not propagated; in that case no set is registered by this method.
 * The underlying stream is closed even if reading fails.
 */
public final void loadDHTCache() {
    FileInputStream fileIn = null;
    ObjectInputStream in = null;
    try {
        if (BLACKLIST_DHT_CACHEFILE.exists()) {
            fileIn = new FileInputStream(BLACKLIST_DHT_CACHEFILE);
            // the ObjectInputStream constructor already reads the stream header and may throw
            in = new ObjectInputStream(fileIn);
            this.cachedUrlHashs.put(BlacklistType.DHT, (HandleSet) in.readObject());
        } else {
            this.cachedUrlHashs.put(BlacklistType.DHT, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
        }
    } catch (final ClassNotFoundException e) {
        Log.logException(e);
    } catch (final FileNotFoundException e) {
        Log.logException(e);
    } catch (final IOException e) {
        Log.logException(e);
    } finally {
        try {
            // closing the object stream also closes the wrapped file stream;
            // fall back to the file stream if the wrapper could not be created
            if (in != null) {
                in.close();
            } else if (fileIn != null) {
                fileIn.close();
            }
        } catch (final IOException e) {
            Log.logException(e);
        }
    }
}
} }

@ -30,12 +30,14 @@ import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import net.yacy.repository.Blacklist.BlacklistType;
public class BlacklistFile { public class BlacklistFile {
private final String filename; private final String filename;
private final String type; private final BlacklistType type;
public BlacklistFile(final String filename, final String type) { public BlacklistFile(final String filename, final BlacklistType type) {
this.filename = filename; this.filename = filename;
this.type = type; this.type = type;
} }
@ -53,5 +55,5 @@ public class BlacklistFile {
return new HashSet<String>(Arrays.asList(this.filename.split(","))); return new HashSet<String>(Arrays.asList(this.filename.split(",")));
} }
public String getType() { return this.type; } public BlacklistType getType() { return this.type; }
} }

@ -52,6 +52,7 @@ import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments; import net.yacy.search.index.Segments;
import de.anomic.crawler.Cache; import de.anomic.crawler.Cache;
@ -188,8 +189,8 @@ public final class LoaderDispatcher {
final String protocol = url.getProtocol(); final String protocol = url.getProtocol();
final String host = url.getHost(); final String host = url.getHost();
// check if url is in blacklist // check if url is in blacklist
if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) { if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist."); throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
} }

@ -2597,7 +2597,7 @@ public final class Switchboard extends serverSwitch
"denied by profile rule, process case=" "denied by profile rule, process case="
+ processCase + processCase
+ ", profile name = " + ", profile name = "
+ queueEntry.profile().name()); + queueEntry.profile().name());
return; return;
} }

@ -60,6 +60,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.crawler.CrawlStacker; import de.anomic.crawler.CrawlStacker;
public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> { public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> {
@ -408,8 +409,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
remove(entry.hash()); remove(entry.hash());
continue; continue;
} }
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) || if (this.blacklist.isListed(BlacklistType.CRAWLER, entry) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) || this.blacklist.isListed(BlacklistType.DHT, entry) ||
(this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) { (this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) {
this.lastBlacklistedUrl = entry.url().toNormalform(true, true); this.lastBlacklistedUrl = entry.url().toNormalform(true, true);
this.lastBlacklistedHash = ASCII.String(entry.hash()); this.lastBlacklistedHash = ASCII.String(entry.hash());

@ -62,7 +62,7 @@ import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory; import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.ISO639; import net.yacy.kelondro.util.ISO639;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher; import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.query.RWIProcess; import net.yacy.search.query.RWIProcess;
@ -536,7 +536,7 @@ public class Segment {
urlHashs.put(entry.urlhash()); urlHashs.put(entry.urlhash());
} else { } else {
url = ue.url(); url = ue.url();
if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) { if (url == null || Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
urlHashs.put(entry.urlhash()); urlHashs.put(entry.urlhash());
} }
} }

@ -64,6 +64,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.TermSearch; import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker; import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment; import net.yacy.search.index.Segment;
@ -624,7 +625,13 @@ public final class RWIProcess extends Thread
this.sortout++; this.sortout++;
continue; continue;
} }
// Check for blacklist
if ( Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page) ) {
this.sortout++;
continue;
}
final String pageurl = page.url().toNormalform(true, true); final String pageurl = page.url().toNormalform(true, true);
final String pageauthor = page.dc_creator(); final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase(); final String pagetitle = page.dc_title().toLowerCase();

@ -51,7 +51,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray; import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import de.anomic.crawler.ZURL.FailCategory; import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request; import de.anomic.crawler.retrieval.Request;
@ -177,8 +177,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) { while (i.hasNext()) {
entry = i.next(); entry = i.next();
url = new DigestURI(entry.getKey()); url = new DigestURI(entry.getKey());
desc = entry.getValue(); desc = entry.getValue();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue; if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() + final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size(); removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) { if (ranking < 2 * queryhashes.size()) {
@ -202,8 +202,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
while (i.hasNext()) { while (i.hasNext()) {
ientry = i.next(); ientry = i.next();
url = new DigestURI(ientry.url()); url = new DigestURI(ientry.url());
final String u = url.toString(); final String u = url.toString();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue; if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue; if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue; if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue; if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -251,8 +251,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
* @param url The URL to check * @param url The URL to check
* @param blacklistType Type of blacklist (see class Blacklist, BLACKLIST_FOO) * @param blacklistType Type of blacklist (see class Blacklist, BLACKLIST_FOO)
* @return isBlacklisted Whether the given URL is blacklisted * @return isBlacklisted Whether the given URL is blacklisted
*/ */
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) { private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURI url) {
// Default is not blacklisted // Default is not blacklisted
boolean isBlacklisted = false; boolean isBlacklisted = false;

Loading…
Cancel
Save