Rewrote all String blacklist types to enum 'BlacklistType', closes bug #143

Conflicts:
	htroot/Supporter.java
	htroot/yacy/crawlReceipt.java
	htroot/yacy/transferRWI.java
	htroot/yacy/transferURL.java
	source/de/anomic/crawler/CrawlStacker.java
	source/de/anomic/data/ListManager.java
	source/net/yacy/peers/Protocol.java
	source/net/yacy/repository/Blacklist.java
	source/net/yacy/repository/LoaderDispatcher.java
	source/net/yacy/search/Switchboard.java
	source/net/yacy/search/index/MetadataRepository.java
	source/net/yacy/search/index/Segment.java
	source/net/yacy/search/query/RWIProcess.java
	source/net/yacy/search/snippet/MediaSnippet.java
pull/1/head
Roland 'Quix0r' Haeder 13 years ago committed by Michael Peter Christen
parent 213f006bf1
commit edaa09b9b1

@ -49,6 +49,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistError;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager;
@ -76,10 +77,6 @@ public class BlacklistCleaner_p {
ListManager.listsPath = new File(env.getDataPath(), env.getConfig("listManager.listsPath", "DATA/LISTS"));
String blacklistToUse = null;
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
prop.put(DISABLED+"checked", "1");
if (post != null) {
@ -102,10 +99,10 @@ public class BlacklistCleaner_p {
if (post.containsKey("delete")) {
prop.put(RESULTS + "modified", "1");
prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", true)));
prop.put(RESULTS + "modified_delCount", removeEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", true)));
} else if (post.containsKey("alter")) {
prop.put(RESULTS + "modified", "2");
prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, supportedBlacklistTypes, getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false)));
prop.put(RESULTS + "modified_alterCount", alterEntries(blacklistToUse, BlacklistType.values(), getKeysByPrefix(post, "select", false), getValuesByPrefix(post, "entry", false)));
}
// list illegal entries
@ -275,7 +272,7 @@ public class BlacklistCleaner_p {
* @param entries Array of entries to be deleted.
* @return Length of the list of entries to be removed.
*/
private static int removeEntries(final String blacklistToUse, final String[] supportedBlacklistTypes, final String[] entries) {
private static int removeEntries(final String blacklistToUse, final BlacklistType[] supportedBlacklistTypes, final String[] entries) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
@ -299,7 +296,7 @@ public class BlacklistCleaner_p {
}
// remove the entry from the running blacklist engine
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
final String host = (s.indexOf('/',0) == -1) ? s : s.substring(0, s.indexOf('/',0));
final String path = (s.indexOf('/',0) == -1) ? ".*" : s.substring(s.indexOf('/',0) + 1);
@ -328,7 +325,7 @@ public class BlacklistCleaner_p {
*/
private static int alterEntries(
final String blacklistToUse,
final String[] supportedBlacklistTypes,
final BlacklistType[] supportedBlacklistTypes,
final String[] oldEntry,
final String[] newEntry) {
removeEntries(blacklistToUse, supportedBlacklistTypes, oldEntry);
@ -346,7 +343,7 @@ public class BlacklistCleaner_p {
path = n.substring(pos + 1);
}
pw.println(host + "/" + path);
for (final String s : supportedBlacklistTypes) {
for (final BlacklistType s : supportedBlacklistTypes) {
if (ListManager.listSetContains(s + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.add(
s,

@ -34,7 +34,7 @@ import java.net.MalformedURLException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.data.ListManager;
@ -64,21 +64,23 @@ public class BlacklistTest_p {
DigestURI testurl = null;
try {
testurl = new DigestURI(urlstring);
} catch (final MalformedURLException e) { testurl = null; }
} catch (final MalformedURLException e) {
testurl = null;
}
if(testurl != null) {
prop.putHTML("url",testurl.toString());
prop.putHTML("testlist_url",testurl.toString());
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl))
prop.put("testlist_listedincrawler", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl))
prop.put("testlist_listedindht", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl))
prop.put("testlist_listedinnews", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl))
prop.put("testlist_listedinproxy", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl))
prop.put("testlist_listedinsearch", "1");
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl))
prop.put("testlist_listedinsurftips", "1");
}
else {

@ -43,6 +43,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import de.anomic.data.ListManager;
@ -64,10 +65,6 @@ public class Blacklist_p {
ListManager.switchboard = (Switchboard) env;
ListManager.listsPath = new File(ListManager.switchboard.getDataPath(),ListManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// get the list of supported blacklist types
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
// load all blacklist files located in the directory
List<String> dirlist = FileUtils.getDirListing(ListManager.listsPath, Blacklist.BLACKLIST_FILENAME_FILTER);
@ -98,22 +95,22 @@ public class Blacklist_p {
}
if(testurl != null) {
prop.putHTML("testlist_url",testurl.toString());
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, testurl)) {
prop.put("testlist_listedincrawler", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, testurl)) {
prop.put("testlist_listedindht", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, testurl)) {
prop.put("testlist_listedinnews", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, testurl)) {
prop.put("testlist_listedinproxy", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SEARCH, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, testurl)) {
prop.put("testlist_listedinsearch", "1");
}
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, testurl)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, testurl)) {
prop.put("testlist_listedinsurftips", "1");
}
} else {
@ -159,7 +156,7 @@ public class Blacklist_p {
ListManager.updateListSet(BLACKLIST_SHARED, blacklistToUse);
// activate it for all known blacklist types
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists", blacklistToUse);
}
} catch (final IOException e) {/* */}
@ -189,7 +186,7 @@ public class Blacklist_p {
Log.logWarning("Blacklist", "file "+ blackListFile +" could not be deleted!");
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
ListManager.removeFromListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
}
@ -212,7 +209,7 @@ public class Blacklist_p {
return prop;
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (post.containsKey("activateList4" + supportedBlacklistType)) {
ListManager.updateListSet(supportedBlacklistType + ".BlackLists",blacklistToUse);
} else {
@ -253,7 +250,7 @@ public class Blacklist_p {
if (selectedBlacklistEntries.length > 0) {
String temp = null;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
@ -273,7 +270,7 @@ public class Blacklist_p {
// store this call as api call
ListManager.switchboard.tables.recordAPICall(post, "Blacklist_p.html", WorkTables.TABLE_API_TYPE_CONFIGURATION, "add to blacklist: " + blentry);
final String temp = addBlacklistEntry(blacklistToUse, blentry, header, supportedBlacklistTypes);
final String temp = addBlacklistEntry(blacklistToUse, blentry, header, BlacklistType.values());
if (temp != null) {
prop.put("LOCATION", temp);
return prop;
@ -298,12 +295,12 @@ public class Blacklist_p {
!targetBlacklist.equals(blacklistToUse)) {
String temp;
for (final String selectedBlacklistEntry : selectedBlacklistEntries) {
if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = addBlacklistEntry(targetBlacklist, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntry, header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
@ -338,12 +335,12 @@ public class Blacklist_p {
if (!selectedBlacklistEntries[i].equals(editedBlacklistEntries[i])) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) {
if ((temp = deleteBlacklistEntry(blacklistToUse, selectedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, supportedBlacklistTypes)) != null) {
if ((temp = addBlacklistEntry(blacklistToUse, editedBlacklistEntries[i], header, BlacklistType.values())) != null) {
prop.put("LOCATION", temp);
return prop;
}
@ -475,12 +472,12 @@ public class Blacklist_p {
if (element.equals(blacklistToUse)) { //current List
prop.put(DISABLED + BLACKLIST + blacklistCount + "_selected", "1");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
for (int blTypes=0; blTypes < BlacklistType.values().length; blTypes++) {
prop.putXML(DISABLED + "currentActiveFor_" + blTypes + "_blTypeName",BlacklistType.values()[blTypes].toString());
prop.put(DISABLED + "currentActiveFor_" + blTypes + "_checked",
ListManager.listSetContains(supportedBlacklistTypes[blTypes] + ".BlackLists", element) ? "0" : "1");
ListManager.listSetContains(BlacklistType.values()[blTypes] + ".BlackLists", element) ? "0" : "1");
}
prop.put(DISABLED + "currentActiveFor", supportedBlacklistTypes.length);
prop.put(DISABLED + "currentActiveFor", BlacklistType.values().length);
} else {
prop.putXML(DISABLED + EDIT + BLACKLIST_MOVE + blacklistMoveCount + "_name", element);
@ -494,9 +491,9 @@ public class Blacklist_p {
}
int activeCount = 0;
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", element)) {
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType);
prop.putHTML(DISABLED + BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName", supportedBlacklistType.toString());
activeCount++;
}
}
@ -521,13 +518,13 @@ public class Blacklist_p {
* @param newEntry the entry that is to be added
* @param header
* @param supportedBlacklistTypes
* @return null if no error occured, else a String to put into LOCATION
* @return null if no error occurred, else a String to put into LOCATION
*/
private static String addBlacklistEntry(
final String blacklistToUse,
final String newEntry,
final RequestHeader header,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) {
return "";
@ -555,7 +552,7 @@ public class Blacklist_p {
final String blacklistToUse,
final String oldEntry,
final RequestHeader header,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (blacklistToUse == null || blacklistToUse.length() == 0) {
return "";
@ -580,7 +577,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String oldEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(listsPath, blacklistToUse));
@ -603,7 +600,7 @@ public class Blacklist_p {
pos = oldEntry.length();
oldEntry = oldEntry + "/.*";
}
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",blacklistToUse)) {
Switchboard.urlBlacklist.remove(supportedBlacklistType,oldEntry.substring(0, pos), oldEntry.substring(pos + 1));
}
@ -622,7 +619,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String newEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
// ignore empty entries
if(newEntry == null || newEntry.isEmpty()) {
@ -659,7 +656,7 @@ public class Blacklist_p {
final File listsPath,
final String blacklistToUse,
String newEntry,
final String[] supportedBlacklistTypes) {
final BlacklistType[] supportedBlacklistTypes) {
if (!Blacklist.blacklistFileContains(listsPath, blacklistToUse, newEntry)) {
// append the line to the file
@ -683,7 +680,7 @@ public class Blacklist_p {
// add to blacklist
int pos = newEntry.indexOf('/',0);
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : supportedBlacklistTypes) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists", blacklistToUse)) {
Switchboard.urlBlacklist.add(supportedBlacklistType, newEntry.substring(0, pos), newEntry.substring(pos + 1));
}

@ -58,7 +58,7 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.dht.PeerSelection;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segment;
@ -441,7 +441,7 @@ public class IndexControlRWIs_p
supportedBlacklistType + ".BlackLists",
blacklist) ) {
Switchboard.urlBlacklist.add(
supportedBlacklistType,
BlacklistType.valueOf(supportedBlacklistType),
url.getHost(),
url.getFile());
}
@ -457,7 +457,6 @@ public class IndexControlRWIs_p
if ( post.containsKey("blacklistdomains") ) {
PrintWriter pw;
try {
final String[] supportedBlacklistTypes = Blacklist.BLACKLIST_TYPES_STRING.split(",");
pw =
new PrintWriter(new FileWriter(new File(ListManager.listsPath, blacklist), true));
DigestURI url;
@ -472,7 +471,7 @@ public class IndexControlRWIs_p
if ( e != null ) {
url = e.url();
pw.println(url.getHost() + "/.*");
for ( final String supportedBlacklistType : supportedBlacklistTypes ) {
for ( final BlacklistType supportedBlacklistType : BlacklistType.values() ) {
if ( ListManager.listSetContains(
supportedBlacklistType + ".BlackLists",
blacklist) ) {
@ -623,7 +622,7 @@ public class IndexControlRWIs_p
? "appears emphasized, "
: "")
+ ((DigestURI.probablyRootURL(entry.word().urlhash())) ? "probably root url" : ""));
if ( Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, url) ) {
if ( Switchboard.urlBlacklist.isListed(BlacklistType.DHT, url) ) {
prop.put("genUrlList_urlList_" + i + "_urlExists_urlhxChecked", "1");
}
i++;

@ -43,7 +43,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -129,8 +129,10 @@ public class Supporter {
url = row.getPrimaryKeyUTF8().trim();
try {
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch(final MalformedURLException e) {continue;}
if (Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS, new DigestURI(url, urlhash.getBytes()))) continue;
} catch (final MalformedURLException e) {
continue;
}
title = row.getColUTF8(1);
description = row.getColUTF8(2);
if ((url == null) || (title == null) || (description == null)) continue;

@ -42,7 +42,7 @@ import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.peers.NewsDB;
import net.yacy.peers.NewsPool;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -136,7 +136,7 @@ public class Surftips {
url = row.getPrimaryKeyUTF8().trim();
try{
if(Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_SURFTIPS ,new DigestURI(url)))
if(Switchboard.urlBlacklist.isListed(BlacklistType.SURFTIPS ,new DigestURI(url)))
continue;
}catch(final MalformedURLException e){continue;};
title = row.getColUTF8(1);

@ -4,7 +4,7 @@ import java.util.List;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.data.ListManager;
import de.anomic.server.serverObjects;
@ -35,15 +35,14 @@ public class blacklists_p {
prop.put("lists_" + blacklistCount + "_shared", "0");
}
final String[] types = Blacklist.BLACKLIST_TYPES_STRING.split(",");
int j = 0;
for (final String type : types) {
prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type);
for (final BlacklistType type : BlacklistType.values()) {
prop.putXML("lists_" + blacklistCount + "_types_" + j + "_name", type.toString());
prop.put("lists_" + blacklistCount + "_types_" + j + "_value",
ListManager.listSetContains(type + ".BlackLists", element) ? 1 : 0);
j++;
}
prop.put("lists_" + blacklistCount + "_types", types.length);
prop.put("lists_" + blacklistCount + "_types", BlacklistType.values().length);
if (!"1".equals(attrOnly) && !"true".equals(attrOnly)) {
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, element));

@ -47,7 +47,7 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
@ -238,10 +238,7 @@ public class sharedBlacklist_p {
pw.println(newItem);
if (Switchboard.urlBlacklist != null) {
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (final String supportedBlacklistType : supportedBlacklistTypes) {
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
if (ListManager.listSetContains(supportedBlacklistType + ".BlackLists",selectedBlacklistName)) {
Switchboard.urlBlacklist.add(supportedBlacklistType,newItem.substring(0, pos), newItem.substring(pos + 1));
}

@ -35,6 +35,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
@ -134,7 +135,15 @@ public final class crawlReceipt {
// check if the entry is in our network domain
final String urlRejectReason = sb.crawlStacker.urlInAcceptedDomain(entry.url());
if (urlRejectReason != null) {
if (log.isWarning()) log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (" + urlRejectReason + ") for hash " + ASCII.String(entry.hash()) + " from peer " + iam + "\n\tURL properties: "+ propStr);
prop.put("delay", "9999");
return prop;
}
// Check URL against DHT blacklist
if (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, entry)) {
// URL is blacklisted
log.logWarning("crawlReceipt: RECEIVED wrong RECEIPT (URL is blacklisted) for URL " + ASCII.String(entry.hash()) + ":" + entry.url().toNormalform(false, true) + " from peer " + iam);
prop.put("delay", "9999");
return prop;
}

@ -46,7 +46,7 @@ import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.peers.dht.FlatWordPartitionScheme;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
@ -172,8 +172,8 @@ public final class transferRWI {
urlHash = iEntry.urlhash();
// block blacklisted entries
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(Blacklist.BLACKLIST_DHT, urlHash))) {
if (Network.log.isFine()) Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
if ((blockBlacklist) && (Switchboard.urlBlacklist.hashInBlacklistedCache(BlacklistType.DHT, urlHash))) {
Network.log.logFine("transferRWI: blocked blacklisted URLHash '" + ASCII.String(urlHash) + "' from peer " + otherPeerName);
blocked++;
continue;
}

@ -38,7 +38,7 @@ import net.yacy.peers.EventChannel;
import net.yacy.peers.Network;
import net.yacy.peers.Protocol;
import net.yacy.peers.Seed;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.ResultURLs;
@ -122,8 +122,8 @@ public final class transferURL {
}
// check if the entry is blacklisted
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_DHT, lEntry.url()))) {
if (Network.log.isFine()) Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
if ((blockBlacklist) && (Switchboard.urlBlacklist.isListed(BlacklistType.DHT, lEntry))) {
Network.log.logFine("transferURL: blocked blacklisted URL '" + lEntry.url().toNormalform(false, true) + "' from peer " + otherPeerName);
lEntry = null;
blocked++;
continue;

@ -50,7 +50,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.workflow.WorkflowProcessor;
import net.yacy.peers.SeedDB;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
@ -405,8 +405,8 @@ public final class CrawlStacker {
}
// check blacklist
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist.");
if (Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
this.log.logFine("URL '" + urlstring + "' is in blacklist.");
return "url in blacklist";
}

@ -36,7 +36,7 @@ import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.Segments;
@ -95,7 +95,7 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}
@ -236,7 +236,7 @@ public final class HTTPLoader {
// check if url is in blacklist
final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
if (Switchboard.urlBlacklist != null && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, hostlow, path)) {
throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}

@ -30,18 +30,17 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.BlacklistFile;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
import java.util.List;
import java.util.regex.Pattern;
// The Naming of the functions is a bit strange...
public class ListManager {
@ -199,14 +198,11 @@ public class ListManager {
* Load or reload all active Blacklists
*/
public static void reloadBlacklists(){
final String supportedBlacklistTypesStr = Blacklist.BLACKLIST_TYPES_STRING;
final String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(supportedBlacklistTypes.length);
for (String supportedBlacklistType : supportedBlacklistTypes) {
final List<BlacklistFile> blacklistFiles = new ArrayList<BlacklistFile>(BlacklistType.values().length);
for (final BlacklistType supportedBlacklistType : BlacklistType.values()) {
final BlacklistFile blFile = new BlacklistFile(
switchboard.getConfig(
supportedBlacklistType + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistType.toString() + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
supportedBlacklistType);
blacklistFiles.add(blFile);
}

@ -84,7 +84,7 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCountOutputStream;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import de.anomic.crawler.Cache;
@ -350,7 +350,7 @@ public final class HTTPDProxyHandler {
// respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
log.logInfo("AGIS blocking of host '" + hostlow + "'");
HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
@ -814,7 +814,7 @@ public final class HTTPDProxyHandler {
// re-calc the url path
final String remotePath = (args == null) ? path : (path + "?" + args);
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, remotePath)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) {
HTTPDemon.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -1243,7 +1243,7 @@ public final class HTTPDProxyHandler {
// blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_PROXY, hostlow, path)) {
if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) {
HTTPDemon.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
log.logInfo("AGIS blocking of host '" + hostlow + "'");

@ -56,7 +56,7 @@ import java.util.Set;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
public class NewsPool {
@ -343,13 +343,13 @@ public class NewsPool {
if (record.created().getTime() == 0) return;
final Map<String, String> attributes = record.attributes();
if (attributes.containsKey("url")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("url")))){
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("url")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url"));
return;
}
}
if (attributes.containsKey("startURL")){
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_NEWS, new DigestURI(attributes.get("startURL")))){
if (Switchboard.urlBlacklist.isListed(BlacklistType.NEWS, new DigestURI(attributes.get("startURL")))){
System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL"));
return;
}

@ -96,6 +96,7 @@ import net.yacy.peers.graphics.WebStructureGraph;
import net.yacy.peers.graphics.WebStructureGraph.HostReference;
import net.yacy.peers.operation.yacyVersion;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
@ -700,7 +701,7 @@ public final class Protocol
if ( urlEntry.hash().length != 12 ) {
continue; // bad url hash
}
if ( blacklist.isListed(Blacklist.BLACKLIST_SEARCH, urlEntry.url()) ) {
if ( blacklist.isListed(BlacklistType.SEARCH, urlEntry) ) {
if ( Network.log.isInfo() ) {
Network.log.logInfo("remote search: filtered blacklisted url "
+ urlEntry.url()

@ -26,9 +26,13 @@
package net.yacy.repository;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -50,12 +54,12 @@ import net.yacy.kelondro.util.SetTools;
public class Blacklist {
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_SURFTIPS = "surftips";
public static final String BLACKLIST_NEWS = "news";
private static final File BLACKLIST_DHT_CACHEFILE = new File("DATA/WORK/BlacklistCache_DHT.ser");
/**
 * The kinds of blacklist that can be consulted. Every method of the
 * blacklist engine that used to take a type name as a String now takes one
 * of these constants, so an unknown type can no longer be passed in.
 * The declaration order is kept stable.
 */
public enum BlacklistType {
    /** DHT blacklist; its URL hash cache is the one persisted to the DHT cache file. */
    DHT,
    /** Crawler blacklist. */
    CRAWLER,
    /** Proxy blacklist. */
    PROXY,
    /** Search blacklist. */
    SEARCH,
    /** Surftips blacklist. */
    SURFTIPS,
    /** News blacklist. */
    NEWS
}
public final static String BLACKLIST_FILENAME_FILTER = "^.*\\.black$";
public static enum BlacklistError {
@ -82,33 +86,31 @@ public class Blacklist {
return this.errorCode;
}
}
protected static final Set<String> BLACKLIST_TYPES = new HashSet<String>(Arrays.asList(new String[]{
Blacklist.BLACKLIST_CRAWLER,
Blacklist.BLACKLIST_PROXY,
Blacklist.BLACKLIST_DHT,
Blacklist.BLACKLIST_SEARCH,
Blacklist.BLACKLIST_SURFTIPS,
Blacklist.BLACKLIST_NEWS
}));
public static final String BLACKLIST_TYPES_STRING = "proxy,crawler,dht,search,surftips,news";
private File blacklistRootPath = null;
private final ConcurrentMap<String, HandleSet> cachedUrlHashs;
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<String, Map<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<BlacklistType, HandleSet> cachedUrlHashs;
private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_matchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
private final ConcurrentMap<BlacklistType, ConcurrentMap<String, List<Pattern>>> hostpaths_notmatchable; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public Blacklist(final File rootPath) {
setRootPath(rootPath);
// prepare the data structure
this.hostpaths_matchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<String, Map<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<String, HandleSet>();
this.hostpaths_matchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.hostpaths_notmatchable = new ConcurrentHashMap<BlacklistType, ConcurrentMap<String, List<Pattern>>>();
this.cachedUrlHashs = new ConcurrentHashMap<BlacklistType, HandleSet>();
for (final String blacklistType : BLACKLIST_TYPES) {
for (final BlacklistType blacklistType : BlacklistType.values()) {
this.hostpaths_matchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
this.hostpaths_notmatchable.put(blacklistType, new ConcurrentHashMap<String, List<Pattern>>());
if (blacklistType.equals(BlacklistType.DHT)) {
loadDHTCache();
} else {
this.cachedUrlHashs.put(blacklistType, new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0));
}
}
}
@ -126,30 +128,16 @@ public class Blacklist {
this.blacklistRootPath = rootPath;
}
protected Map<String, List<Pattern>> getBlacklistMap(final String blacklistType, final boolean matchable) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown blacklist type: " + blacklistType + ".");
}
protected ConcurrentMap<String, List<Pattern>> getBlacklistMap(final BlacklistType blacklistType, final boolean matchable) {
return (matchable) ? this.hostpaths_matchable.get(blacklistType) : this.hostpaths_notmatchable.get(blacklistType);
}
protected HandleSet getCacheUrlHashsSet(final String blacklistType) {
if (blacklistType == null) {
throw new IllegalArgumentException("Blacklist type not set.");
}
if (!BLACKLIST_TYPES.contains(blacklistType)) {
throw new IllegalArgumentException("Unknown backlist type.");
}
protected HandleSet getCacheUrlHashsSet(final BlacklistType blacklistType) {
return this.cachedUrlHashs.get(blacklistType);
}
public void clear() {
for (final Map<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
for (final ConcurrentMap<String, List<Pattern>> entry : this.hostpaths_matchable.values()) {
entry.clear();
}
for (final Map<String, List<Pattern>> entry : this.hostpaths_notmatchable.values()) {
@ -162,12 +150,12 @@ public class Blacklist {
public int size() {
int size = 0;
for (final String entry : this.hostpaths_matchable.keySet()) {
for (final BlacklistType entry : this.hostpaths_matchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_matchable.get(entry).values()) {
size += ientry.size();
}
}
for (final String entry : this.hostpaths_notmatchable.keySet()) {
for (final BlacklistType entry : this.hostpaths_notmatchable.keySet()) {
for (final List<Pattern> ientry : this.hostpaths_notmatchable.get(entry).values()) {
size += ientry.size();
}
@ -188,8 +176,8 @@ public class Blacklist {
* @param sep
*/
private void loadList(final BlacklistFile blFile, final String sep) {
final Map<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final Map<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
final ConcurrentMap<String, List<Pattern>> blacklistMapMatch = getBlacklistMap(blFile.getType(), true);
final ConcurrentMap<String, List<Pattern>> blacklistMapNotMatch = getBlacklistMap(blFile.getType(), false);
Set<Map.Entry<String, List<String>>> loadedBlacklist;
Map.Entry<String, List<String>> loadedEntry;
List<Pattern> paths;
@ -240,18 +228,18 @@ public class Blacklist {
}
}
public void loadList(final String blacklistType, final String fileNames, final String sep) {
public void loadList(final BlacklistType blacklistType, final String fileNames, final String sep) {
// method for not breaking older plasmaURLPattern interface
final BlacklistFile blFile = new BlacklistFile(fileNames, blacklistType);
loadList(blFile, sep);
}
public void removeAll(final String blacklistType, final String host) {
public void removeAll(final BlacklistType blacklistType, final String host) {
getBlacklistMap(blacklistType, true).remove(host);
getBlacklistMap(blacklistType, false).remove(host);
}
public void remove(final String blacklistType, final String host, final String path) {
public void remove(final BlacklistType blacklistType, final String host, final String path) {
final Map<String, List<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
List<Pattern> hostList = blacklistMap.get(host);
@ -272,7 +260,7 @@ public class Blacklist {
}
}
public void add(final String blacklistType, final String host, final String path) {
public void add(final BlacklistType blacklistType, final String host, final String path) {
if (host == null) {
throw new IllegalArgumentException("host may not be null");
}
@ -296,18 +284,18 @@ public class Blacklist {
public int blacklistCacheSize() {
int size = 0;
final Iterator<String> iter = this.cachedUrlHashs.keySet().iterator();
final Iterator<BlacklistType> iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) {
size += this.cachedUrlHashs.get(iter.next()).size();
}
return size;
}
public boolean hashInBlacklistedCache(final String blacklistType, final byte[] urlHash) {
public boolean hashInBlacklistedCache(final BlacklistType blacklistType, final byte[] urlHash) {
return getCacheUrlHashsSet(blacklistType).has(urlHash);
}
public boolean contains(final String blacklistType, final String host, final String path) {
public boolean contains(final BlacklistType blacklistType, final String host, final String path) {
boolean ret = false;
if (blacklistType != null && host != null && path != null) {
@ -324,7 +312,18 @@ public class Blacklist {
return ret;
}
public boolean isListed(final String blacklistType, final DigestURI url) {
/**
 * Checks whether the given metadata entry is listed in the given blacklist type.
 * This is a convenience overload that delegates to the URL-based check using
 * {@code entry.url()}.
 *
 * @param blacklistType The blacklist to consult
 * @param entry Entry to be checked, must not be null
 * @return Whether the URL of the given entry is blacklisted
 * @throws IllegalArgumentException if entry is null, or (raised by the
 *         URL-based overload) if the entry has no URL
 */
public boolean isListed(final BlacklistType blacklistType, final URIMetadataRow entry) {
    // Reject null the same way the sibling overloads do, instead of failing
    // with a bare NullPointerException on entry.url()
    if (entry == null) {
        throw new IllegalArgumentException("entry may not be null");
    }
    // Call inner method
    return isListed(blacklistType, entry.url());
}
public boolean isListed(final BlacklistType blacklistType, final DigestURI url) {
if (url == null) {
throw new IllegalArgumentException("url may not be null");
}
@ -358,7 +357,7 @@ public class Blacklist {
return "Default YaCy Blacklist Engine";
}
public boolean isListed(final String blacklistType, final String hostlow, final String path) {
public boolean isListed(final BlacklistType blacklistType, final String hostlow, final String path) {
if (hostlow == null) {
throw new IllegalArgumentException("hostlow may not be null");
}
@ -509,4 +508,33 @@ public class Blacklist {
final Set<String> blacklist = new HashSet<String>(FileUtils.getListArray(new File(listsPath, blacklistToUse)));
return blacklist != null && blacklist.contains(newEntry);
}
/**
 * Serializes the cached URL hash set of the DHT blacklist into
 * {@code BLACKLIST_DHT_CACHEFILE}, replacing any previous content.
 * I/O failures are logged and not propagated to the caller.
 */
public final void saveDHTCache() {
    FileOutputStream fileOut = null;
    try {
        fileOut = new FileOutputStream(BLACKLIST_DHT_CACHEFILE);
        final ObjectOutputStream out = new ObjectOutputStream(fileOut);
        out.writeObject(getCacheUrlHashsSet(BlacklistType.DHT));
        // regular path: closing the object stream flushes it and closes the file
        out.close();
    } catch (final IOException e) {
        Log.logException(e);
    } finally {
        // Backstop for the failure paths (stream header or writeObject threw):
        // make sure the file handle is released. Closing twice is harmless.
        if (fileOut != null) {
            try {
                fileOut.close();
            } catch (final IOException e) {
                Log.logException(e);
            }
        }
    }
}
/**
 * Initializes the URL hash cache of the DHT blacklist. If
 * {@code BLACKLIST_DHT_CACHEFILE} exists, the cache is deserialized from it;
 * otherwise, or if the file cannot be read, an empty set is installed.
 * Failures are logged and not propagated, and the DHT entry of
 * {@code cachedUrlHashs} is always set when this method returns.
 */
public final void loadDHTCache() {
    HandleSet cache = null;
    if (BLACKLIST_DHT_CACHEFILE.exists()) {
        FileInputStream fileIn = null;
        try {
            fileIn = new FileInputStream(BLACKLIST_DHT_CACHEFILE);
            final ObjectInputStream in = new ObjectInputStream(fileIn);
            cache = (HandleSet) in.readObject();
        } catch (final ClassNotFoundException e) {
            Log.logException(e);
        } catch (final IOException e) {
            // also covers FileNotFoundException (file vanished after exists())
            Log.logException(e);
        } finally {
            // release the file handle on both the success and the failure path
            if (fileIn != null) {
                try {
                    fileIn.close();
                } catch (final IOException e) {
                    Log.logException(e);
                }
            }
        }
    }
    if (cache == null) {
        // no cache file, or it was unreadable: start with an empty cache so that
        // later lookups for BlacklistType.DHT never find a missing map entry
        cache = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
    }
    this.cachedUrlHashs.put(BlacklistType.DHT, cache);
}
}

@ -30,12 +30,14 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import net.yacy.repository.Blacklist.BlacklistType;
public class BlacklistFile {
private final String filename;
private final String type;
private final BlacklistType type;
public BlacklistFile(final String filename, final String type) {
public BlacklistFile(final String filename, final BlacklistType type) {
this.filename = filename;
this.type = type;
}
@ -53,5 +55,5 @@ public class BlacklistFile {
return new HashSet<String>(Arrays.asList(this.filename.split(",")));
}
public String getType() { return this.type; }
public BlacklistType getType() { return this.type; }
}

@ -52,6 +52,7 @@ import net.yacy.document.TextParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segments;
import de.anomic.crawler.Cache;
@ -189,7 +190,7 @@ public final class LoaderDispatcher {
final String host = url.getHost();
// check if url is in blacklist
if (checkBlacklist && host != null && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, host.toLowerCase(), url.getFile())) {
if (checkBlacklist && Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, host.toLowerCase(), url.getFile())) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1);
throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
}

@ -60,6 +60,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import de.anomic.crawler.CrawlStacker;
public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> {
@ -408,8 +409,8 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
remove(entry.hash());
continue;
}
if (this.blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, entry.url()) ||
this.blacklist.isListed(Blacklist.BLACKLIST_DHT, entry.url()) ||
if (this.blacklist.isListed(BlacklistType.CRAWLER, entry) ||
this.blacklist.isListed(BlacklistType.DHT, entry) ||
(this.crawlStacker.urlInAcceptedDomain(entry.url()) != null)) {
this.lastBlacklistedUrl = entry.url().toNormalform(true, true);
this.lastBlacklistedHash = ASCII.String(entry.hash());

@ -62,7 +62,7 @@ import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;
import net.yacy.kelondro.util.ISO639;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.RWIProcess;
@ -536,7 +536,7 @@ public class Segment {
urlHashs.put(entry.urlhash());
} else {
url = ue.url();
if (url == null || Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
if (url == null || Switchboard.urlBlacklist.isListed(BlacklistType.CRAWLER, url)) {
urlHashs.put(entry.urlhash());
}
}

@ -64,6 +64,7 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
@ -625,6 +626,12 @@ public final class RWIProcess extends Thread
continue;
}
// Check for blacklist
if ( Switchboard.urlBlacklist.isListed(BlacklistType.SEARCH, page) ) {
this.sortout++;
continue;
}
final String pageurl = page.url().toNormalform(true, true);
final String pageauthor = page.dc_creator();
final String pagetitle = page.dc_title().toLowerCase();

@ -51,7 +51,7 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import de.anomic.crawler.ZURL.FailCategory;
import de.anomic.crawler.retrieval.Request;
@ -178,7 +178,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
entry = i.next();
url = new DigestURI(entry.getKey());
desc = entry.getValue();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
final int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
@ -203,7 +203,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
ientry = i.next();
url = new DigestURI(ientry.url());
final String u = url.toString();
if (isUrlBlacklisted(url, Blacklist.BLACKLIST_SEARCH)) continue;
if (isUrlBlacklisted(BlacklistType.SEARCH, url)) continue;
if (u.indexOf(".ico",0) >= 0 || u.indexOf("favicon",0) >= 0) continue;
if (ientry.height() > 0 && ientry.height() < 32) continue;
if (ientry.width() > 0 && ientry.width() < 32) continue;
@ -252,7 +252,7 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
* @param blacklistType Type of blacklist (see enum Blacklist.BlacklistType)
* @return isBlacklisted Whether the given URL is blacklisted
*/
private static boolean isUrlBlacklisted (DigestURI url, String blacklistType) {
private static boolean isUrlBlacklisted (final BlacklistType blacklistType, final DigestURI url) {
// Default is not blacklisted
boolean isBlacklisted = false;

Loading…
Cancel
Save