Surftips Blacklist

The list of blacklist types is now hardcoded, instead of only being updated on first start / in migration.java

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3788 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
allo 18 years ago
parent 051a65f7af
commit d1e1580223

@ -63,6 +63,7 @@ import java.util.regex.PatternSyntaxException;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.defaultURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects;
@ -96,7 +97,7 @@ public class BlacklistCleaner_p {
String blacklistToUse = null;
// getting the list of supported blacklist types
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
if (post == null) {

@ -60,6 +60,7 @@ import java.util.TreeMap;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@ -77,7 +78,7 @@ public class Blacklist_p {
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
// getting the list of supported blacklist types
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
String blacklistToUse = null;

@ -69,6 +69,7 @@ import de.anomic.kelondro.kelondroRotateIterator;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -384,7 +385,7 @@ public class IndexControl_p {
if (post.containsKey("blacklistdomains")) {
PrintWriter pw;
try {
String[] supportedBlacklistTypes = env.getConfig("BlackLists.types", "").split(",");
String[] supportedBlacklistTypes = abstractURLPattern.BLACKLIST_TYPES_STRING.split(",");
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklist), true));
URL url;
for (int i=0; i<urlx.length; i++) {

@ -26,16 +26,19 @@
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import de.anomic.http.httpHeader;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaURL;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
@ -129,6 +132,10 @@ public class Surftips {
if (row == null) continue;
url = row.getColString(0, null);
try{
if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_SURFTIPS ,new URL(url)))
continue;
}catch(MalformedURLException e){continue;};
title = row.getColString(1,"UTF-8");
description = row.getColString(2,"UTF-8");
if ((url == null) || (title == null) || (description == null)) continue;

@ -60,6 +60,7 @@ import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.tools.nxTools;
@ -206,7 +207,7 @@ public class sharedBlacklist_p {
count++;
if (plasmaSwitchboard.urlBlacklist != null) {
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {

@ -55,7 +55,10 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Vector;
import com.sun.tools.javac.comp.Env;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.urlPattern.abstractURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern.blacklistFile;
import de.anomic.server.serverCore;
@ -332,13 +335,14 @@ public class listManager {
// load all active Blacklists in the Proxy
public static void reloadBlacklists(){
String supportedBlacklistTypesStr = switchboard.getConfig("BlackLists.types", "");
String supportedBlacklistTypesStr = abstractURLPattern.BLACKLIST_TYPES_STRING;
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
for (int i=0; i < supportedBlacklistTypes.length; i++) {
blacklistFile blFile = new blacklistFile(
switchboard.getConfig(supportedBlacklistTypes[i] + ".BlackLists", ""),
switchboard.getConfig(
supportedBlacklistTypes[i] + ".BlackLists", switchboard.getConfig("lackLists.DefaultList", "url.default.black")),
supportedBlacklistTypes[i]);
blacklistFiles.add(blFile);
}

@ -63,8 +63,10 @@ public abstract class abstractURLPattern implements plasmaURLPattern {
plasmaURLPattern.BLACKLIST_CRAWLER,
plasmaURLPattern.BLACKLIST_PROXY,
plasmaURLPattern.BLACKLIST_DHT,
plasmaURLPattern.BLACKLIST_SEARCH
plasmaURLPattern.BLACKLIST_SEARCH,
plasmaURLPattern.BLACKLIST_SURFTIPS
}));
public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips";
protected File blacklistRootPath = null;
protected HashMap cachedUrlHashs = null;

@ -10,6 +10,7 @@ public interface plasmaURLPattern {
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_SEARCH = "search";
public static final String BLACKLIST_SURFTIPS = "surftips";
public static final class blacklistFile {

@ -251,8 +251,10 @@ public class migration {
sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value);
sb.setConfig("surftips.BlackLists", value);
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
sb.setConfig("proxyBlackListsActive", "");
}
// migration of http specific crawler settings

@ -248,13 +248,8 @@ proxyYellowList=yacy.yellow
# instead always a 404 is returned
# all these files will be placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.types=proxy,crawler,dht,search
BlackLists.Shared=url.default.black
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
BlackLists.DefaultList=url.default.black
proxyCookieBlackList=cookie.default.black
proxyCookieWhiteList=cookie.default.black

Loading…
Cancel
Save