diff --git a/defaults/yacy.init b/defaults/yacy.init index 6e077dfe9..bf0c048d6 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -298,7 +298,6 @@ proxyYellowList=yacy.yellow # the black-list; URLs appearing in this list will not be loaded; # instead always a 404 is returned # all these files will be placed in the listsPath -BlackLists.class=de.anomic.kelondro.text.DefaultBlacklist BlackLists.Shared=url.default.black BlackLists.DefaultList=url.default.black diff --git a/source/de/anomic/data/DefaultBlacklist.java b/source/de/anomic/data/DefaultBlacklist.java index 95ea8560f..98b7d76fa 100644 --- a/source/de/anomic/data/DefaultBlacklist.java +++ b/source/de/anomic/data/DefaultBlacklist.java @@ -33,9 +33,6 @@ import java.util.Map.Entry; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; - - - public class DefaultBlacklist extends AbstractBlacklist implements Blacklist { public DefaultBlacklist(final File rootPath) { diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 214208fdc..566afae9f 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -91,7 +91,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; -import java.lang.reflect.Constructor; import java.net.MalformedURLException; import java.security.NoSuchAlgorithmException; import java.security.PublicKey; @@ -132,6 +131,7 @@ import de.anomic.crawler.retrieval.Request; import de.anomic.crawler.retrieval.LoaderDispatcher; import de.anomic.crawler.retrieval.Response; import de.anomic.data.Blacklist; +import de.anomic.data.DefaultBlacklist; import de.anomic.data.LibraryProvider; import de.anomic.data.URLLicense; import de.anomic.data.blogBoard; @@ -208,8 +208,8 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi public static long lastPPMUpdate = System.currentTimeMillis()- 30000; // colored list management - public static TreeSet badwords = new TreeSet(); - public static TreeSet stopwords = new TreeSet(); + public static TreeSet badwords = new TreeSet(NaturalOrder.naturalComparator); + public static TreeSet stopwords = new TreeSet(NaturalOrder.naturalComparator); public static TreeSet blueList = null; public static TreeSet badwordHashes = null; public static TreeSet blueListHashes = null; @@ -419,35 +419,10 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi ppRamString(plasmaBlueListFile.length()/1024)); } - // load the black-list / inspired by [AS] + // load blacklist + this.log.logConfig("Loading blacklist ..."); final File blacklistsPath = getConfigPath(SwitchboardConstants.LISTS_PATH, SwitchboardConstants.LISTS_PATH_DEFAULT); - String[] blacklistClassName = new String[] { - getConfig(SwitchboardConstants.BLACKLIST_CLASS, SwitchboardConstants.BLACKLIST_CLASS_DEFAULT), - SwitchboardConstants.BLACKLIST_CLASS_DEFAULT - }; - - this.log.logConfig("Starting blacklist engine ..."); - urlBlacklist = null; - for (int i = 0; i < blacklistClassName.length; i++) { - try { - final Class blacklistClass = Class.forName(blacklistClassName[i]); - final Constructor blacklistClassConstr = blacklistClass.getConstructor( new Class[] { File.class } ); - urlBlacklist = (Blacklist) blacklistClassConstr.newInstance(new Object[] { blacklistsPath }); - this.log.logFine("Used blacklist engine class: " + blacklistClassName); - this.log.logConfig("Using blacklist engine: " + urlBlacklist.getEngineInfo()); - break; - } catch (final Exception e) { - continue; // try next - } catch (final Error e) { - continue; // try next - } - } - if (urlBlacklist == null) { - this.log.logSevere("Unable to load the blacklist engine"); - System.exit(-1); - } - - this.log.logConfig("Loading backlist data ..."); + urlBlacklist = new DefaultBlacklist(blacklistsPath); listManager.switchboard = this; listManager.listsPath = blacklistsPath; listManager.reloadBlacklists(); diff --git a/source/de/anomic/search/SwitchboardConstants.java b/source/de/anomic/search/SwitchboardConstants.java index 69529efe4..ddce48dcc 100644 --- a/source/de/anomic/search/SwitchboardConstants.java +++ b/source/de/anomic/search/SwitchboardConstants.java @@ -286,6 +286,7 @@ public final class SwitchboardConstants { */ public static final String CRAWLER_THREADS_ACTIVE_MAX = "crawler.MaxActiveThreads"; public static final String YACY_MODE_DEBUG = "yacyDebugMode"; + /** *

public static final String WORDCACHE_MAX_COUNT = "wordCacheMaxCount"

*

Name of the setting how many words the word-cache (or DHT-Out cache) shall contain maximal. Indexing pages if the @@ -295,20 +296,13 @@ public final class SwitchboardConstants { public static final String HTTPC_NAME_CACHE_CACHING_PATTERNS_NO = "httpc.nameCacheNoCachingPatterns"; public static final String ROBOTS_TXT = "httpd.robots.txt"; public static final String ROBOTS_TXT_DEFAULT = RobotsTxtConfig.LOCKED + "," + RobotsTxtConfig.DIRS; - /** - *

public static final String BLACKLIST_CLASS = "Blacklist.class"

- *

Name of the setting which Blacklist backend shall be used. Due to different requirements of users, the - * {@link plasmaURLPattern}-interface has been created to support blacklist engines different from YaCy's default

- *

Attention is required when the backend is changed, because different engines may have different syntaxes

- */ - public static final String BLACKLIST_CLASS = "BlackLists.class"; + /** *

public static final String BLACKLIST_CLASS_DEFAULT = "de.anomic.plasma.urlPattern.defaultURLPattern"

*

Package and name of YaCy's {@link DefaultBlacklist default} blacklist implementation

* * @see DefaultBlacklist for a detailed overview about the syntax of the default implementation */ - public static final String BLACKLIST_CLASS_DEFAULT = "de.anomic.data.DefaultBlacklist"; public static final String LIST_BLUE = "plasmaBlueList"; public static final String LIST_BLUE_DEFAULT = null; public static final String LIST_BADWORDS_DEFAULT = "yacy.badwords";