*) adding an interface for customized blacklist classes

- it is now possible to use a customized blacklist engine
     instead of the default one
   - this is done by setting the property BlackLists.class
     to the class name of the engine to load
   See: http://www.yacy-forum.de/viewtopic.php?t=2108

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2397 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 6d2f15971a
commit eee44be602

@ -8,6 +8,8 @@
#%env/templates/header.template%#
<br><br>
<h2>Blacklist</h2>
<p>Used Blacklist engine: <span class="settingsValue">#[blacklistEngine]#</span></p>
<p>This function provides an URL filter to the proxy; any blacklisted URL is blocked
from being loaded. You can define several blacklists and activate them separately.
You may also provide your blacklist to other peers by sharing them; in return you may

@ -79,6 +79,7 @@ public class Blacklist_p {
String blacklistToUse = null;
serverObjects prop = new serverObjects();
prop.put("blacklistEngine", plasmaSwitchboard.urlBlacklist.getEngineInfo());
// do all post operations
if (post != null) {
@ -334,7 +335,7 @@ public class Blacklist_p {
if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) {
prop.put(BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(BLACKLIST + blacklistCount + "_shared", 0);
prop.put(BLACKLIST + blacklistCount + "_selected", 0);
if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(BLACKLIST + blacklistCount + "_selected", 1);
@ -351,7 +352,7 @@ public class Blacklist_p {
if (listManager.ListInListslist(BLACKLIST_SHARED, dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_shared", 1);
} else {
prop.put(BLACKLIST + blacklistCount + "_selected", 0);
prop.put(BLACKLIST + blacklistCount + "_shared", 0);
}
int activeCount = 0;

@ -63,8 +63,8 @@ import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyClient;

@ -54,7 +54,7 @@ import de.anomic.http.httpHeader;
import de.anomic.index.indexEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -99,7 +99,7 @@ public final class transferRWI {
shortCacheFlush = true;
break;
}
try {Thread.sleep(100);} catch (InterruptedException e) {}
try {Thread.sleep(100);} catch (InterruptedException e) {/* */}
}
}

@ -48,7 +48,7 @@
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;

@ -96,7 +96,7 @@ import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;

@ -73,6 +73,7 @@ import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroRow;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.serverObjects;
import de.anomic.server.logging.serverLog;

@ -66,6 +66,7 @@ import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroTree;
import de.anomic.plasma.plasmaCrawlEURL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverSemaphore;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.bitfield;

@ -51,6 +51,8 @@ import java.net.MalformedURLException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import java.net.UnknownHostException;
import java.util.Date;
import de.anomic.http.httpHeader;

@ -106,6 +106,7 @@ package de.anomic.plasma;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.InetAddress;
import java.net.MalformedURLException;
@ -133,6 +134,7 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpc;
import de.anomic.http.httpdHandler;
import de.anomic.index.indexContainer;
import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute;
@ -145,6 +147,7 @@ import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.kelondro.kelondroMapTable;
import de.anomic.plasma.dbImport.dbImportManager;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverAbstractSwitch;
import de.anomic.server.serverCodings;
import de.anomic.server.serverDate;
@ -303,7 +306,24 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load the black-list / inspired by [AS]
File ulrBlackListFile = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
urlBlacklist = new plasmaURLPattern(ulrBlackListFile);
String blacklistClassName = getConfig("BlackLists.class", "de.anomic.plasma.urlPattern.defaultURLPattern");
this.log.logConfig("Starting blacklist engine ...");
try {
Class blacklistClass = Class.forName(blacklistClassName);
Constructor blacklistClassConstr = blacklistClass.getConstructor( new Class[] { File.class } );
urlBlacklist = (plasmaURLPattern) blacklistClassConstr.newInstance(new Object[] { ulrBlackListFile });
this.log.logFine("Used blacklist engine class: " + blacklistClassName);
this.log.logConfig("Using blacklist engine: " + urlBlacklist.getEngineInfo());
} catch (Exception e) {
this.log.logSevere("Unable to load the blacklist engine",e);
System.exit(-1);
} catch (Error e) {
this.log.logSevere("Unable to load the blacklist engine",e);
System.exit(-1);
}
this.log.logConfig("Loading backlist data ...");
listManager.switchboard = this;
listManager.listsPath = ulrBlackListFile;
listManager.reloadBlacklists();

@ -56,6 +56,7 @@ import java.util.Set;
import java.util.Date;
import java.util.TreeSet;
import de.anomic.net.URL;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexCollectionRI;

@ -45,7 +45,6 @@ package de.anomic.yacy;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import de.anomic.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
@ -59,19 +58,18 @@ import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.net.URL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.serverCodings;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverCodings;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacyVersion;
public final class yacyClient {

@ -44,17 +44,17 @@
package de.anomic.yacy;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.HashMap;
import de.anomic.index.indexContainer;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSearchRankingProfile;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSearchTimingProfile;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.urlPattern.plasmaURLPattern;
import de.anomic.server.logging.serverLog;
public class yacySearch extends Thread {

@ -247,7 +247,7 @@ public class migration {
}
// migration for blacklists
if ((value = sb.getConfig("proxyBlackLists","")).length() > 0) {
if ((value = sb.getConfig("proxyBlackListsActive","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value);

@ -205,6 +205,7 @@ proxyYellowList=yacy.yellow
# the blacklist: URLs appearing in this list will not be loaded;
# instead, a 404 is always returned
# all of these files are placed in the listsPath
BlackLists.class=de.anomic.plasma.urlPattern.defaultURLPattern
BlackLists.types=proxy,crawler,dht,search
BlackLists.Shared=url.default.black

Loading…
Cancel
Save