* add domaincheck (local/global/domainlist) to urlcleaner

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7311 6c8d7289-2bf4-0310-a012-ef5d649a1542
f1ori committed 14 years ago
parent 442bebca2b
commit 2c539b514a
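
Summary: the urlcleaner (MetadataRepository.BlacklistCleaner) now also deletes URLs that fail the peer's domain check. A CrawlStacker is threaded through from the IndexCleaner_p servlet; as the hunks below show, its urlInAcceptedDomain() returns null for an accepted URL and a non-null rejection reason otherwise, so any URL outside the configured domain scope (local, global, or an explicit domain list) is treated the same way as a blacklisted entry.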

@@ -56,7 +56,7 @@ public class IndexCleaner_p {
         if (post!=null) {
             if (post.get("action").equals("ustart")) {
                 if (urldbCleanerThread==null || !urldbCleanerThread.isAlive()) {
-                    urldbCleanerThread = indexSegment.urlMetadata().getBlacklistCleaner(Switchboard.urlBlacklist);
+                    urldbCleanerThread = indexSegment.urlMetadata().getBlacklistCleaner(Switchboard.urlBlacklist, sb.crawlStacker);
                     urldbCleanerThread.start();
                 }
                 else {

@@ -38,6 +38,8 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeSet;
 
+import de.anomic.crawler.CrawlStacker;
+
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.protocol.http.HTTPClient;
 import net.yacy.cora.storage.DynamicScore;
@@ -322,8 +324,8 @@ public final class MetadataRepository implements Iterable<byte[]> {
         }
     }
 
-    public BlacklistCleaner getBlacklistCleaner(final Blacklist blacklist) {
-        return new BlacklistCleaner(blacklist);
+    public BlacklistCleaner getBlacklistCleaner(final Blacklist blacklist, final CrawlStacker crawlStacker) {
+        return new BlacklistCleaner(blacklist, crawlStacker);
     }
 
     public class BlacklistCleaner extends Thread {
@@ -337,9 +339,11 @@ public final class MetadataRepository implements Iterable<byte[]> {
         public String lastUrl = "";
         public String lastHash = "";
         private final Blacklist blacklist;
+        private final CrawlStacker crawlStacker;
 
-        public BlacklistCleaner(final Blacklist blacklist) {
+        public BlacklistCleaner(final Blacklist blacklist, final CrawlStacker crawlStacker) {
             this.blacklist = blacklist;
+            this.crawlStacker = crawlStacker;
         }
 
         public void run() {
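
Note: the CrawlStacker is injected through the constructor and kept in a final field, so the cleaner thread carries its own reference to the domain filter instead of reaching into global Switchboard state while it runs.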
@@ -377,7 +381,8 @@ public final class MetadataRepository implements Iterable<byte[]> {
                     continue;
                 }
                 if (blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) ||
-                    blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url())) {
+                    blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) ||
+                    (crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) {
                     lastBlacklistedUrl = metadata.url().toNormalform(true, true);
                     lastBlacklistedHash = new String(entry.hash());
                     if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + new String(entry.hash()) + " " + metadata.url().toNormalform(false, true));
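
For reference, a minimal self-contained sketch of the "null means accepted" contract the new condition relies on. All names here (DomainCheckSketch, checkDomain, Mode) are illustrative only, not YaCy API; the real logic lives in CrawlStacker.urlInAcceptedDomain(), which is not part of this diff.

    import java.util.Set;

    public final class DomainCheckSketch {

        enum Mode { LOCAL, GLOBAL, DOMAINLIST }

        // Returns null if the host is accepted, otherwise a rejection reason,
        // mirroring the contract the cleaner tests with "!= null" above.
        static String checkDomain(final Mode mode, final String host, final Set<String> domainList) {
            switch (mode) {
                case LOCAL:
                    // crude intranet heuristic, for the sketch only
                    if (!host.contains(".") || host.endsWith(".local")) return null;
                    return "url does not belong to local network: " + host;
                case GLOBAL:
                    if (host.contains(".") && !host.endsWith(".local")) return null;
                    return "url does not belong to global network: " + host;
                case DOMAINLIST:
                    if (domainList != null && domainList.contains(host)) return null;
                    return "url not in domain list: " + host;
                default:
                    return "unknown domain check mode";
            }
        }

        public static void main(final String[] args) {
            // the cleaner's test: a non-null result marks the URL for deletion
            final String reason = checkDomain(Mode.DOMAINLIST, "example.com", Set.of("example.org"));
            if (reason != null) System.out.println("would delete: " + reason);
        }
    }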
