* Add domain check (local/global/domain list) to the URL cleaner

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7311 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
f1ori 14 years ago
parent 442bebca2b
commit 2c539b514a

@ -56,7 +56,7 @@ public class IndexCleaner_p {
if (post!=null) {
if (post.get("action").equals("ustart")) {
if (urldbCleanerThread==null || !urldbCleanerThread.isAlive()) {
urldbCleanerThread = indexSegment.urlMetadata().getBlacklistCleaner(Switchboard.urlBlacklist);
urldbCleanerThread = indexSegment.urlMetadata().getBlacklistCleaner(Switchboard.urlBlacklist, sb.crawlStacker);
urldbCleanerThread.start();
}
else {

@ -38,6 +38,8 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import de.anomic.crawler.CrawlStacker;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.DynamicScore;
@ -322,8 +324,8 @@ public final class MetadataRepository implements Iterable<byte[]> {
}
}
public BlacklistCleaner getBlacklistCleaner(final Blacklist blacklist) {
return new BlacklistCleaner(blacklist);
public BlacklistCleaner getBlacklistCleaner(final Blacklist blacklist, final CrawlStacker crawlStacker) {
return new BlacklistCleaner(blacklist, crawlStacker);
}
public class BlacklistCleaner extends Thread {
@ -337,9 +339,11 @@ public final class MetadataRepository implements Iterable<byte[]> {
public String lastUrl = "";
public String lastHash = "";
private final Blacklist blacklist;
private final CrawlStacker crawlStacker;
public BlacklistCleaner(final Blacklist blacklist) {
public BlacklistCleaner(final Blacklist blacklist, final CrawlStacker crawlStacker) {
this.blacklist = blacklist;
this.crawlStacker = crawlStacker;
}
public void run() {
@ -377,7 +381,8 @@ public final class MetadataRepository implements Iterable<byte[]> {
continue;
}
if (blacklist.isListed(Blacklist.BLACKLIST_CRAWLER, metadata.url()) ||
blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url())) {
blacklist.isListed(Blacklist.BLACKLIST_DHT, metadata.url()) ||
(crawlStacker.urlInAcceptedDomain(metadata.url()) != null)) {
lastBlacklistedUrl = metadata.url().toNormalform(true, true);
lastBlacklistedHash = new String(entry.hash());
if (Log.isFine("URLDBCLEANER")) Log.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double) blacklistedUrls / totalSearchedUrls) * 100 + "%): " + new String(entry.hash()) + " " + metadata.url().toNormalform(false, true));

Loading…
Cancel
Save