diff --git a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java index db87594c4..73207cdcb 100644 --- a/source/de/anomic/plasma/urlPattern/abstractURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/abstractURLPattern.java @@ -64,9 +64,10 @@ public abstract class abstractURLPattern implements plasmaURLPattern { plasmaURLPattern.BLACKLIST_PROXY, plasmaURLPattern.BLACKLIST_DHT, plasmaURLPattern.BLACKLIST_SEARCH, - plasmaURLPattern.BLACKLIST_SURFTIPS + plasmaURLPattern.BLACKLIST_SURFTIPS, + plasmaURLPattern.BLACKLIST_NEWS })); - public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips"; + public static final String BLACKLIST_TYPES_STRING="proxy,crawler,dht,search,surftips,news"; protected File blacklistRootPath = null; protected HashMap cachedUrlHashs = null; diff --git a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java index 59ed9b897..4655f028a 100644 --- a/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java +++ b/source/de/anomic/plasma/urlPattern/plasmaURLPattern.java @@ -11,6 +11,7 @@ public interface plasmaURLPattern { public static final String BLACKLIST_PROXY = "proxy"; public static final String BLACKLIST_SEARCH = "search"; public static final String BLACKLIST_SURFTIPS = "surftips"; + public static final String BLACKLIST_NEWS = "news"; public static final class blacklistFile { diff --git a/source/de/anomic/yacy/yacyNewsPool.java b/source/de/anomic/yacy/yacyNewsPool.java index 61a06b230..2ab651690 100644 --- a/source/de/anomic/yacy/yacyNewsPool.java +++ b/source/de/anomic/yacy/yacyNewsPool.java @@ -47,6 +47,11 @@ package de.anomic.yacy; import java.io.File; import java.io.IOException; import java.util.HashSet; +import java.util.Map; + +import de.anomic.net.URL; +import de.anomic.plasma.plasmaSwitchboard; +import de.anomic.plasma.urlPattern.plasmaURLPattern; public class yacyNewsPool { @@ -300,6 +305,19 @@ public class yacyNewsPool { if (record.category() == null) return; if (!(categories.contains(record.category()))) return; if (record.created().getTime() == 0) return; + Map attributes = record.attributes(); + if (attributes.containsKey("url")){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("url")))){ + System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("url")); + return; + } + } + if (attributes.containsKey("startURL")){ + if(plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_NEWS, new URL((String) attributes.get("startURL")))){ + System.out.println("DEBUG: ignored news-entry url blacklisted: " + attributes.get("startURL")); + return; + } + } // double-check with old news if (newsDB.get(record.id()) != null) return; diff --git a/yacy.init b/yacy.init index 95c1bb35e..81a195561 100644 --- a/yacy.init +++ b/yacy.init @@ -257,6 +257,7 @@ crawler.BlackLists=url.default.black dht.BlackLists=url.default.black search.BlackLists=url.default.black surftips.BlackLists=url.default.black +news.BlackLists=url.default.black proxyCookieBlackList=cookie.default.black