From a2841261bdd0c4eef6c5acc7888973803854d05d Mon Sep 17 00:00:00 2001 From: cominch Date: Wed, 29 Aug 2012 09:52:14 +0200 Subject: [PATCH] content control: apply filter if enabled to crawls --- source/de/anomic/crawler/CrawlStacker.java | 22 ++++++++++++++++++++ source/de/anomic/data/ymark/YMarkTables.java | 15 +++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 66ec72b0c..360f8e667 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -44,6 +44,7 @@ import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.ftp.FTPClient; +import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; import net.yacy.kelondro.logging.Log; @@ -535,6 +536,27 @@ public final class CrawlStacker { return "the url '" + url + "' is not in domainList of this network"; } } + + if (Switchboard.getSwitchboard().getConfigBool( + "contentcontrol.enabled", false) == true) { + + if (!Switchboard.getSwitchboard() + .getConfig("contentcontrol.mandatoryfilterlist", "") + .equals("")) { + FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); + if (f != null) { + if (!f.isListed(url, null)) { + + return "the url '" + + url + + "' does not belong to the network mandatory filter list"; + + } + } + } + + } + final boolean local = url.isLocal(); if (this.acceptLocalURLs && local) return null; if (this.acceptGlobalURLs && !local) return null; diff --git a/source/de/anomic/data/ymark/YMarkTables.java b/source/de/anomic/data/ymark/YMarkTables.java index e6b0450df..8015d6a78 100644 --- a/source/de/anomic/data/ymark/YMarkTables.java +++ b/source/de/anomic/data/ymark/YMarkTables.java @@ -27,6 +27,7 @@ package de.anomic.data.ymark; import java.io.IOException; +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collections; import java.util.EnumMap; @@ -44,6 +45,7 @@ import net.yacy.document.Parser.Failure; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables.Row; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.logging.Log; import net.yacy.repository.LoaderDispatcher; import de.anomic.data.WorkTables; @@ -338,11 +340,20 @@ public class YMarkTables { public void addBookmark(final String bmk_user, final YMarkEntry bmk, final boolean mergeTags, final boolean mergeFolders) throws IOException, SpaceExceededException { final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); final String date = String.valueOf(System.currentTimeMillis()); - final byte[] urlHash = YMarkUtil.getBookmarkId(bmk.get(YMarkEntry.BOOKMARK.URL.key())); + byte[] urlHash = null; + try { + urlHash = YMarkUtil.getBookmarkId(bmk.get(YMarkEntry.BOOKMARK.URL.key())); + } catch (MalformedURLException e) { + Log.logInfo("BOOKMARKIMPORT", "invalid url: "+bmk.get(YMarkEntry.BOOKMARK.URL.key())); + } Tables.Row bmk_row = null; if (urlHash != null) { - bmk_row = this.worktables.select(bmk_table, urlHash); + try { + bmk_row = this.worktables.select(bmk_table, urlHash); + } catch (Exception e) { + + } if (bmk_row == null) { // create and insert new entry if(!bmk.containsKey(YMarkEntry.BOOKMARK.DATE_ADDED.key())) {