diff --git a/defaults/yacy.init b/defaults/yacy.init index 25e0697a4..cd53877ac 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1069,7 +1069,20 @@ augmentation.reflect = false augmentation.addDoctype = false augmentation.reparse = false +# Content control settings +contentcontrol.enabled = false +contentcontrol.bookmarklist = contentcontrol +contentcontrol.mandatoryfilterlist = yacy +contentcontrol.smwimport.enabled = false +contentcontrol.smwimport.baseurl = +contentcontrol.smwimport.purgelistoninit = true +contentcontrol.smwimport.targetlist = contentcontrol +contentcontrol.smwimport.defaultcategory = yacy + # Interaction settings +interaction.enabled = false +interaction.target = yacy + interaction.feedback.enabled = true interaction.feedback.url = interaction.feedback.accept = false diff --git a/htroot/ContentControl_p.html b/htroot/ContentControl_p.html new file mode 100644 index 000000000..d830e834b --- /dev/null +++ b/htroot/ContentControl_p.html @@ -0,0 +1,116 @@ + + + + YaCy '#[clientname]#': Content Control + #%env/templates/metas.template%# + + + #%env/templates/header.template%# + #%env/templates/submenuBlacklist.template%# + +

Content Control

+ +
+ +
Peer Content Control URL Filter +

+ With these settings you can activate or deactivate content control on this peer.

+ + +
+ +
+
+ Enabled
+

+ Enables or disables content control. +

+
+ +
+
+

+

+ Define a category string. If defined, all URLs which do not belong to this category will be filtered out during crawling and DHT.

+
+ +
+
+

+

+ Define a bookmark list. Default: contentcontrol +

+
+ + +
+ + + +
+
+ + + +
+
Content Control Settings +

+ With these settings you can define the content control settings.

+ +
+ +
+
+ Enabled
+

+ Enable or disable constant background synchronisation of content control list from SMW (Semantic Mediawiki). Requires restart! +

+
+ +
+
+

+

+ Define base URL for SMW special page "Ask". Example: http://my.wiki.cc/wiki/Special:Ask +

+
+ +
+
+

+

+ Define import target bookmark list. Default: contentcontrol +

+
+ +
+
+

+

+ Define default category which is added to each entry. This category can be defined as mandatory default filter list. +

+
+ +
+
+ Enabled
+

+ Purge content control list on initial synchronisation after startup. +

+
+ + +
+ + + +
+
+ + + #%env/templates/footer.template%# + +
diff --git a/htroot/ContentControl_p.java b/htroot/ContentControl_p.java
new file mode 100644
index 000000000..fbf8eccf1
--- /dev/null
+++ b/htroot/ContentControl_p.java
@@ -0,0 +1,78 @@
+import net.yacy.cora.protocol.RequestHeader;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+/**
+ * Servlet behind ContentControl_p.html: stores the content control and SMW
+ * import settings submitted from that page and fills the template properties
+ * with the currently configured values.
+ */
+public final class ContentControl_p {
+
+    /**
+     * Handles a request for the content control settings page.
+     *
+     * @param header request header (unused)
+     * @param post   submitted form fields, or {@code null} if nothing was submitted
+     * @param env    switchboard whose configuration is read and written
+     * @return the properties used to render the page
+     */
+    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
+            final serverObjects post, final serverSwitch env) {
+
+        final serverObjects prop = new serverObjects();
+
+        if (post != null) {
+
+            if (post.containsKey("contentcontrolExtraSettings")) {
+                // checkbox fields carry "on" when ticked; anything else counts as off
+                env.setConfig("contentcontrol.smwimport.enabled", "on".equals(post.get("ccsmwimport")));
+                env.setConfig("contentcontrol.smwimport.purgelistoninit", "on".equals(post.get("ccsmwpurge")));
+                storeText(post, env, "ccsmwimporturl", "contentcontrol.smwimport.baseurl");
+                storeText(post, env, "ccsmwimportlist", "contentcontrol.smwimport.targetlist");
+                storeText(post, env, "ccsmwimportcat", "contentcontrol.smwimport.defaultcategory");
+            }
+
+            if (post.containsKey("contentcontrolSettings")) {
+                env.setConfig("contentcontrol.enabled", "on".equals(post.get("contentcontrolenabled")));
+                storeText(post, env, "contentcontrolmfl", "contentcontrol.mandatoryfilterlist");
+                storeText(post, env, "contentcontrolbml", "contentcontrol.bookmarklist");
+            }
+        }
+
+        prop.putHTML("ccsmwimportcat",
+                env.getConfig("contentcontrol.smwimport.defaultcategory", "yacy"));
+        prop.putHTML("ccsmwimportlist",
+                env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
+        prop.put("ccsmwpurge_checked",
+                env.getConfigBool("contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
+        prop.putHTML("ccsmwimporturl",
+                env.getConfig("contentcontrol.smwimport.baseurl", ""));
+        prop.put("ccsmwimport_checked",
+                env.getConfigBool("contentcontrol.smwimport.enabled", false) ? "1" : "0");
+
+        prop.put("contentcontrolenabled_checked",
+                env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
+        prop.putHTML("contentcontrolmfl",
+                env.getConfig("contentcontrol.mandatoryfilterlist", "yacy"));
+        // same fallback as yacy.init and the filter update thread use for this key
+        prop.putHTML("contentcontrolbml",
+                env.getConfig("contentcontrol.bookmarklist", "contentcontrol"));
+
+        // return rewrite properties
+        return prop;
+    }
+
+    /**
+     * Copies one text field of the submitted form into the configuration.
+     * A field that is missing from the request leaves the stored value untouched.
+     */
+    private static void storeText(final serverObjects post, final serverSwitch env,
+            final String field, final String configKey) {
+        final String value = post.get(field);
+        if (value != null) {
+            env.setConfig(configKey, value);
+        }
+    }
+
+}
diff --git a/htroot/api/yacydoc.html b/htroot/api/yacydoc.html index 4f3b09448..89236c710 100644 --- a/htroot/api/yacydoc.html +++ b/htroot/api/yacydoc.html @@ -13,7 +13,7 @@ you can validate it with http://validator.w3.org/
-API +API This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API. To see a list of all APIs, please visit the API wiki page. diff --git a/htroot/env/templates/submenuBlacklist.template b/htroot/env/templates/submenuBlacklist.template index a02bb6c1f..04bb039d5 100644 --- a/htroot/env/templates/submenuBlacklist.template +++ b/htroot/env/templates/submenuBlacklist.template @@ -5,5 +5,6 @@
  • Blacklist Cleaner
  • Blacklist Test
  • Import/Export
  • +
  • Content Control
  • \ No newline at end of file diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index 66ec72b0c..360f8e667 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -44,6 +44,7 @@ import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.ftp.FTPClient; +import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; import net.yacy.kelondro.logging.Log; @@ -535,6 +536,27 @@ public final class CrawlStacker { return "the url '" + url + "' is not in domainList of this network"; } } + + if (Switchboard.getSwitchboard().getConfigBool( + "contentcontrol.enabled", false) == true) { + + if (!Switchboard.getSwitchboard() + .getConfig("contentcontrol.mandatoryfilterlist", "") + .equals("")) { + FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); + if (f != null) { + if (!f.isListed(url, null)) { + + return "the url '" + + url + + "' does not belong to the network mandatory filter list"; + + } + } + } + + } + final boolean local = url.isLocal(); if (this.acceptLocalURLs && local) return null; if (this.acceptGlobalURLs && !local) return null; diff --git a/source/de/anomic/data/ymark/YMarkEntry.java b/source/de/anomic/data/ymark/YMarkEntry.java index bd73bc800..f5ee2397a 100644 --- a/source/de/anomic/data/ymark/YMarkEntry.java +++ b/source/de/anomic/data/ymark/YMarkEntry.java @@ -13,6 +13,7 @@ public class YMarkEntry extends TreeMap { private static final long serialVersionUID = 2179622977348536148L; public static final YMarkEntry POISON = new YMarkEntry(); + public static final YMarkEntry EMPTY = new YMarkEntry(); public static final String BOOKMARKS_ID = "id"; public static final String BOOKMARKS_REF = "ref"; public static final String FOLDERS_IMPORTED = 
"/imported"; @@ -28,7 +29,11 @@ public class YMarkEntry extends TreeMap { PUBLIC ("public", "", "false", "private", "yacy:public", "", "lock"), TAGS ("tags", "dc:subject", "unsorted", "shortcuturl", "yacy:tags", "keyword", "tag"), VISITS ("visits", "", "0", "", "yacy:visits", "", "stat"), - FOLDERS ("folders", "", "/unsorted", "", "", "", "folder"); + FOLDERS ("folders", "", "/unsorted", "", "", "", "folder"), + FILTER ("filter", "", "", "", "yacy:filter", "", "filter"), + OAI ("oai", "", "", "", "yacy:oai", "", "oai"), + URLHASH ("urlhash", "", "", "", "yacy:urlhash", "", "urlhash"), + STARRATING ("starrating", "", "", "", "yacy:starrating", "", "stat"); private String key; private String dc_attrb; diff --git a/source/de/anomic/data/ymark/YMarkSMWJSONImporter.java b/source/de/anomic/data/ymark/YMarkSMWJSONImporter.java new file mode 100644 index 000000000..3c6d2dd50 --- /dev/null +++ b/source/de/anomic/data/ymark/YMarkSMWJSONImporter.java @@ -0,0 +1,202 @@ +package de.anomic.data.ymark; + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.HashSet; +import java.util.concurrent.ArrayBlockingQueue; + +import net.yacy.kelondro.logging.Log; +import net.yacy.search.Switchboard; + +import org.json.simple.parser.ContentHandler; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class YMarkSMWJSONImporter implements Runnable, ContentHandler{ + + // Importer Variables + private final ArrayBlockingQueue bookmarks; + private final Reader bmk_file; + private final String RootFolder; + private final StringBuilder folderstring; + private YMarkEntry bmk; + private final JSONParser parser; + + private boolean empty = true; + private int count = 0; + + // Parser Variables + private final StringBuilder value; + private final StringBuilder key; + private final StringBuilder date; + private final HashMap obj; + + private Boolean isBookmark; + + public YMarkSMWJSONImporter(final Reader 
bmk_file, final int queueSize, final String root) { + this.bookmarks = new ArrayBlockingQueue(queueSize); + this.bmk_file = bmk_file; + this.RootFolder = root; + this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH); + this.folderstring.append(this.RootFolder); + this.bmk = new YMarkEntry(); + + this.parser = new JSONParser(); + + this.value = new StringBuilder(128); + this.key = new StringBuilder(16); + this.date = new StringBuilder(32); + this.obj = new HashMap(); + + this.isBookmark = false; + this.empty = true; + this.count = 0; + } + + public void startJSON() throws ParseException, IOException { + } + + public void endJSON() throws ParseException, IOException { + } + + public boolean startArray() throws ParseException, IOException { + final String key = this.key.toString(); + + if(key.equals("items") ) { + + this.isBookmark = true; + this.count = 0; + + } + return true; + } + + public boolean endArray() throws ParseException, IOException { + + return true; + } + + public boolean startObject() throws ParseException, IOException { + + return true; + } + + public boolean endObject() throws ParseException, IOException { + + if(this.isBookmark) { + + if(this.obj.containsKey("category")) { + String catstr = obj.get("category"); + + HashSet tags = YMarkUtil.keysStringToSet (catstr); + + HashSet categories = YMarkUtil.keysStringToSet(""); + + for (String c: tags) { + + c = c.split(":")[1]; + + c = c.replace("/", "_"); + c = c.replace(" ", "_"); + + if (!c.equals("") && (!c.equals(" "))) { + categories.add ("sc:"+c); + } + + } + + if (!Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "").equals("")) { + categories.add ("sc:"+Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "")); + } + + catstr = YMarkUtil.keySetToString(categories); + + this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), catstr); + } + + if(this.obj.containsKey("article_has_average_rating")) { + 
this.bmk.put(YMarkEntry.BOOKMARK.STARRATING.key(),obj.get("article_has_average_rating")); + } + + this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(),obj.get("label")); + this.bmk.put(YMarkEntry.BOOKMARK.URL.key(),obj.get("url")); + if(this.obj.containsKey("filter")) { + this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),obj.get("filter")); + } else { + this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),""); + } + try { + this.bookmarks.put(this.bmk); + this.count++; + } catch (InterruptedException e) { + Log.logException(e); + } + this.obj.clear(); + this.bmk = new YMarkEntry(); + } + + return true; + } + + public boolean startObjectEntry(String key) throws ParseException, IOException { + this.key.setLength(0); + this.key.append(key); + + return true; + } + + public boolean primitive(Object value) throws ParseException, IOException { + + this.value.setLength(0); + if(value instanceof java.lang.String) { + this.value.append((String)value); + } else if(value instanceof java.lang.Boolean) { + this.value.append((Boolean)value); + } else if(value instanceof java.lang.Number) { + this.value.append((Number)value); + } + + return true; + } + + public boolean endObjectEntry() throws ParseException, IOException { + + final String key = this.key.toString(); + final String value = this.value.toString(); + + this.obj.put(key, value); + + return true; + } + + public void run() { + try { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer run()"); + this.empty = true; + this.parser.parse(this.bmk_file, this, true); + + } catch (IOException e) { + Log.logException(e); + } catch (ParseException e) { + Log.logException(e); + } finally { + + try { + Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer inserted poison pill in queue"); + this.bookmarks.put(YMarkEntry.POISON); + } catch (InterruptedException e) { + Log.logException(e); + } + } + } + + public YMarkEntry take() { + try { + return this.bookmarks.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; 
+ } + } +} diff --git a/source/de/anomic/data/ymark/YMarkTables.java b/source/de/anomic/data/ymark/YMarkTables.java index da5470967..8015d6a78 100644 --- a/source/de/anomic/data/ymark/YMarkTables.java +++ b/source/de/anomic/data/ymark/YMarkTables.java @@ -27,6 +27,7 @@ package de.anomic.data.ymark; import java.io.IOException; +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collections; import java.util.EnumMap; @@ -44,6 +45,7 @@ import net.yacy.document.Parser.Failure; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.blob.Tables.Row; import net.yacy.kelondro.data.meta.DigestURI; +import net.yacy.kelondro.logging.Log; import net.yacy.repository.LoaderDispatcher; import de.anomic.data.WorkTables; @@ -99,9 +101,12 @@ public class YMarkTables { public final static int BUFFER_LENGTH = 256; private final WorkTables worktables; + + public boolean dirty = false; public YMarkTables(final Tables wt) { this.worktables = (WorkTables)wt; + dirty = true; } public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, SpaceExceededException { @@ -111,6 +116,7 @@ public class YMarkTables { if(bmk_row != null) { this.worktables.delete(bmk_table,urlHash); } + dirty = true; } public void deleteBookmark(final String bmk_user, final String url) throws IOException, SpaceExceededException { @@ -215,6 +221,16 @@ public class YMarkTables { final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE); return this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.TAGS.key(), p); } + + public Iterator getBookmarksByTag(final String bmk_user, String regex) throws IOException { + final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); + final StringBuilder patternBuilder = new StringBuilder(BUFFER_LENGTH); + patternBuilder.setLength(0); + patternBuilder.append(regex); + + final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE); + return 
this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.TAGS.key(), p); + } public List orderBookmarksBy(final Iterator rowIterator, final String sortname, final String sortorder) { final List sortList = new ArrayList(); @@ -236,6 +252,7 @@ public class YMarkTables { bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagString)); addBookmark(bmk_user, bmk, merge, true); } + dirty = true; } public void replaceTags(final Iterator rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException { @@ -255,6 +272,7 @@ public class YMarkTables { row.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(t.toString())); this.worktables.update(TABLES.BOOKMARKS.tablename(bmk_user), row); } + dirty = true; } public void addFolder(final String bmk_user, final String url, final String folder) throws IOException, SpaceExceededException { @@ -322,11 +340,20 @@ public class YMarkTables { public void addBookmark(final String bmk_user, final YMarkEntry bmk, final boolean mergeTags, final boolean mergeFolders) throws IOException, SpaceExceededException { final String bmk_table = TABLES.BOOKMARKS.tablename(bmk_user); final String date = String.valueOf(System.currentTimeMillis()); - final byte[] urlHash = YMarkUtil.getBookmarkId(bmk.get(YMarkEntry.BOOKMARK.URL.key())); + byte[] urlHash = null; + try { + urlHash = YMarkUtil.getBookmarkId(bmk.get(YMarkEntry.BOOKMARK.URL.key())); + } catch (MalformedURLException e) { + Log.logInfo("BOOKMARKIMPORT", "invalid url: "+bmk.get(YMarkEntry.BOOKMARK.URL.key())); + } Tables.Row bmk_row = null; if (urlHash != null) { - bmk_row = this.worktables.select(bmk_table, urlHash); + try { + bmk_row = this.worktables.select(bmk_table, urlHash); + } catch (Exception e) { + + } if (bmk_row == null) { // create and insert new entry if(!bmk.containsKey(YMarkEntry.BOOKMARK.DATE_ADDED.key())) { @@ -391,6 +418,8 @@ public class YMarkTables { // update bmk_table this.worktables.update(bmk_table, 
bmk_row); } + + dirty = true; } } }
diff --git a/source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java b/source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java
new file mode 100644
index 000000000..8950a2ecd
--- /dev/null
+++ b/source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java
@@ -0,0 +1,121 @@
+package net.yacy.interaction.contentcontrol;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import net.yacy.kelondro.blob.Tables;
+import net.yacy.kelondro.blob.Tables.Row;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.repository.FilterEngine;
+import net.yacy.search.Switchboard;
+
+/**
+ * Periodic job that rebuilds the network-wide URL filter from the content
+ * control bookmark list whenever the bookmark tables have been modified.
+ * The current filter is published through {@link #getNetworkFilter()}.
+ */
+public class ContentControlFilterUpdateThread {
+
+    private final Switchboard sb;
+
+    /** Guards {@link #run()} against overlapping invocations. */
+    private boolean locked = false;
+
+    /** Written by the update job, read by other threads through getNetworkFilter(). */
+    private static volatile FilterEngine networkfilter;
+
+    /**
+     * Creates the job. If "contentcontrol.smwimport.purgelistoninit" is set,
+     * the table named by "contentcontrol.smwimport.targetlist" is cleared.
+     *
+     * @param sb the switchboard providing configuration and tables
+     */
+    public ContentControlFilterUpdateThread(final Switchboard sb) {
+        this.sb = sb;
+
+        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)) {
+            this.sb.tables.clear(this.sb.getConfig(
+                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
+        }
+    }
+
+    /**
+     * Rebuilds the network filter if content control is enabled, a mandatory
+     * filter list is configured and the bookmark tables are marked dirty.
+     */
+    @SuppressWarnings("deprecation")
+    public final void run() {
+        if (this.locked) {
+            return;
+        }
+        this.locked = true;
+        try {
+            if (!this.sb.getConfigBool("contentcontrol.enabled", false)) {
+                return;
+            }
+            if (this.sb.getConfig("contentcontrol.mandatoryfilterlist", "").equals("")) {
+                return;
+            }
+            if (!this.sb.tables.bookmarks.dirty) {
+                return;
+            }
+
+            // clear the flag before reading, so that a bookmark change made while
+            // the filter is being rebuilt triggers another rebuild on the next run
+            this.sb.tables.bookmarks.dirty = false;
+
+            final FilterEngine newfilter = updateFilter();
+            if (newfilter != null) {
+                networkfilter = newfilter;
+            } else {
+                // keep the previous filter and retry on the next run
+                this.sb.tables.bookmarks.dirty = true;
+            }
+        } finally {
+            this.locked = false;
+        }
+    }
+
+    /**
+     * Builds a new filter from the "filter" column of the matching rows of
+     * the bookmark list.
+     *
+     * @return the new filter, or {@code null} if the bookmark list could not be read
+     */
+    private static FilterEngine updateFilter() {
+        final FilterEngine newfilter = new FilterEngine();
+        final Switchboard sb = Switchboard.getSwitchboard();
+
+        try {
+            // NOTE(review): the pattern only matches tag strings that do NOT start
+            // with "sc:<mandatoryfilterlist>" - confirm that this is intended
+            final Iterator<Row> it = sb.tables.bookmarks.getBookmarksByTag(
+                    sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"),
+                    "^((?!sc:" + sb.getConfig("contentcontrol.mandatoryfilterlist", "") + ").*)$");
+            while (it.hasNext()) {
+                final String filter = it.next().get("filter", "");
+                if (!filter.equals("")) {
+                    newfilter.add(filter, null);
+                }
+            }
+        } catch (final IOException e) {
+            Log.logException(e);
+            return null;
+        }
+
+        return newfilter;
+    }
+
+    /**
+     * @return the current network filter, or {@code null} if none has been
+     *         built yet or the current one contains no entries
+     */
+    public static FilterEngine getNetworkFilter() {
+        final FilterEngine f = networkfilter;
+        if (f != null && f.size() > 0) {
+            return f;
+        }
+        return null;
+    }
+
+}
diff --git a/source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java b/source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java
new file mode 100644
index 000000000..e95dc4c47
--- /dev/null
+++ b/source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java
@@ -0,0 +1,258 @@
+package net.yacy.interaction.contentcontrol;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+
+import net.yacy.cora.document.UTF8;
+import net.yacy.cora.protocol.http.HTTPClient;
+import net.yacy.cora.util.SpaceExceededException;
+import net.yacy.kelondro.blob.Tables.Row;
+import net.yacy.kelondro.data.meta.DigestURI;
+import net.yacy.kelondro.logging.Log;
+import net.yacy.search.Switchboard;
+import de.anomic.data.ymark.YMarkEntry;
+import de.anomic.data.ymark.YMarkSMWJSONImporter;
+import de.anomic.data.ymark.YMarkUtil;
+
+/**
+ * Periodic job that imports bookmarks from the "Ask" special page of a
+ * Semantic MediaWiki (SMW) into a bookmark list.
+ *
+ * Every call of {@link #run()} performs one step. While no import job is
+ * active, the wiki is asked how many pages were modified since the last
+ * synchronisation. While a job is active, one chunk of at most
+ * {@code limit} entries is imported per call.
+ */
+public class ContentControlImportThread {
+
+    private final Switchboard sb;
+
+    /** Guards {@link #run()} against overlapping invocations. */
+    private boolean locked = false;
+
+    /** Modification date used as lower bound by the next count query. */
+    private String lastsync = "1900-01-01T01:00:00";
+
+    /** Modification date used as lower bound by the import job in progress. */
+    private String currenttimestamp = "1900-01-01T01:00:00";
+
+    /** Offset of the next chunk to import. */
+    private long offset = 0;
+
+    /** Maximum number of entries requested per chunk. */
+    private final long limit = 500;
+
+    /** Number of entries reported by the last count query. */
+    private long currentmax = 0;
+
+    /** True while an import job is in progress. */
+    private boolean runningjob = false;
+
+    /**
+     * Creates the job. If "contentcontrol.smwimport.purgelistoninit" is set,
+     * the table named by "contentcontrol.smwimport.targetlist" is cleared.
+     *
+     * @param sb the switchboard providing configuration and tables
+     */
+    public ContentControlImportThread(final Switchboard sb) {
+        this.sb = sb;
+
+        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)) {
+            this.sb.tables.clear(this.sb.getConfig(
+                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
+        }
+    }
+
+    /**
+     * Replaces the characters - + space [ ] : > ? by '-' followed by their
+     * hexadecimal ASCII code, as used for the Ask query URLs built below.
+     */
+    private final String wikiurlify(final String s) {
+        return s.replace("-", "-2D") // must come first: the other replacements insert '-'
+                .replace("+", "-2B")
+                .replace(" ", "-20")
+                .replace("[", "-5B")
+                .replace("]", "-5D")
+                .replace(":", "-3A")
+                .replace(">", "-3E")
+                .replace("?", "-3F");
+    }
+
+    /**
+     * Builds the part of the Ask URL shared by the count and the import query.
+     *
+     * @param baseUrl configured URL of the Ask special page
+     * @param since   only pages modified after this date are selected
+     */
+    private String askUrlPrefix(final String baseUrl, final String since) {
+        return baseUrl
+                + wikiurlify("/[[Category:Web Page]] [[Modification date::>" + since + "]]")
+                + wikiurlify("/?Url/?Filter/?Article has average rating/?Category")
+                + "/mainlabel%3D";
+    }
+
+    /**
+     * Performs one synchronisation step if the SMW import is enabled.
+     */
+    @SuppressWarnings("deprecation")
+    public final void run() {
+        if (this.locked) {
+            return;
+        }
+        this.locked = true;
+        try {
+            if (!this.sb.getConfigBool("contentcontrol.smwimport.enabled", false)) {
+                return;
+            }
+            if (this.runningjob) {
+                importNextChunk();
+            } else {
+                checkForNewEntries();
+            }
+        } finally {
+            // release the guard on every path; otherwise a disabled import or an
+            // unexpected exception would block all later invocations
+            this.locked = false;
+        }
+    }
+
+    /**
+     * Fetches the next chunk of the running job from the wiki and adds its
+     * entries to the target bookmark list.
+     */
+    private void importNextChunk() {
+        Log.logInfo("CONTENTCONTROL",
+                "CONTENTCONTROL importing max. " + this.limit
+                        + " elements at " + this.offset + " of "
+                        + this.currentmax + ", since "
+                        + this.currenttimestamp);
+
+        final String baseUrl = this.sb.getConfig("contentcontrol.smwimport.baseurl", "");
+        if (baseUrl.equals("")) {
+            return;
+        }
+
+        final URL bmks_json;
+        try {
+            bmks_json = new URL(askUrlPrefix(baseUrl, this.currenttimestamp)
+                    + "/offset%3D" + this.offset
+                    + "/limit%3D" + this.limit
+                    + "/format%3Djson");
+        } catch (final MalformedURLException e) {
+            Log.logException(e);
+            return;
+        }
+
+        this.offset += this.limit;
+        if (this.offset > this.currentmax) {
+            this.runningjob = false;
+        }
+
+        final InputStreamReader reader;
+        try {
+            reader = new InputStreamReader(bmks_json.openStream(), "UTF-8");
+        } catch (final IOException e) {
+            Log.logException(e);
+            this.runningjob = false;
+            return;
+        }
+
+        try {
+            final YMarkSMWJSONImporter bookmarkImporter = new YMarkSMWJSONImporter(reader, 200, "");
+            new Thread(bookmarkImporter, "YMarks - Network bookmark importer").start();
+
+            final String targetList = this.sb.getConfig(
+                    "contentcontrol.smwimport.targetlist", "contentcontrol");
+            YMarkEntry bmk;
+            while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
+                if (bmk == null) {
+                    // take() returns null when waiting was interrupted; keep draining
+                    // the queue until the importer delivers its poison pill
+                    continue;
+                }
+                if (bmk == YMarkEntry.EMPTY) {
+                    this.runningjob = false;
+                    continue;
+                }
+                try {
+                    this.sb.tables.bookmarks.addBookmark(targetList, bmk, true, true);
+                } catch (final Exception e) {
+                    // a single bad entry must not abort the whole chunk
+                    Log.logException(e);
+                }
+            }
+        } finally {
+            // release the stream of the wiki response
+            try {
+                reader.close();
+            } catch (final IOException e) {
+                Log.logException(e);
+            }
+        }
+    }
+
+    /**
+     * Asks the wiki how many pages were modified since the last
+     * synchronisation and starts a new import job if there are any.
+     */
+    @SuppressWarnings("deprecation")
+    private void checkForNewEntries() {
+        final String baseUrl = this.sb.getConfig("contentcontrol.smwimport.baseurl", "");
+        if (baseUrl.equals("")) {
+            Log.logWarning("CONTENTCONTROL", "No SMWimport URL defined");
+            return;
+        }
+
+        try {
+            final URL bmks_count = new URL(askUrlPrefix(baseUrl, this.lastsync)
+                    + "/format%3Dsupercount");
+
+            final byte[] content = new HTTPClient().GETbytes(bmks_count.toString());
+            if (content == null) {
+                Log.logWarning("CONTENTCONTROL", "No reply from " + bmks_count);
+                return;
+            }
+
+            // reply is read as "<number of entries>,<value used as next lastsync>"
+            final String[] reply = UTF8.String(content).split(",");
+            if (reply.length < 2) {
+                Log.logWarning("CONTENTCONTROL", "Unexpected reply from " + bmks_count);
+                return;
+            }
+            final String lastsyncstring = reply[1].trim();
+            try {
+                this.currentmax = Long.parseLong(reply[0].trim());
+            } catch (final NumberFormatException e) {
+                Log.logWarning("CONTENTCONTROL", "Unexpected entry count in reply from " + bmks_count);
+                return;
+            }
+
+            if (this.currentmax > 0) {
+                Log.logInfo("CONTENTCONTROL",
+                        "CONTENTCONTROL import job counts "
+                                + this.currentmax
+                                + " new elements between "
+                                + this.lastsync + " and "
+                                + lastsyncstring);
+
+                this.currenttimestamp = this.lastsync;
+                this.runningjob = true;
+                this.lastsync = lastsyncstring;
+                this.offset = 0;
+            }
+        } catch (final MalformedURLException e) {
+            Log.logException(e);
+        } catch (final IOException e) {
+            Log.logException(e);
+        }
+    }
+
+}
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index
98f904e0f..20dadc1a1 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -110,6 +110,8 @@ import net.yacy.document.content.SurrogateReader; import net.yacy.document.importer.OAIListFriendsLoader; import net.yacy.document.parser.html.Evaluation; import net.yacy.gui.Tray; +import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; +import net.yacy.interaction.contentcontrol.ContentControlImportThread; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; @@ -775,7 +777,7 @@ public final class Switchboard extends serverSwitch // that an automatic authorization of localhost is done, because in this case crawls from local // addresses are blocked to prevent attack szenarios where remote pages contain links to localhost // addresses that can steer a YaCy peer - if ( (getConfigBool("adminAccountForLocalhost", false)) ) { + if ( !getConfigBool("adminAccountForLocalhost", false) ) { if ( getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "").startsWith("0000") ) { // the password was set automatically with a random value. 
// We must remove that here to prevent that a user cannot log in any more @@ -969,7 +971,39 @@ public final class Switchboard extends serverSwitch Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_IDLESLEEP, "5000")), Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_BUSYSLEEP, "0")), Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000"))); - + + // content control: initialize list sync thread + deployThread( + "720_ccimport", + "Content Control Import", + "this is the content control import thread", + null, + new InstantBusyThread( + new ContentControlImportThread(sb), + "run", + SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT, + SwitchboardConstants.PEER_PING_METHOD_FREEMEM, + 3000, + 10000, + 3000, + 10000), + 2000); + deployThread( + "730_ccfilter", + "Content Control Filter", + "this is the content control filter update thread", + null, + new InstantBusyThread( + new ContentControlFilterUpdateThread(sb), + "run", + SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT, + SwitchboardConstants.PEER_PING_METHOD_FREEMEM, + 3000, + 10000, + 3000, + 10000), + 2000); + // set network-specific performance attributes if ( this.firstInit ) { setRemotecrawlPPM(Math.max(1, (int) getConfigLong("network.unit.remotecrawl.speed", 60))); @@ -981,7 +1015,7 @@ public final class Switchboard extends serverSwitch //query.add(CrawlSwitchboardEntry.word2hash("Zahl")); //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/mobil/newsticker/meldung/mail/54980"), query, true); //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true); - //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260); + //plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260); this.trail = new 
LinkedBlockingQueue(); @@ -3449,4 +3483,4 @@ public final class Switchboard extends serverSwitch this.shutdownSync.acquire(); return this.terminate; } -} +} diff --git a/source/net/yacy/search/query/RWIProcess.java b/source/net/yacy/search/query/RWIProcess.java index 50d622809..f7ccbd39e 100644 --- a/source/net/yacy/search/query/RWIProcess.java +++ b/source/net/yacy/search/query/RWIProcess.java @@ -56,6 +56,7 @@ import net.yacy.cora.storage.HandleSet; import net.yacy.cora.util.SpaceExceededException; import net.yacy.document.Condenser; import net.yacy.document.LibraryProvider; +import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadata; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -68,6 +69,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.TermSearch; import net.yacy.peers.graphics.ProfilingGraph; import net.yacy.repository.Blacklist.BlacklistType; +import net.yacy.repository.FilterEngine; import net.yacy.search.EventTracker; import net.yacy.search.Switchboard; import net.yacy.search.index.Segment; @@ -706,6 +708,29 @@ public final class RWIProcess extends Thread this.sortout++; continue; } + + // content control + + if (Switchboard.getSwitchboard().getConfigBool( + "contentcontrol.enabled", false) == true) { + + // check global network filter from bookmark list + if (!Switchboard.getSwitchboard() + .getConfig("contentcontrol.mandatoryfilterlist", "") + .equals("")) { + + FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); + + if (f != null) { + if (!f.isListed(page.url(), null)) { + + this.sortout++; + continue; + } + } + + } + } final String pageurl = page.url().toNormalform(true, true); final String pageauthor = page.dc_creator();