diff --git a/htroot/ContentControl_p.html b/htroot/ContentControl_p.html index 78f78157e..270770eb4 100644 --- a/htroot/ContentControl_p.html +++ b/htroot/ContentControl_p.html @@ -27,20 +27,13 @@ Enables or disables content control.

+ -
-
-

-

- Define a category string. If defined, all URLs will be filtered out during crawling and DHT which do not belong to this category. -

-
- -
+


- Define a bookmark list. Default: contentcontrol + Define a table. Default: contentcontrol

@@ -52,9 +45,9 @@
-
Content Control Settings +
Content Control SMW Import Settings

- With this settings you can define the content control settings. + With this settings you can define the content control import settings. You can define a SMW with the appropriate extensions. Details: yacy-smwextension on Gitorious

@@ -63,7 +56,7 @@
Enabled

- Enable or disable constant background synchronisation of content control list from SMW (Semantic Mediawiki). Requires restart! + Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!

@@ -75,19 +68,11 @@

-
+


- Define import target bookmark list. Default: contentcontrol -

-
- -
-
-

-

- Define default category which is added to each entry. This category can be defined as mandatory default filter list. + Define import target table. Default: contentcontrol

diff --git a/htroot/ContentControl_p.java b/htroot/ContentControl_p.java index b89ff44e4..e39632f15 100644 --- a/htroot/ContentControl_p.java +++ b/htroot/ContentControl_p.java @@ -24,9 +24,7 @@ public final class ContentControl_p { env.setConfig("contentcontrol.smwimport.targetlist", post.get("ccsmwimportlist")); - - env.setConfig("contentcontrol.smwimport.defaultcategory", - post.get("ccsmwimportcat")); + } @@ -35,9 +33,7 @@ public final class ContentControl_p { env.setConfig("contentcontrol.enabled", "on".equals(post.get("contentcontrolenabled")) ? true : false); - env.setConfig("contentcontrol.mandatoryfilterlist", - post.get("contentcontrolmfl")); - + env.setConfig("contentcontrol.bookmarklist", post.get("contentcontrolbml")); @@ -45,8 +41,6 @@ public final class ContentControl_p { } - prop.putHTML("ccsmwimportcat", - env.getConfig("contentcontrol.smwimport.defaultcategory", "yacy")); prop.putHTML("ccsmwimportlist", env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol")); @@ -64,9 +58,6 @@ public final class ContentControl_p { prop.put("contentcontrolenabled_checked", env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0"); - prop.putHTML("contentcontrolmfl", - env.getConfig("contentcontrol.mandatoryfilterlist", "yacy")); - prop.putHTML("contentcontrolbml", env.getConfig("contentcontrol.bookmarklist", "")); diff --git a/source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java b/source/net/yacy/contentcontrol/ContentControlFilterUpdateThread.java similarity index 50% rename from source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java rename to source/net/yacy/contentcontrol/ContentControlFilterUpdateThread.java index ee1fcd136..d938a149a 100644 --- a/source/net/yacy/interaction/contentcontrol/ContentControlFilterUpdateThread.java +++ b/source/net/yacy/contentcontrol/ContentControlFilterUpdateThread.java @@ -1,5 +1,6 @@ -package net.yacy.interaction.contentcontrol; +package net.yacy.contentcontrol; +import java.io.IOException; import java.util.Iterator; import net.yacy.kelondro.blob.Tables; @@ -16,15 +17,9 @@ public class ContentControlFilterUpdateThread { private static FilterEngine networkfilter; public ContentControlFilterUpdateThread(final Switchboard sb) { - //final long time = System.currentTimeMillis(); - this.sb = sb; - if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", - false)) { - this.sb.tables.clear(this.sb.getConfig( - "contentcontrol.smwimport.targetlist", "contentcontrol")); + this.sb = sb; - } } public final void run() { @@ -35,17 +30,11 @@ public class ContentControlFilterUpdateThread { if (this.sb.getConfigBool("contentcontrol.enabled", false) == true) { - if (!this.sb - .getConfig("contentcontrol.mandatoryfilterlist", "") - .equals("")) { - - if (this.sb.tables.bookmarks.dirty) { + if (SMWListSyncThread.dirty) { - networkfilter = updateFilter(); + networkfilter = updateFilter(); - this.sb.tables.bookmarks.dirty = false; - - } + SMWListSyncThread.dirty = false; } @@ -55,7 +44,6 @@ public class ContentControlFilterUpdateThread { } - return; } @@ -66,23 +54,23 @@ public class ContentControlFilterUpdateThread { Switchboard sb = Switchboard.getSwitchboard(); Iterator it; - it = sb.tables.bookmarks.getBookmarksByTag( - sb.getConfig( - "contentcontrol.bookmarklist", - "contentcontrol"), - "^((?!sc:" - + sb - .getConfig( - "contentcontrol.mandatoryfilterlist", - "") + ").*)$"); - while (it.hasNext()) { - Row b = it.next(); - - if (!b.get("filter", "").equals("")) { - - newfilter.add(b.get("filter", ""), null); - } - } + try { + it = sb.tables.iterator(sb.getConfig("contentcontrol.bookmarklist", + "contentcontrol")); + + while (it.hasNext()) { + Row b = it.next(); + + if (!b.get("filter", "").equals("")) { + + newfilter.add(b.get("filter", ""), null); + } + } + + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } return newfilter; } diff --git a/source/net/yacy/contentcontrol/SMWListImporter.java b/source/net/yacy/contentcontrol/SMWListImporter.java new file mode 100644 index 000000000..500c0b625 --- /dev/null +++ b/source/net/yacy/contentcontrol/SMWListImporter.java @@ -0,0 +1,163 @@ +package net.yacy.contentcontrol; + +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map.Entry; +import java.util.concurrent.ArrayBlockingQueue; + +import net.yacy.kelondro.logging.Log; + +import org.json.simple.parser.ContentHandler; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class SMWListImporter implements Runnable, ContentHandler{ + + // Importer Variables + private final ArrayBlockingQueue listEntries; + private final Reader importFile; + + private SMWListRow row; + private final JSONParser parser; + + // Parser Variables + private final StringBuilder value; + private final StringBuilder key; + private final HashMap obj; + + private Boolean isElement; + + public SMWListImporter(final Reader importFile, final int queueSize) { + this.listEntries = new ArrayBlockingQueue(queueSize); + this.importFile = importFile; + + this.row = new SMWListRow(); + + this.parser = new JSONParser(); + + this.value = new StringBuilder(128); + this.key = new StringBuilder(16); + this.obj = new HashMap(); + + this.isElement = false; + + } + + @Override + public void startJSON() throws ParseException, IOException { + } + + @Override + public void endJSON() throws ParseException, IOException { + } + + @Override + public boolean startArray() throws ParseException, IOException { + final String key = this.key.toString(); + + if (key.equals("items")) { + + this.isElement = true; + + } + return true; + } + + @Override + public boolean endArray() throws ParseException, IOException { + + return true; + } + + @Override + public boolean startObject() throws ParseException, IOException { + + return true; + } + + @Override + public boolean endObject() throws ParseException, IOException { + + if(this.isElement) { + + for (Entry e: this.obj.entrySet()) { + this.row.add (e.getKey(), e.getValue()); + } + try { + this.listEntries.put(this.row); + //this.count++; + } catch (InterruptedException e) { + Log.logException(e); + } + this.obj.clear(); + this.row = new SMWListRow(); + } + + return true; + } + + @Override + public boolean startObjectEntry(String key) throws ParseException, IOException { + this.key.setLength(0); + this.key.append(key); + + return true; + } + + @Override + public boolean primitive(Object value) throws ParseException, IOException { + + this.value.setLength(0); + if(value instanceof java.lang.String) { + this.value.append((String)value); + } else if(value instanceof java.lang.Boolean) { + this.value.append(value); + } else if(value instanceof java.lang.Number) { + this.value.append(value); + } + + return true; + } + + @Override + public boolean endObjectEntry() throws ParseException, IOException { + + final String key = this.key.toString(); + final String value = this.value.toString(); + + this.obj.put(key, value); + + return true; + } + + @Override + public void run() { + try { + Log.logInfo("SMWLISTSYNC", "Importer run()"); + this.parser.parse(this.importFile, this, true); + + } catch (IOException e) { + Log.logException(e); + } catch (ParseException e) { + Log.logException(e); + } finally { + + try { + Log.logInfo("SMWLISTSYNC", "Importer inserted poison pill in queue"); + this.listEntries.put(SMWListRow.POISON); + } catch (InterruptedException e) { + Log.logException(e); + } + } + } + + public SMWListRow take() { + try { + return this.listEntries.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; + } + } +} diff --git a/source/net/yacy/contentcontrol/SMWListImporterFormatObsolete.java b/source/net/yacy/contentcontrol/SMWListImporterFormatObsolete.java new file mode 100644 index 000000000..45804c45e --- /dev/null +++ b/source/net/yacy/contentcontrol/SMWListImporterFormatObsolete.java @@ -0,0 +1,117 @@ +package net.yacy.contentcontrol; + +import java.io.IOException; +import java.io.Reader; +import java.util.Iterator; +import java.util.concurrent.ArrayBlockingQueue; + +import net.yacy.kelondro.logging.Log; + +import org.json.simple.JSONArray; +import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; + +public class SMWListImporterFormatObsolete implements Runnable{ + + private final ArrayBlockingQueue listEntries; + private final Reader importFile; + private final JSONParser parser; + + public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) { + this.listEntries = new ArrayBlockingQueue(queueSize); + this.importFile = importFile; + this.parser = new JSONParser(); + + } + + + @Override + public void run() { + try { + Log.logInfo("SMWLISTSYNC", "Importer run()"); + Object obj = this.parser.parse(this.importFile); + + JSONObject jsonObject = (JSONObject) obj; + + JSONArray items = (JSONArray) jsonObject.get("items"); + + @SuppressWarnings("unchecked") + Iterator iterator = items.iterator(); + while (iterator.hasNext()) { + this.parseItem (iterator.next()); + } + + } catch (IOException e) { + Log.logException(e); + } catch (ParseException e) { + Log.logException(e); + } finally { + + try { + Log.logInfo("SMWLISTSYNC", "Importer inserted poison pill in queue"); + this.listEntries.put(SMWListRow.POISON); + } catch (InterruptedException e) { + Log.logException(e); + } + } + } + + private void parseItem(JSONObject jsonObject) { + + try { + SMWListRow row = new SMWListRow(); + @SuppressWarnings("unchecked") + Iterator iterator = jsonObject.keySet().iterator(); + + while (iterator.hasNext()) { + String entryKey = iterator.next(); + + Object value = jsonObject.get (entryKey); + String valueKey = ""; + + if (value instanceof java.lang.String) { + valueKey = value.toString(); + } else if (value instanceof JSONArray) { + valueKey = jsonListAll ((JSONArray) value); + } + + row.add (entryKey, valueKey); + } + + this.listEntries.put(row); + + } catch (Exception e) { + Log.logInfo("SMWLISTSYNC", "import of entry failed"); + } + + } + + + private String jsonListAll(JSONArray value) { + String res = ""; + + @SuppressWarnings("unchecked") + Iterator iterator = value.listIterator(); + while (iterator.hasNext()) { + Object val = iterator.next(); + res += val.toString()+","; + } + + if (res.endsWith (",")) { + res = res.substring (0, res.length()-1); + } + + return res; + } + + + public SMWListRow take() { + try { + return this.listEntries.take(); + } catch (InterruptedException e) { + Log.logException(e); + return null; + } + } +} diff --git a/source/net/yacy/contentcontrol/SMWListRow.java b/source/net/yacy/contentcontrol/SMWListRow.java new file mode 100644 index 000000000..78c085d40 --- /dev/null +++ b/source/net/yacy/contentcontrol/SMWListRow.java @@ -0,0 +1,24 @@ +package net.yacy.contentcontrol; + +import net.yacy.kelondro.blob.Tables; + +public class SMWListRow { + + private Tables.Data data; + + public static final SMWListRow POISON = new SMWListRow(); + public static final SMWListRow EMPTY = new SMWListRow(); + + public SMWListRow() { + this.data = new Tables.Data(); + } + + public void add (String key, String value) { + this.data.put(key, value); + } + + public Tables.Data getData() { + return this.data; + } + +} diff --git a/source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java b/source/net/yacy/contentcontrol/SMWListSyncThread.java similarity index 66% rename from source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java rename to source/net/yacy/contentcontrol/SMWListSyncThread.java index 12173ae29..0d2f6feac 100644 --- a/source/net/yacy/interaction/contentcontrol/ContentControlImportThread.java +++ b/source/net/yacy/contentcontrol/SMWListSyncThread.java @@ -1,4 +1,4 @@ -package net.yacy.interaction.contentcontrol; +package net.yacy.contentcontrol; import java.io.IOException; import java.io.InputStreamReader; @@ -8,12 +8,10 @@ import java.net.URL; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.http.HTTPClient; -import net.yacy.data.ymark.YMarkEntry; -import net.yacy.data.ymark.YMarkSMWJSONImporter; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; -public class ContentControlImportThread { +public class SMWListSyncThread { private final Switchboard sb; private Boolean locked = false; @@ -23,11 +21,20 @@ public class ContentControlImportThread { private final long limit = 500; private long currentmax = 0; private boolean runningjob = false; - - public ContentControlImportThread(final Switchboard sb) { + + private String targetList; + private String parameters; + private String query; + + public static Boolean dirty = false; + + public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) { this.sb = sb; - if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",false)) { - this.sb.tables.clear(this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol")); + this.targetList = targetList; + this.parameters = parameters; + this.query = query; + if (purgeOnInit) { + this.sb.tables.clear(targetList); } } @@ -51,26 +58,84 @@ public class ContentControlImportThread { if (!this.locked) { this.locked = true; if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) { - if (this.runningjob) { - Log.logInfo("CONTENTCONTROL", - "CONTENTCONTROL importing max. " + this.limit + + if (!this.runningjob) { + + // we have to count all new elements first + try { + if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) { + URL urlCount; + + urlCount = new URL( + this.sb.getConfig( + "contentcontrol.smwimport.baseurl", + "") + + wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]") + + + wikiurlify (this.parameters) + + + "/mainlabel%3D" + + "/offset%3D0" + + "/limit%3D200000" + + "/format%3Dystat"); + + String reply = UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).GETbytes(urlCount.toString())); + String overallcount = reply.split(",")[0]; + String lastsyncstring = reply.split(",")[1]; + this.currentmax = Integer.parseInt(overallcount); + + if (this.currentmax > 0) { + Log.logInfo("SMWLISTSYNC", + "import job counts " + + this.currentmax + + " new elements between " + + this.lastsync + " and " + + this.currenttimestamp); + + this.currenttimestamp = this.lastsync; + + this.runningjob = true; + this.lastsync = lastsyncstring; + this.offset = 0; + } + } else { + Log.logWarning("SMWLISTSYNC", + "No SMWimport URL defined"); + } + } catch (MalformedURLException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + } else { + + // there are new elements to be imported + Log.logInfo("SMWLISTSYNC", + "importing max. " + this.limit + " elements at " + this.offset + " of " + this.currentmax + ", since " + this.currenttimestamp); - URL bmks_json; + URL urlImport; try { if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) { - bmks_json = new URL( + urlImport = new URL( this.sb.getConfig( "contentcontrol.smwimport.baseurl", "") - + wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.currenttimestamp+ "]]") + + wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]") - + wikiurlify ("/?Url/?Filter/?Article has average rating/?Category") + + wikiurlify (this.parameters) + + "/mainlabel%3D" + + "/syntax%3Dobsolete" + "/offset%3D" + this.offset + "/limit%3D" + this.limit + "/format%3Djson"); + this.offset += this.limit; if (this.offset > this.currentmax) { this.runningjob = false; @@ -79,34 +144,34 @@ public class ContentControlImportThread { InputStreamReader reader = null; try { reader = new InputStreamReader( - bmks_json.openStream(), "UTF-8"); + urlImport.openStream(), "UTF-8"); } catch (Exception e) { Log.logException(e); this.runningjob = false; } if (reader != null) { - YMarkSMWJSONImporter bookmarkImporter = null; + SMWListImporterFormatObsolete smwListImporter = null; try { - bookmarkImporter = new YMarkSMWJSONImporter( - reader, 200, ""); + smwListImporter = new SMWListImporterFormatObsolete( + reader, 200); } catch (final Exception e) { // TODO: display an error message Log.logException(e); this.runningjob = false; } Thread t; - YMarkEntry bmk; - t = new Thread(bookmarkImporter,"YMarks - Network bookmark importer"); + SMWListRow row; + t = new Thread(smwListImporter,"SMW List Importer"); t.start(); - while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) { - if (bmk == YMarkEntry.EMPTY) { + while ((row = smwListImporter.take()) != SMWListRow.POISON) { + if (row == SMWListRow.EMPTY) { this.runningjob = false; } else { try { - this.sb.tables.bookmarks.addBookmark( - this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"), bmk, - true, true); + this.sb.tables.insert(targetList, row.getData()); + + dirty = true; } catch (Exception e) { // TODO Auto-generated catch block @@ -114,62 +179,15 @@ public class ContentControlImportThread { } } } - } else { - - } - } - else { - + } } + } catch (MalformedURLException e2) { // TODO Auto-generated catch block e2.printStackTrace(); } - } else { - try { - if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) { - URL bmks_count; - - bmks_count = new URL( - this.sb.getConfig( - "contentcontrol.smwimport.baseurl", - "") - + wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.lastsync+ "]]") - + wikiurlify ("/?Url/?Filter/?Article has average rating/?Category") - + "/mainlabel%3D" - + "/format%3Dystat"); - - String reply = UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).GETbytes(bmks_count.toString())); - String overallcount = reply.split(",")[0]; - String lastsyncstring = reply.split(",")[1]; - this.currentmax = Integer.parseInt(overallcount); - - if (this.currentmax > 0) { - Log.logInfo("CONTENTCONTROL", - "CONTENTCONTROL import job counts " - + this.currentmax - + " new elements between " - + this.lastsync + " and " - + this.currenttimestamp); - - this.currenttimestamp = this.lastsync; - - this.runningjob = true; - this.lastsync = lastsyncstring; - this.offset = 0; - } - } else { - Log.logWarning("CONTENTCONTROL", - "No SMWimport URL defined"); - } - } catch (MalformedURLException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } + } this.locked = false; } diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index 34bf1c68b..fee09e310 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -39,6 +39,7 @@ import java.util.Properties; import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicInteger; +import net.yacy.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification.ContentDomain; import net.yacy.cora.document.MultiProtocolURI; @@ -58,7 +59,6 @@ import net.yacy.crawler.retrieval.HTTPLoader; import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.SMBLoader; import net.yacy.crawler.robots.RobotsTxt; -import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.logging.Log; diff --git a/source/net/yacy/data/ymark/YMarkSMWJSONImporter.java b/source/net/yacy/data/ymark/YMarkSMWJSONImporter.java deleted file mode 100644 index 5a3e7f4e4..000000000 --- a/source/net/yacy/data/ymark/YMarkSMWJSONImporter.java +++ /dev/null @@ -1,212 +0,0 @@ -package net.yacy.data.ymark; - -import java.io.IOException; -import java.io.Reader; -import java.util.HashMap; -import java.util.HashSet; -import java.util.concurrent.ArrayBlockingQueue; - -import net.yacy.kelondro.logging.Log; -import net.yacy.search.Switchboard; - -import org.json.simple.parser.ContentHandler; -import org.json.simple.parser.JSONParser; -import org.json.simple.parser.ParseException; - -public class YMarkSMWJSONImporter implements Runnable, ContentHandler{ - - // Importer Variables - private final ArrayBlockingQueue bookmarks; - private final Reader bmk_file; - private final String RootFolder; - private final StringBuilder folderstring; - private YMarkEntry bmk; - private final JSONParser parser; - - //private boolean empty = true; - //private int count = 0; - - // Parser Variables - private final StringBuilder value; - private final StringBuilder key; - //private final StringBuilder date; - private final HashMap obj; - - private Boolean isBookmark; - - public YMarkSMWJSONImporter(final Reader bmk_file, final int queueSize, final String root) { - this.bookmarks = new ArrayBlockingQueue(queueSize); - this.bmk_file = bmk_file; - this.RootFolder = root; - this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH); - this.folderstring.append(this.RootFolder); - this.bmk = new YMarkEntry(); - - this.parser = new JSONParser(); - - this.value = new StringBuilder(128); - this.key = new StringBuilder(16); - //this.date = new StringBuilder(32); - this.obj = new HashMap(); - - this.isBookmark = false; - //this.empty = true; - //this.count = 0; - } - - @Override - public void startJSON() throws ParseException, IOException { - } - - @Override - public void endJSON() throws ParseException, IOException { - } - - @Override - public boolean startArray() throws ParseException, IOException { - final String key = this.key.toString(); - - if(key.equals("items") ) { - - this.isBookmark = true; - //this.count = 0; - - } - return true; - } - - @Override - public boolean endArray() throws ParseException, IOException { - - return true; - } - - @Override - public boolean startObject() throws ParseException, IOException { - - return true; - } - - @Override - public boolean endObject() throws ParseException, IOException { - - if(this.isBookmark) { - - if(this.obj.containsKey("category")) { - String catstr = this.obj.get("category"); - - HashSet tags = YMarkUtil.keysStringToSet (catstr); - - HashSet categories = YMarkUtil.keysStringToSet(""); - - for (String c: tags) { - - c = c.split(":")[1]; - - c = c.replace("/", "_"); - c = c.replace(" ", "_"); - - if (!c.equals("") && (!c.equals(" "))) { - categories.add ("sc:"+c); - } - - } - - if (!Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "").equals("")) { - categories.add ("sc:"+Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "")); - } - - catstr = YMarkUtil.keySetToString(categories); - - this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), catstr); - } - - if(this.obj.containsKey("article_has_average_rating")) { - this.bmk.put(YMarkEntry.BOOKMARK.STARRATING.key(),this.obj.get("article_has_average_rating")); - } - - this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(),this.obj.get("label")); - this.bmk.put(YMarkEntry.BOOKMARK.URL.key(),this.obj.get("url")); - if(this.obj.containsKey("filter")) { - this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),this.obj.get("filter")); - } else { - this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),""); - } - try { - this.bookmarks.put(this.bmk); - //this.count++; - } catch (InterruptedException e) { - Log.logException(e); - } - this.obj.clear(); - this.bmk = new YMarkEntry(); - } - - return true; - } - - @Override - public boolean startObjectEntry(String key) throws ParseException, IOException { - this.key.setLength(0); - this.key.append(key); - - return true; - } - - @Override - public boolean primitive(Object value) throws ParseException, IOException { - - this.value.setLength(0); - if(value instanceof java.lang.String) { - this.value.append((String)value); - } else if(value instanceof java.lang.Boolean) { - this.value.append(value); - } else if(value instanceof java.lang.Number) { - this.value.append(value); - } - - return true; - } - - @Override - public boolean endObjectEntry() throws ParseException, IOException { - - final String key = this.key.toString(); - final String value = this.value.toString(); - - this.obj.put(key, value); - - return true; - } - - @Override - public void run() { - try { - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer run()"); - //this.empty = true; - this.parser.parse(this.bmk_file, this, true); - - } catch (IOException e) { - Log.logException(e); - } catch (ParseException e) { - Log.logException(e); - } finally { - - try { - Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer inserted poison pill in queue"); - this.bookmarks.put(YMarkEntry.POISON); - } catch (InterruptedException e) { - Log.logException(e); - } - } - } - - public YMarkEntry take() { - try { - return this.bookmarks.take(); - } catch (InterruptedException e) { - Log.logException(e); - return null; - } - } -} diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 4d78abe4c..a0812cdd6 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -79,6 +79,8 @@ import java.util.zip.ZipInputStream; import org.apache.solr.common.SolrInputDocument; +import net.yacy.contentcontrol.ContentControlFilterUpdateThread; +import net.yacy.contentcontrol.SMWListSyncThread; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; @@ -148,8 +150,6 @@ import net.yacy.document.importer.OAIListFriendsLoader; import net.yacy.document.parser.audioTagParser; import net.yacy.document.parser.html.Evaluation; import net.yacy.gui.Tray; -import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; -import net.yacy.interaction.contentcontrol.ContentControlImportThread; import net.yacy.kelondro.blob.Tables; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataNode; @@ -1017,7 +1017,8 @@ public final class Switchboard extends serverSwitch { "this is the content control import thread", null, new InstantBusyThread( - new ContentControlImportThread(this), + new SMWListSyncThread(this, sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"), "Category:Content Source", "/?Url/?Filter/?Category/?Modification date", sb.getConfigBool( + "contentcontrol.smwimport.purgelistoninit", false)), "run", SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT, SwitchboardConstants.PEER_PING_METHOD_FREEMEM, diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 60a85b3cd..09278d634 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicInteger; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; +import net.yacy.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.Classification; import net.yacy.cora.document.Classification.ContentDomain; @@ -61,7 +62,6 @@ import net.yacy.cora.util.SpaceExceededException; import net.yacy.data.WorkTables; import net.yacy.document.Condenser; import net.yacy.document.LargeNumberCache; -import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread; import net.yacy.kelondro.data.meta.URIMetadataNode; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.WordReference; @@ -741,22 +741,20 @@ public final class SearchEvent { continue; } - // content control - if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false) == true) { - // check global network filter from bookmark list - if (!Switchboard.getSwitchboard() - .getConfig("contentcontrol.mandatoryfilterlist", "") - .equals("")) { - - FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter(); - if (f != null) { - if (!f.isListed(page.url(), null)) { - this.query.misses.add(page.hash()); - continue; - } - } - } - } + // contentcontrol + if (Switchboard.getSwitchboard().getConfigBool( + "contentcontrol.enabled", false) == true) { + + FilterEngine f = ContentControlFilterUpdateThread + .getNetworkFilter(); + if (f != null) { + if (!f.isListed(page.url(), null)) { + this.query.misses.add(page.hash()); + continue; + } + } + + } final String pageurl = page.url().toNormalform(true); final String pageauthor = page.dc_creator();