Merge commit '2bb8f045cc92f31fc7e720cc30b38af417563890'

pull/1/head
Michael Peter Christen 12 years ago
commit 46be4af5b9

@ -27,20 +27,13 @@
Enables or disables content control.
</p>
</dd>
<dt><label for="content">Mandatory default filter list (category):</label></dt>
<dd>
<input type="text" name="contentcontrolmfl" value="#[contentcontrolmfl]#" size="60" /><br/><br/>
<p class="help">
Define a category string. If defined, all URLs will be filtered out during crawling and DHT which do not belong to this category.
</p>
</dd>
<dt><label for="content">Use this bookmark list:</label></dt>
<dt><label for="content">Use this table to create filter:</label></dt>
<dd>
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
<p class="help">
Define a bookmark list. Default: contentcontrol
Define a table. Default: contentcontrol
</p>
</dd>
<dt></dt>
@ -52,9 +45,9 @@
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="urlproxy">Content Control Settings</legend>
<fieldset><legend id="urlproxy">Content Control SMW Import Settings</legend>
<p>
With this settings you can define the content control settings.
With these settings you can define the content control import settings. You can define a SMW with the appropriate extensions. Details: <a href="https://gitorious.org/sciety/yacy-smwextension" target="_blank">yacy-smwextension on Gitorious</a>
</p>
<dl>
@ -63,7 +56,7 @@
<dd>
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
<p class="help">
Enable or disable constant background synchronisation of content control list from SMW (Semantic Mediawiki). Requires restart!
Enable or disable constant background synchronization of content control list from SMW (Semantic Mediawiki). Requires restart!
</p>
</dd>
@ -75,19 +68,11 @@
</p>
</dd>
<dt><label for="content">SMW import target bookmark list:</label></dt>
<dt><label for="content">SMW import target table:</label></dt>
<dd>
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
<p class="help">
Define import target bookmark list. Default: contentcontrol
</p>
</dd>
<dt><label for="content">SMW import default category:</label></dt>
<dd>
<input type="text" name="ccsmwimportcat" value="#[ccsmwimportcat]#" size="60" /><br/><br/>
<p class="help">
Define default category which is added to each entry. This category can be defined as mandatory default filter list.
Define import target table. Default: contentcontrol
</p>
</dd>

@ -24,9 +24,7 @@ public final class ContentControl_p {
env.setConfig("contentcontrol.smwimport.targetlist",
post.get("ccsmwimportlist"));
env.setConfig("contentcontrol.smwimport.defaultcategory",
post.get("ccsmwimportcat"));
}
@ -35,9 +33,7 @@ public final class ContentControl_p {
env.setConfig("contentcontrol.enabled",
"on".equals(post.get("contentcontrolenabled")) ? true : false);
env.setConfig("contentcontrol.mandatoryfilterlist",
post.get("contentcontrolmfl"));
env.setConfig("contentcontrol.bookmarklist",
post.get("contentcontrolbml"));
@ -45,8 +41,6 @@ public final class ContentControl_p {
}
prop.putHTML("ccsmwimportcat",
env.getConfig("contentcontrol.smwimport.defaultcategory", "yacy"));
prop.putHTML("ccsmwimportlist",
env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
@ -64,9 +58,6 @@ public final class ContentControl_p {
prop.put("contentcontrolenabled_checked",
env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
prop.putHTML("contentcontrolmfl",
env.getConfig("contentcontrol.mandatoryfilterlist", "yacy"));
prop.putHTML("contentcontrolbml",
env.getConfig("contentcontrol.bookmarklist", ""));

@ -1,5 +1,6 @@
package net.yacy.interaction.contentcontrol;
package net.yacy.contentcontrol;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
@ -16,15 +17,9 @@ public class ContentControlFilterUpdateThread {
private static FilterEngine networkfilter;
public ContentControlFilterUpdateThread(final Switchboard sb) {
//final long time = System.currentTimeMillis();
this.sb = sb;
if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",
false)) {
this.sb.tables.clear(this.sb.getConfig(
"contentcontrol.smwimport.targetlist", "contentcontrol"));
this.sb = sb;
}
}
public final void run() {
@ -35,17 +30,11 @@ public class ContentControlFilterUpdateThread {
if (this.sb.getConfigBool("contentcontrol.enabled", false) == true) {
if (!this.sb
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
if (this.sb.tables.bookmarks.dirty) {
if (SMWListSyncThread.dirty) {
networkfilter = updateFilter();
networkfilter = updateFilter();
this.sb.tables.bookmarks.dirty = false;
}
SMWListSyncThread.dirty = false;
}
@ -55,7 +44,6 @@ public class ContentControlFilterUpdateThread {
}
return;
}
@ -66,23 +54,23 @@ public class ContentControlFilterUpdateThread {
Switchboard sb = Switchboard.getSwitchboard();
Iterator<Tables.Row> it;
it = sb.tables.bookmarks.getBookmarksByTag(
sb.getConfig(
"contentcontrol.bookmarklist",
"contentcontrol"),
"^((?!sc:"
+ sb
.getConfig(
"contentcontrol.mandatoryfilterlist",
"") + ").*)$");
while (it.hasNext()) {
Row b = it.next();
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
}
}
try {
it = sb.tables.iterator(sb.getConfig("contentcontrol.bookmarklist",
"contentcontrol"));
while (it.hasNext()) {
Row b = it.next();
if (!b.get("filter", "").equals("")) {
newfilter.add(b.get("filter", ""), null);
}
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return newfilter;
}

@ -0,0 +1,163 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporter implements Runnable, ContentHandler{
// Importer Variables
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private SMWListRow row;
private final JSONParser parser;
// Parser Variables
private final StringBuilder value;
private final StringBuilder key;
private final HashMap<String,String> obj;
private Boolean isElement;
public SMWListImporter(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.row = new SMWListRow();
this.parser = new JSONParser();
this.value = new StringBuilder(128);
this.key = new StringBuilder(16);
this.obj = new HashMap<String,String>();
this.isElement = false;
}
@Override
public void startJSON() throws ParseException, IOException {
}
@Override
public void endJSON() throws ParseException, IOException {
}
@Override
public boolean startArray() throws ParseException, IOException {
final String key = this.key.toString();
if (key.equals("items")) {
this.isElement = true;
}
return true;
}
@Override
public boolean endArray() throws ParseException, IOException {
return true;
}
@Override
public boolean startObject() throws ParseException, IOException {
return true;
}
@Override
public boolean endObject() throws ParseException, IOException {
if(this.isElement) {
for (Entry<String, String> e: this.obj.entrySet()) {
this.row.add (e.getKey(), e.getValue());
}
try {
this.listEntries.put(this.row);
//this.count++;
} catch (InterruptedException e) {
Log.logException(e);
}
this.obj.clear();
this.row = new SMWListRow();
}
return true;
}
@Override
public boolean startObjectEntry(String key) throws ParseException, IOException {
this.key.setLength(0);
this.key.append(key);
return true;
}
@Override
public boolean primitive(Object value) throws ParseException, IOException {
this.value.setLength(0);
if(value instanceof java.lang.String) {
this.value.append((String)value);
} else if(value instanceof java.lang.Boolean) {
this.value.append(value);
} else if(value instanceof java.lang.Number) {
this.value.append(value);
}
return true;
}
@Override
public boolean endObjectEntry() throws ParseException, IOException {
final String key = this.key.toString();
final String value = this.value.toString();
this.obj.put(key, value);
return true;
}
@Override
public void run() {
try {
Log.logInfo("SMWLISTSYNC", "Importer run()");
this.parser.parse(this.importFile, this, true);
} catch (IOException e) {
Log.logException(e);
} catch (ParseException e) {
Log.logException(e);
} finally {
try {
Log.logInfo("SMWLISTSYNC", "Importer inserted poison pill in queue");
this.listEntries.put(SMWListRow.POISON);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
}

@ -0,0 +1,117 @@
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.Reader;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class SMWListImporterFormatObsolete implements Runnable{
private final ArrayBlockingQueue<SMWListRow> listEntries;
private final Reader importFile;
private final JSONParser parser;
public SMWListImporterFormatObsolete(final Reader importFile, final int queueSize) {
this.listEntries = new ArrayBlockingQueue<SMWListRow>(queueSize);
this.importFile = importFile;
this.parser = new JSONParser();
}
@Override
public void run() {
try {
Log.logInfo("SMWLISTSYNC", "Importer run()");
Object obj = this.parser.parse(this.importFile);
JSONObject jsonObject = (JSONObject) obj;
JSONArray items = (JSONArray) jsonObject.get("items");
@SuppressWarnings("unchecked")
Iterator<JSONObject> iterator = items.iterator();
while (iterator.hasNext()) {
this.parseItem (iterator.next());
}
} catch (IOException e) {
Log.logException(e);
} catch (ParseException e) {
Log.logException(e);
} finally {
try {
Log.logInfo("SMWLISTSYNC", "Importer inserted poison pill in queue");
this.listEntries.put(SMWListRow.POISON);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
private void parseItem(JSONObject jsonObject) {
try {
SMWListRow row = new SMWListRow();
@SuppressWarnings("unchecked")
Iterator<String> iterator = jsonObject.keySet().iterator();
while (iterator.hasNext()) {
String entryKey = iterator.next();
Object value = jsonObject.get (entryKey);
String valueKey = "";
if (value instanceof java.lang.String) {
valueKey = value.toString();
} else if (value instanceof JSONArray) {
valueKey = jsonListAll ((JSONArray) value);
}
row.add (entryKey, valueKey);
}
this.listEntries.put(row);
} catch (Exception e) {
Log.logInfo("SMWLISTSYNC", "import of entry failed");
}
}
private String jsonListAll(JSONArray value) {
String res = "";
@SuppressWarnings("unchecked")
Iterator<Object> iterator = value.listIterator();
while (iterator.hasNext()) {
Object val = iterator.next();
res += val.toString()+",";
}
if (res.endsWith (",")) {
res = res.substring (0, res.length()-1);
}
return res;
}
public SMWListRow take() {
try {
return this.listEntries.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
}

@ -0,0 +1,24 @@
package net.yacy.contentcontrol;
import net.yacy.kelondro.blob.Tables;
public class SMWListRow {
private Tables.Data data;
public static final SMWListRow POISON = new SMWListRow();
public static final SMWListRow EMPTY = new SMWListRow();
public SMWListRow() {
this.data = new Tables.Data();
}
public void add (String key, String value) {
this.data.put(key, value);
}
public Tables.Data getData() {
return this.data;
}
}

@ -1,4 +1,4 @@
package net.yacy.interaction.contentcontrol;
package net.yacy.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
@ -8,12 +8,10 @@ import java.net.URL;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.data.ymark.YMarkEntry;
import net.yacy.data.ymark.YMarkSMWJSONImporter;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
public class ContentControlImportThread {
public class SMWListSyncThread {
private final Switchboard sb;
private Boolean locked = false;
@ -23,11 +21,20 @@ public class ContentControlImportThread {
private final long limit = 500;
private long currentmax = 0;
private boolean runningjob = false;
public ContentControlImportThread(final Switchboard sb) {
private String targetList;
private String parameters;
private String query;
public static Boolean dirty = false;
public SMWListSyncThread(final Switchboard sb, final String targetList, final String query, final String parameters, final Boolean purgeOnInit) {
this.sb = sb;
if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",false)) {
this.sb.tables.clear(this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
this.targetList = targetList;
this.parameters = parameters;
this.query = query;
if (purgeOnInit) {
this.sb.tables.clear(targetList);
}
}
@ -51,26 +58,84 @@ public class ContentControlImportThread {
if (!this.locked) {
this.locked = true;
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
if (this.runningjob) {
Log.logInfo("CONTENTCONTROL",
"CONTENTCONTROL importing max. " + this.limit
if (!this.runningjob) {
// we have to count all new elements first
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
URL urlCount;
urlCount = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/offset%3D0"
+ "/limit%3D200000"
+ "/format%3Dystat");
String reply = UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).GETbytes(urlCount.toString()));
String overallcount = reply.split(",")[0];
String lastsyncstring = reply.split(",")[1];
this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) {
Log.logInfo("SMWLISTSYNC",
"import job counts "
+ this.currentmax
+ " new elements between "
+ this.lastsync + " and "
+ this.currenttimestamp);
this.currenttimestamp = this.lastsync;
this.runningjob = true;
this.lastsync = lastsyncstring;
this.offset = 0;
}
} else {
Log.logWarning("SMWLISTSYNC",
"No SMWimport URL defined");
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
} else {
// there are new elements to be imported
Log.logInfo("SMWLISTSYNC",
"importing max. " + this.limit
+ " elements at " + this.offset + " of "
+ this.currentmax + ", since "
+ this.currenttimestamp);
URL bmks_json;
URL urlImport;
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
bmks_json = new URL(
urlImport = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.currenttimestamp+ "]]")
+ wikiurlify ("/[["+this.query+"]] [[Modification date::>" +this.currenttimestamp+ "]]")
+ wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
+ wikiurlify (this.parameters)
+ "/mainlabel%3D"
+ "/syntax%3Dobsolete"
+ "/offset%3D" + this.offset
+ "/limit%3D" + this.limit
+ "/format%3Djson");
this.offset += this.limit;
if (this.offset > this.currentmax) {
this.runningjob = false;
@ -79,34 +144,34 @@ public class ContentControlImportThread {
InputStreamReader reader = null;
try {
reader = new InputStreamReader(
bmks_json.openStream(), "UTF-8");
urlImport.openStream(), "UTF-8");
} catch (Exception e) {
Log.logException(e);
this.runningjob = false;
}
if (reader != null) {
YMarkSMWJSONImporter bookmarkImporter = null;
SMWListImporterFormatObsolete smwListImporter = null;
try {
bookmarkImporter = new YMarkSMWJSONImporter(
reader, 200, "");
smwListImporter = new SMWListImporterFormatObsolete(
reader, 200);
} catch (final Exception e) {
// TODO: display an error message
Log.logException(e);
this.runningjob = false;
}
Thread t;
YMarkEntry bmk;
t = new Thread(bookmarkImporter,"YMarks - Network bookmark importer");
SMWListRow row;
t = new Thread(smwListImporter,"SMW List Importer");
t.start();
while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
if (bmk == YMarkEntry.EMPTY) {
while ((row = smwListImporter.take()) != SMWListRow.POISON) {
if (row == SMWListRow.EMPTY) {
this.runningjob = false;
} else {
try {
this.sb.tables.bookmarks.addBookmark(
this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"), bmk,
true, true);
this.sb.tables.insert(targetList, row.getData());
dirty = true;
} catch (Exception e) {
// TODO Auto-generated catch block
@ -114,62 +179,15 @@ public class ContentControlImportThread {
}
}
}
} else {
}
}
else {
}
}
} catch (MalformedURLException e2) {
// TODO Auto-generated catch block
e2.printStackTrace();
}
} else {
try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
URL bmks_count;
bmks_count = new URL(
this.sb.getConfig(
"contentcontrol.smwimport.baseurl",
"")
+ wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
+ "/mainlabel%3D"
+ "/format%3Dystat");
String reply = UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).GETbytes(bmks_count.toString()));
String overallcount = reply.split(",")[0];
String lastsyncstring = reply.split(",")[1];
this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) {
Log.logInfo("CONTENTCONTROL",
"CONTENTCONTROL import job counts "
+ this.currentmax
+ " new elements between "
+ this.lastsync + " and "
+ this.currenttimestamp);
this.currenttimestamp = this.lastsync;
this.runningjob = true;
this.lastsync = lastsyncstring;
this.offset = 0;
}
} else {
Log.logWarning("CONTENTCONTROL",
"No SMWimport URL defined");
}
} catch (MalformedURLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
this.locked = false;
}

@ -39,6 +39,7 @@ import java.util.Properties;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification.ContentDomain;
import net.yacy.cora.document.MultiProtocolURI;
@ -58,7 +59,6 @@ import net.yacy.crawler.retrieval.HTTPLoader;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.SMBLoader;
import net.yacy.crawler.robots.RobotsTxt;
import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.logging.Log;

@ -1,212 +0,0 @@
package net.yacy.data.ymark;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class YMarkSMWJSONImporter implements Runnable, ContentHandler{
// Importer Variables
private final ArrayBlockingQueue<YMarkEntry> bookmarks;
private final Reader bmk_file;
private final String RootFolder;
private final StringBuilder folderstring;
private YMarkEntry bmk;
private final JSONParser parser;
//private boolean empty = true;
//private int count = 0;
// Parser Variables
private final StringBuilder value;
private final StringBuilder key;
//private final StringBuilder date;
private final HashMap<String,String> obj;
private Boolean isBookmark;
public YMarkSMWJSONImporter(final Reader bmk_file, final int queueSize, final String root) {
this.bookmarks = new ArrayBlockingQueue<YMarkEntry>(queueSize);
this.bmk_file = bmk_file;
this.RootFolder = root;
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(this.RootFolder);
this.bmk = new YMarkEntry();
this.parser = new JSONParser();
this.value = new StringBuilder(128);
this.key = new StringBuilder(16);
//this.date = new StringBuilder(32);
this.obj = new HashMap<String,String>();
this.isBookmark = false;
//this.empty = true;
//this.count = 0;
}
@Override
public void startJSON() throws ParseException, IOException {
}
@Override
public void endJSON() throws ParseException, IOException {
}
@Override
public boolean startArray() throws ParseException, IOException {
final String key = this.key.toString();
if(key.equals("items") ) {
this.isBookmark = true;
//this.count = 0;
}
return true;
}
@Override
public boolean endArray() throws ParseException, IOException {
return true;
}
@Override
public boolean startObject() throws ParseException, IOException {
return true;
}
@Override
public boolean endObject() throws ParseException, IOException {
if(this.isBookmark) {
if(this.obj.containsKey("category")) {
String catstr = this.obj.get("category");
HashSet<String> tags = YMarkUtil.keysStringToSet (catstr);
HashSet<String> categories = YMarkUtil.keysStringToSet("");
for (String c: tags) {
c = c.split(":")[1];
c = c.replace("/", "_");
c = c.replace(" ", "_");
if (!c.equals("") && (!c.equals(" "))) {
categories.add ("sc:"+c);
}
}
if (!Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "").equals("")) {
categories.add ("sc:"+Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", ""));
}
catstr = YMarkUtil.keySetToString(categories);
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), catstr);
}
if(this.obj.containsKey("article_has_average_rating")) {
this.bmk.put(YMarkEntry.BOOKMARK.STARRATING.key(),this.obj.get("article_has_average_rating"));
}
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(),this.obj.get("label"));
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(),this.obj.get("url"));
if(this.obj.containsKey("filter")) {
this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),this.obj.get("filter"));
} else {
this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),"");
}
try {
this.bookmarks.put(this.bmk);
//this.count++;
} catch (InterruptedException e) {
Log.logException(e);
}
this.obj.clear();
this.bmk = new YMarkEntry();
}
return true;
}
@Override
public boolean startObjectEntry(String key) throws ParseException, IOException {
this.key.setLength(0);
this.key.append(key);
return true;
}
@Override
public boolean primitive(Object value) throws ParseException, IOException {
this.value.setLength(0);
if(value instanceof java.lang.String) {
this.value.append((String)value);
} else if(value instanceof java.lang.Boolean) {
this.value.append(value);
} else if(value instanceof java.lang.Number) {
this.value.append(value);
}
return true;
}
@Override
public boolean endObjectEntry() throws ParseException, IOException {
final String key = this.key.toString();
final String value = this.value.toString();
this.obj.put(key, value);
return true;
}
@Override
public void run() {
try {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer run()");
//this.empty = true;
this.parser.parse(this.bmk_file, this, true);
} catch (IOException e) {
Log.logException(e);
} catch (ParseException e) {
Log.logException(e);
} finally {
try {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer inserted poison pill in queue");
this.bookmarks.put(YMarkEntry.POISON);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
public YMarkEntry take() {
try {
return this.bookmarks.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
}

@ -79,6 +79,8 @@ import java.util.zip.ZipInputStream;
import org.apache.solr.common.SolrInputDocument;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.contentcontrol.SMWListSyncThread;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
@ -148,8 +150,6 @@ import net.yacy.document.importer.OAIListFriendsLoader;
import net.yacy.document.parser.audioTagParser;
import net.yacy.document.parser.html.Evaluation;
import net.yacy.gui.Tray;
import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.interaction.contentcontrol.ContentControlImportThread;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataNode;
@ -1017,7 +1017,8 @@ public final class Switchboard extends serverSwitch {
"this is the content control import thread",
null,
new InstantBusyThread(
new ContentControlImportThread(this),
new SMWListSyncThread(this, sb.getConfig("contentcontrol.bookmarklist", "contentcontrol"), "Category:Content Source", "/?Url/?Filter/?Category/?Modification date", sb.getConfigBool(
"contentcontrol.smwimport.purgelistoninit", false)),
"run",
SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT,
SwitchboardConstants.PEER_PING_METHOD_FREEMEM,

@ -39,6 +39,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import net.yacy.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
import net.yacy.cora.document.Classification.ContentDomain;
@ -61,7 +62,6 @@ import net.yacy.cora.util.SpaceExceededException;
import net.yacy.data.WorkTables;
import net.yacy.document.Condenser;
import net.yacy.document.LargeNumberCache;
import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
@ -741,22 +741,20 @@ public final class SearchEvent {
continue;
}
// content control
if (Switchboard.getSwitchboard().getConfigBool("contentcontrol.enabled", false) == true) {
// check global network filter from bookmark list
if (!Switchboard.getSwitchboard()
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null) {
if (!f.isListed(page.url(), null)) {
this.query.misses.add(page.hash());
continue;
}
}
}
}
// contentcontrol
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
FilterEngine f = ContentControlFilterUpdateThread
.getNetworkFilter();
if (f != null) {
if (!f.isListed(page.url(), null)) {
this.query.misses.add(page.hash());
continue;
}
}
}
final String pageurl = page.url().toNormalform(true);
final String pageauthor = page.dc_creator();

Loading…
Cancel
Save