add content control features for custom filter lists

pull/1/head
cominch 13 years ago
parent a3d5959981
commit dc468dad01

@ -1069,7 +1069,20 @@ augmentation.reflect = false
augmentation.addDoctype = false
augmentation.reparse = false
# Content control settings
contentcontrol.enabled = false
contentcontrol.bookmarklist = contentcontrol
contentcontrol.mandatoryfilterlist = yacy
contentcontrol.smwimport.enabled = false
contentcontrol.smwimport.baseurl =
contentcontrol.smwimport.purgelistoninit = true
contentcontrol.smwimport.targetlist = contentcontrol
contentcontrol.smwimport.defaultcategory = yacy
# Interaction settings
interaction.enabled = false
interaction.target = yacy
interaction.feedback.enabled = true
interaction.feedback.url =
interaction.feedback.accept = false

@ -0,0 +1,116 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Content Control</title>
#%env/templates/metas.template%#
</head>
<body id="Settings">
#%env/templates/header.template%#
#%env/templates/submenuBlacklist.template%#
<h2>Content Control</h2>
<form id="contentcontrolsettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="augmentation">Peer Content Control URL Filter</legend>
<p>
With these settings you can activate or deactivate content control on this peer.
</p>
<dl>
<dt><label for="contentcontrolenabled">Use content control filtering:</label></dt>
<dd>
<input type="checkbox" name="contentcontrolenabled" id="contentcontrolenabled" #(contentcontrolenabled_checked)#:: checked="checked"#(/contentcontrolenabled_checked)# />Enabled<br/>
<p class="help">
Enables or disables content control.
</p>
</dd>
<dt><label for="content">Mandatory default filter list (category):</label></dt>
<dd>
<input type="text" name="contentcontrolmfl" value="#[contentcontrolmfl]#" size="60" /><br/><br/>
<p class="help">
Define a category string. If defined, all URLs will be filtered out during crawling and DHT which do not belong to this category.
</p>
</dd>
<dt><label for="content">Use this bookmark list:</label></dt>
<dd>
<input type="text" name="contentcontrolbml" value="#[contentcontrolbml]#" size="60" /><br/><br/>
<p class="help">
Define a bookmark list. Default: contentcontrol
</p>
</dd>
</dl>
<input type="submit" name="contentcontrolSettings" value="Submit"/>
</fieldset>
</form>
<form id="contentcontrolExtraSettings" action="ContentControl_p.html" method="post" enctype="multipart/form-data">
<fieldset><legend id="urlproxy">Content Control Settings</legend>
<p>
With these settings you can define the content control settings.
</p>
<dl>
<dt><label for="ccsmwimport">SMW import to content control list:</label></dt>
<dd>
<input type="checkbox" name="ccsmwimport" id="ccsmwimport" #(ccsmwimport_checked)#:: checked="checked"#(/ccsmwimport_checked)# />Enabled<br/>
<p class="help">
Enable or disable constant background synchronisation of content control list from SMW (Semantic Mediawiki). Requires restart!
</p>
</dd>
<dt><label for="content">SMW import base URL:</label></dt>
<dd>
<input type="text" name="ccsmwimporturl" value="#[ccsmwimporturl]#" size="60" /><br/><br/>
<p class="help">
Define base URL for SMW special page "Ask". Example: http://my.wiki.cc/wiki/Special:Ask
</p>
</dd>
<dt><label for="content">SMW import target bookmark list:</label></dt>
<dd>
<input type="text" name="ccsmwimportlist" value="#[ccsmwimportlist]#" size="60" /><br/><br/>
<p class="help">
Define import target bookmark list. Default: contentcontrol
</p>
</dd>
<dt><label for="content">SMW import default category:</label></dt>
<dd>
<input type="text" name="ccsmwimportcat" value="#[ccsmwimportcat]#" size="60" /><br/><br/>
<p class="help">
Define default category which is added to each entry. This category can be defined as mandatory default filter list.
</p>
</dd>
<dt><label for="ccsmwpurge">Purge content control list on initial sync:</label></dt>
<dd>
<input type="checkbox" name="ccsmwpurge" id="ccsmwpurge" #(ccsmwpurge_checked)#:: checked="checked"#(/ccsmwpurge_checked)# />Enabled<br/>
<p class="help">
Purge content control list on initial synchronisation after startup.
</p>
</dd>
</dl>
<input type="submit" name="contentcontrolExtraSettings" value="Submit"/>
</fieldset>
</form>
#%env/templates/footer.template%#
</body>
</html>

@ -0,0 +1,77 @@
import net.yacy.cora.protocol.RequestHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Servlet behind ContentControl_p.html.
 *
 * Stores the values submitted by the two forms of that page in the peer
 * configuration and returns the current configuration values for the template.
 */
public final class ContentControl_p {

    /**
     * Processes an optional form submission and fills the template properties.
     *
     * @param header request header (not used)
     * @param post   submitted form fields, or {@code null} when the page is only displayed
     * @param env    the switch whose configuration is read and written
     * @return the properties that the template engine substitutes into the page
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header,
            final serverObjects post, final serverSwitch env) {
        final serverObjects prop = new serverObjects();

        if (post != null) {
            // second form: SMW import settings
            if (post.containsKey("contentcontrolExtraSettings")) {
                storeText(env, post, "contentcontrol.smwimport.baseurl", "ccsmwimporturl");
                // an unchecked checkbox is not submitted at all, so anything but "on" means disabled
                env.setConfig("contentcontrol.smwimport.enabled",
                        "on".equals(post.get("ccsmwimport")));
                env.setConfig("contentcontrol.smwimport.purgelistoninit",
                        "on".equals(post.get("ccsmwpurge")));
                storeText(env, post, "contentcontrol.smwimport.targetlist", "ccsmwimportlist");
                storeText(env, post, "contentcontrol.smwimport.defaultcategory", "ccsmwimportcat");
            }

            // first form: general filter settings
            if (post.containsKey("contentcontrolSettings")) {
                env.setConfig("contentcontrol.enabled",
                        "on".equals(post.get("contentcontrolenabled")));
                storeText(env, post, "contentcontrol.mandatoryfilterlist", "contentcontrolmfl");
                storeText(env, post, "contentcontrol.bookmarklist", "contentcontrolbml");
            }
        }

        prop.putHTML("ccsmwimportcat",
                env.getConfig("contentcontrol.smwimport.defaultcategory", "yacy"));
        prop.putHTML("ccsmwimportlist",
                env.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
        prop.put("ccsmwpurge_checked",
                env.getConfigBool("contentcontrol.smwimport.purgelistoninit", false) ? "1" : "0");
        prop.putHTML("ccsmwimporturl",
                env.getConfig("contentcontrol.smwimport.baseurl", ""));
        prop.put("ccsmwimport_checked",
                env.getConfigBool("contentcontrol.smwimport.enabled", false) ? "1" : "0");

        prop.put("contentcontrolenabled_checked",
                env.getConfigBool("contentcontrol.enabled", false) ? "1" : "0");
        prop.putHTML("contentcontrolmfl",
                env.getConfig("contentcontrol.mandatoryfilterlist", "yacy"));
        // same fallback as the page help text and the filter update job ("contentcontrol")
        prop.putHTML("contentcontrolbml",
                env.getConfig("contentcontrol.bookmarklist", "contentcontrol"));

        // return rewrite properties
        return prop;
    }

    /**
     * Copies one text field of the submitted form into the configuration.
     * A field that is missing from the request leaves the stored value untouched
     * instead of handing {@code null} to the configuration.
     */
    private static void storeText(final serverSwitch env, final serverObjects post,
            final String configKey, final String formField) {
        final String submitted = post.get(formField);
        if (submitted != null) {
            env.setConfig(configKey, submitted);
        }
    }
}

@ -5,5 +5,6 @@
<li><a href="/BlacklistCleaner_p.html" class="MenuItemLink lock">Blacklist Cleaner</a></li>
<li><a href="/BlacklistTest_p.html" class="MenuItemLink lock">Blacklist Test</a></li>
<li><a href="/BlacklistImpExp_p.html" class="MenuItemLink lock">Import/Export</a></li>
<li><a href="/ContentControl_p.html" class="MenuItemLink lock">Content Control</a></li>
</ul>
</div>

@ -13,6 +13,7 @@ public class YMarkEntry extends TreeMap<String, String> {
private static final long serialVersionUID = 2179622977348536148L;
public static final YMarkEntry POISON = new YMarkEntry();
public static final YMarkEntry EMPTY = new YMarkEntry();
public static final String BOOKMARKS_ID = "id";
public static final String BOOKMARKS_REF = "ref";
public static final String FOLDERS_IMPORTED = "/imported";
@ -28,7 +29,11 @@ public class YMarkEntry extends TreeMap<String, String> {
PUBLIC ("public", "", "false", "private", "yacy:public", "", "lock"),
TAGS ("tags", "dc:subject", "unsorted", "shortcuturl", "yacy:tags", "keyword", "tag"),
VISITS ("visits", "", "0", "", "yacy:visits", "", "stat"),
FOLDERS ("folders", "", "/unsorted", "", "", "", "folder");
FOLDERS ("folders", "", "/unsorted", "", "", "", "folder"),
FILTER ("filter", "", "", "", "yacy:filter", "", "filter"),
OAI ("oai", "", "", "", "yacy:oai", "", "oai"),
URLHASH ("urlhash", "", "", "", "yacy:urlhash", "", "urlhash"),
STARRATING ("starrating", "", "", "", "yacy:starrating", "", "stat");
private String key;
private String dc_attrb;

@ -0,0 +1,202 @@
package de.anomic.data.ymark;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import org.json.simple.parser.ContentHandler;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class YMarkSMWJSONImporter implements Runnable, ContentHandler{
// Importer Variables
private final ArrayBlockingQueue<YMarkEntry> bookmarks;
private final Reader bmk_file;
private final String RootFolder;
private final StringBuilder folderstring;
private YMarkEntry bmk;
private final JSONParser parser;
private boolean empty = true;
private int count = 0;
// Parser Variables
private final StringBuilder value;
private final StringBuilder key;
private final StringBuilder date;
private final HashMap<String,String> obj;
private Boolean isBookmark;
public YMarkSMWJSONImporter(final Reader bmk_file, final int queueSize, final String root) {
this.bookmarks = new ArrayBlockingQueue<YMarkEntry>(queueSize);
this.bmk_file = bmk_file;
this.RootFolder = root;
this.folderstring = new StringBuilder(YMarkTables.BUFFER_LENGTH);
this.folderstring.append(this.RootFolder);
this.bmk = new YMarkEntry();
this.parser = new JSONParser();
this.value = new StringBuilder(128);
this.key = new StringBuilder(16);
this.date = new StringBuilder(32);
this.obj = new HashMap<String,String>();
this.isBookmark = false;
this.empty = true;
this.count = 0;
}
public void startJSON() throws ParseException, IOException {
}
public void endJSON() throws ParseException, IOException {
}
public boolean startArray() throws ParseException, IOException {
final String key = this.key.toString();
if(key.equals("items") ) {
this.isBookmark = true;
this.count = 0;
}
return true;
}
public boolean endArray() throws ParseException, IOException {
return true;
}
public boolean startObject() throws ParseException, IOException {
return true;
}
public boolean endObject() throws ParseException, IOException {
if(this.isBookmark) {
if(this.obj.containsKey("category")) {
String catstr = obj.get("category");
HashSet<String> tags = YMarkUtil.keysStringToSet (catstr);
HashSet<String> categories = YMarkUtil.keysStringToSet("");
for (String c: tags) {
c = c.split(":")[1];
c = c.replace("/", "_");
c = c.replace(" ", "_");
if (!c.equals("") && (!c.equals(" "))) {
categories.add ("sc:"+c);
}
}
if (!Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", "").equals("")) {
categories.add ("sc:"+Switchboard.getSwitchboard().getConfig("contentcontrol.smwimport.defaultcategory", ""));
}
catstr = YMarkUtil.keySetToString(categories);
this.bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), catstr);
}
if(this.obj.containsKey("article_has_average_rating")) {
this.bmk.put(YMarkEntry.BOOKMARK.STARRATING.key(),obj.get("article_has_average_rating"));
}
this.bmk.put(YMarkEntry.BOOKMARK.TITLE.key(),obj.get("label"));
this.bmk.put(YMarkEntry.BOOKMARK.URL.key(),obj.get("url"));
if(this.obj.containsKey("filter")) {
this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),obj.get("filter"));
} else {
this.bmk.put(YMarkEntry.BOOKMARK.FILTER.key(),"");
}
try {
this.bookmarks.put(this.bmk);
this.count++;
} catch (InterruptedException e) {
Log.logException(e);
}
this.obj.clear();
this.bmk = new YMarkEntry();
}
return true;
}
public boolean startObjectEntry(String key) throws ParseException, IOException {
this.key.setLength(0);
this.key.append(key);
return true;
}
public boolean primitive(Object value) throws ParseException, IOException {
this.value.setLength(0);
if(value instanceof java.lang.String) {
this.value.append((String)value);
} else if(value instanceof java.lang.Boolean) {
this.value.append((Boolean)value);
} else if(value instanceof java.lang.Number) {
this.value.append((Number)value);
}
return true;
}
public boolean endObjectEntry() throws ParseException, IOException {
final String key = this.key.toString();
final String value = this.value.toString();
this.obj.put(key, value);
return true;
}
public void run() {
try {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer run()");
this.empty = true;
this.parser.parse(this.bmk_file, this, true);
} catch (IOException e) {
Log.logException(e);
} catch (ParseException e) {
Log.logException(e);
} finally {
try {
Log.logInfo(YMarkTables.BOOKMARKS_LOG, "SMWJSON Importer inserted poison pill in queue");
this.bookmarks.put(YMarkEntry.POISON);
} catch (InterruptedException e) {
Log.logException(e);
}
}
}
public YMarkEntry take() {
try {
return this.bookmarks.take();
} catch (InterruptedException e) {
Log.logException(e);
return null;
}
}
}

@ -99,9 +99,12 @@ public class YMarkTables {
public final static int BUFFER_LENGTH = 256;
private final WorkTables worktables;
public boolean dirty = false;
/**
 * Creates the bookmark facade on top of the given tables.
 *
 * @param wt the backing tables; must be a {@code WorkTables} instance because it is cast to that type
 */
public YMarkTables(final Tables wt) {
    this.worktables = (WorkTables) wt;
    // a new instance starts out dirty
    // NOTE(review): presumably so that consumers of the flag rebuild derived data once after startup - confirm
    this.dirty = true;
}
public void deleteBookmark(final String bmk_user, final byte[] urlHash) throws IOException, SpaceExceededException {
@ -111,6 +114,7 @@ public class YMarkTables {
if(bmk_row != null) {
this.worktables.delete(bmk_table,urlHash);
}
dirty = true;
}
public void deleteBookmark(final String bmk_user, final String url) throws IOException, SpaceExceededException {
@ -215,6 +219,16 @@ public class YMarkTables {
final Pattern p = Pattern.compile(patternBuilder.toString(), Pattern.CASE_INSENSITIVE);
return this.worktables.iterator(bmk_table, YMarkEntry.BOOKMARK.TAGS.key(), p);
}
/**
 * Returns the bookmarks of a user whose tags column matches a regular expression.
 *
 * @param bmk_user user whose bookmark table is searched
 * @param regex    regular expression, compiled case-insensitively and applied to the tags column
 * @return iterator over the matching rows
 * @throws IOException declared for the table access
 */
public Iterator<Tables.Row> getBookmarksByTag(final String bmk_user, String regex) throws IOException {
    final String tableName = TABLES.BOOKMARKS.tablename(bmk_user);
    // String.valueOf keeps the former StringBuilder.append semantics for a null regex
    final Pattern tagPattern = Pattern.compile(String.valueOf(regex), Pattern.CASE_INSENSITIVE);
    final String tagColumn = YMarkEntry.BOOKMARK.TAGS.key();
    return this.worktables.iterator(tableName, tagColumn, tagPattern);
}
public List<Row> orderBookmarksBy(final Iterator<Row> rowIterator, final String sortname, final String sortorder) {
final List<Row> sortList = new ArrayList<Row>();
@ -236,6 +250,7 @@ public class YMarkTables {
bmk.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(tagString));
addBookmark(bmk_user, bmk, merge, true);
}
dirty = true;
}
public void replaceTags(final Iterator<Row> rowIterator, final String bmk_user, final String tagString, final String replaceString) throws IOException {
@ -255,6 +270,7 @@ public class YMarkTables {
row.put(YMarkEntry.BOOKMARK.TAGS.key(), YMarkUtil.cleanTagsString(t.toString()));
this.worktables.update(TABLES.BOOKMARKS.tablename(bmk_user), row);
}
dirty = true;
}
public void addFolder(final String bmk_user, final String url, final String folder) throws IOException, SpaceExceededException {
@ -391,6 +407,8 @@ public class YMarkTables {
// update bmk_table
this.worktables.update(bmk_table, bmk_row);
}
dirty = true;
}
}
}

@ -0,0 +1,114 @@
package net.yacy.interaction.contentcontrol;
import java.io.IOException;
import java.util.Iterator;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.repository.FilterEngine;
import net.yacy.search.Switchboard;
/**
 * Periodic job that rebuilds the content control URL filter from the bookmark list
 * whenever the bookmark tables are flagged dirty.
 *
 * The most recently built filter is published through {@link #getNetworkFilter()}.
 */
public class ContentControlFilterUpdateThread {

    private final Switchboard sb;

    /** Guards against overlapping executions of {@link #run()}. */
    private boolean locked = false;

    /** Written by the update job and read by other threads, hence volatile. */
    private static volatile FilterEngine networkfilter;

    /**
     * Creates the job. If "contentcontrol.smwimport.purgelistoninit" is set, the table
     * named by "contentcontrol.smwimport.targetlist" is cleared.
     */
    public ContentControlFilterUpdateThread(final Switchboard sb) {
        this.sb = sb;

        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)) {
            this.sb.tables.clear(this.sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
        }
    }

    /**
     * Rebuilds the filter if content control is enabled, a mandatory filter list is
     * configured and the bookmarks changed since the last rebuild.
     */
    public final void run() {
        if (locked) {
            return;
        }
        locked = true;
        try {
            if (!this.sb.getConfigBool("contentcontrol.enabled", false)) {
                return;
            }
            if (this.sb.getConfig("contentcontrol.mandatoryfilterlist", "").equals("")) {
                return;
            }
            if (!sb.tables.bookmarks.dirty) {
                return;
            }

            // Clear the flag before reading the bookmarks: a change that arrives while the
            // filter is being built sets it again and triggers another rebuild.
            sb.tables.bookmarks.dirty = false;
            boolean rebuilt = false;
            try {
                networkfilter = updateFilter();
                rebuilt = true;
            } finally {
                if (!rebuilt) {
                    // the rebuild died unexpectedly, try again on the next cycle
                    sb.tables.bookmarks.dirty = true;
                }
            }
        } finally {
            // always release the guard, otherwise one failure would stop all future updates
            locked = false;
        }
    }

    /**
     * Builds a new filter from the "filter" column of the bookmarks selected by the
     * tag expression derived from "contentcontrol.mandatoryfilterlist".
     * If reading the bookmarks fails, the entries collected so far are returned.
     */
    private static FilterEngine updateFilter () {
        final FilterEngine newfilter = new FilterEngine();
        final Switchboard sb = Switchboard.getSwitchboard();

        final String bookmarkList = sb.getConfig("contentcontrol.bookmarklist", "contentcontrol");
        final String tagExpression = "^((?!sc:"
                + sb.getConfig("contentcontrol.mandatoryfilterlist", "")
                + ").*)$";

        try {
            final Iterator<Tables.Row> it = sb.tables.bookmarks.getBookmarksByTag(bookmarkList, tagExpression);
            while (it.hasNext()) {
                final Row b = it.next();
                final String filter = b.get("filter", "");
                if (!filter.equals("")) {
                    newfilter.add(filter, null);
                }
            }
        } catch (final IOException e) {
            e.printStackTrace();
        }

        return newfilter;
    }

    /**
     * @return the current filter, or {@code null} if none has been built yet or it has no entries
     */
    public static FilterEngine getNetworkFilter() {
        final FilterEngine f = networkfilter;
        if (f != null && f.size() > 0) {
            return f;
        }
        return null;
    }
}

@ -0,0 +1,253 @@
package net.yacy.interaction.contentcontrol;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.blob.Tables.Row;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.data.ymark.YMarkEntry;
import de.anomic.data.ymark.YMarkSMWJSONImporter;
import de.anomic.data.ymark.YMarkUtil;
/**
 * Periodic job that synchronises a bookmark list with a Semantic MediaWiki (SMW).
 *
 * Each call of {@link #run()} performs one step: while no import job is running it asks
 * the wiki how many pages changed since the last synchronisation; while a job is running
 * it downloads and imports the next batch of at most {@code limit} entries.
 */
public class ContentControlImportThread {

    private static final String LOG_TAG = "CONTENTCONTROL";

    private final Switchboard sb;

    /** Guards against overlapping executions of {@link #run()}. */
    private boolean locked = false;

    private String lastsync = "1900-01-01T01:00:00";
    private String currenttimestamp = "1900-01-01T01:00:00";

    private long offset = 0;
    private final long limit = 500;
    private long currentmax = 0;

    private boolean runningjob = false;

    /**
     * Creates the job. If "contentcontrol.smwimport.purgelistoninit" is set, the table
     * named by "contentcontrol.smwimport.targetlist" is cleared.
     */
    public ContentControlImportThread(final Switchboard sb) {
        this.sb = sb;

        if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", false)) {
            this.sb.tables.clear(this.sb.getConfig(
                    "contentcontrol.smwimport.targetlist", "contentcontrol"));
        }
    }

    /**
     * Escapes a string with the "-XX" scheme used in the path of the SMW "Ask" page.
     * '-' is replaced first so that the escape character itself is not escaped twice.
     */
    private static String wikiurlify (final String s) {
        String ret = s;
        ret = ret.replace("-", "-2D");
        ret = ret.replace("+", "-2B");
        ret = ret.replace(" ", "-20");
        ret = ret.replace("[", "-5B");
        ret = ret.replace("]", "-5D");
        ret = ret.replace(":", "-3A");
        ret = ret.replace(">", "-3E");
        ret = ret.replace("?", "-3F");
        return ret;
    }

    /** Builds the query part shared by the count request and the batch request. */
    private static String askQuery(final String modifiedSince) {
        return wikiurlify("/[[Category:Web Page]] [[Modification date::>" + modifiedSince + "]]")
                + wikiurlify("/?Url/?Filter/?Article has average rating/?Category")
                + "/mainlabel%3D";
    }

    /**
     * Executes one synchronisation step if the SMW import is enabled.
     */
    public final void run() {
        if (locked) {
            return;
        }
        locked = true;
        try {
            if (!sb.getConfigBool("contentcontrol.smwimport.enabled", false)) {
                return;
            }
            if (runningjob) {
                importNextBatch();
            } else {
                checkForChanges();
            }
        } finally {
            // Release the guard on every path. It used to stay set when the import was
            // disabled or when an unexpected exception escaped, which stopped the job for good.
            locked = false;
        }
    }

    /** Downloads the next batch of the running job and stores its entries as bookmarks. */
    private void importNextBatch() {
        Log.logInfo("CONTENTCONTROL",
                "CONTENTCONTROL importing max. " + limit
                        + " elements at " + offset + " of "
                        + currentmax + ", since "
                        + currenttimestamp);

        final String baseUrl = sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseUrl.equals("")) {
            return;
        }

        final URL bmks_json;
        try {
            bmks_json = new URL(baseUrl
                    + askQuery(currenttimestamp)
                    + "/offset%3D" + offset
                    + "/limit%3D" + limit
                    + "/format%3Djson");
        } catch (final MalformedURLException e) {
            Log.logException(e);
            return;
        }

        offset += limit;
        if (offset > currentmax) {
            // this is the last batch of the job
            runningjob = false;
        }

        final InputStreamReader reader;
        try {
            reader = new InputStreamReader(bmks_json.openStream(), "UTF-8");
        } catch (final IOException e) {
            Log.logException(e);
            runningjob = false;
            return;
        }

        try {
            final YMarkSMWJSONImporter bookmarkImporter = new YMarkSMWJSONImporter(reader, 200, "");
            new Thread(bookmarkImporter, "YMarks - Network bookmark importer").start();

            final String targetList = sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol");
            YMarkEntry bmk;
            while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
                if (bmk == null) {
                    // the importer hands out null when waiting for an entry was interrupted
                    break;
                }
                if (bmk == YMarkEntry.EMPTY) {
                    runningjob = false;
                    continue;
                }
                try {
                    sb.tables.bookmarks.addBookmark(targetList, bmk, true, true);
                } catch (final Exception e) {
                    // a single broken entry must not abort the batch
                    Log.logException(e);
                }
            }
        } finally {
            try {
                reader.close();
            } catch (final IOException e) {
                Log.logException(e);
            }
        }
    }

    /**
     * Asks the wiki for the number of pages modified since the last synchronisation
     * and starts a new import job if there are any. The reply is expected in the
     * form "count,timestamp".
     */
    private void checkForChanges() {
        final String baseUrl = sb.getConfig("contentcontrol.smwimport.baseurl", "");
        if (baseUrl.equals("")) {
            Log.logWarning("CONTENTCONTROL", "No SMWimport URL defined");
            return;
        }

        try {
            final URL bmks_count = new URL(baseUrl
                    + askQuery(lastsync)
                    + "/format%3Dsupercount");

            final byte[] raw = new HTTPClient().GETbytes(bmks_count.toString());
            if (raw == null) {
                Log.logWarning(LOG_TAG, "No reply to SMW count request");
                return;
            }

            final String reply = UTF8.String(raw);
            final String[] fields = reply.split(",");
            if (fields.length < 2) {
                Log.logWarning(LOG_TAG, "Unexpected reply to SMW count request: " + reply);
                return;
            }

            currentmax = Long.parseLong(fields[0].trim());
            final String lastsyncstring = fields[1].trim();

            if (currentmax > 0) {
                Log.logInfo("CONTENTCONTROL",
                        "CONTENTCONTROL import job counts "
                                + currentmax
                                + " new elements between "
                                + lastsync + " and "
                                + currenttimestamp);

                // the job imports everything modified since the previous synchronisation
                currenttimestamp = lastsync;
                runningjob = true;
                lastsync = lastsyncstring;
                offset = 0;
            }
        } catch (final NumberFormatException e) {
            Log.logWarning(LOG_TAG, "Unexpected element count in reply to SMW count request");
        } catch (final IOException e) {
            // also covers MalformedURLException
            Log.logException(e);
        }
    }
}

@ -110,6 +110,8 @@ import net.yacy.document.content.SurrogateReader;
import net.yacy.document.importer.OAIListFriendsLoader;
import net.yacy.document.parser.html.Evaluation;
import net.yacy.gui.Tray;
import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.interaction.contentcontrol.ContentControlImportThread;
import net.yacy.kelondro.blob.Tables;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
@ -969,7 +971,39 @@ public final class Switchboard extends serverSwitch
Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_IDLESLEEP, "5000")),
Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_BUSYSLEEP, "0")),
Long.parseLong(getConfig(SwitchboardConstants.INDEX_DIST_MEMPREREQ, "1000000")));
// content control: initialize list sync thread
deployThread(
"720_ccimport",
"Content Control Import",
"this is the content control import thread",
null,
new InstantBusyThread(
new ContentControlImportThread(sb),
"run",
SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT,
SwitchboardConstants.PEER_PING_METHOD_FREEMEM,
3000,
10000,
3000,
10000),
2000);
deployThread(
"730_ccfilter",
"Content Control Filter",
"this is the content control filter update thread",
null,
new InstantBusyThread(
new ContentControlFilterUpdateThread(sb),
"run",
SwitchboardConstants.PEER_PING_METHOD_JOBCOUNT,
SwitchboardConstants.PEER_PING_METHOD_FREEMEM,
3000,
10000,
3000,
10000),
2000);
// set network-specific performance attributes
if ( this.firstInit ) {
setRemotecrawlPPM(Math.max(1, (int) getConfigLong("network.unit.remotecrawl.speed", 60)));
@ -981,7 +1015,7 @@ public final class Switchboard extends serverSwitch
//query.add(CrawlSwitchboardEntry.word2hash("Zahl"));
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/mobil/newsticker/meldung/mail/54980"), query, true);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/security/news/foren/go.shtml?read=1&msg_id=7301419&forum_id=72721"), query, true);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260);
//plasmaSnippetCache.result scr = snippetCache.retrieve(new URL("http://www.heise.de/kiosk/archiv/ct/2003/4/20"), query, true, 260);
this.trail = new LinkedBlockingQueue<String>();

@ -56,6 +56,7 @@ import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.document.Condenser;
import net.yacy.document.LibraryProvider;
import net.yacy.interaction.contentcontrol.ContentControlFilterUpdateThread;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadata;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -68,6 +69,7 @@ import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.TermSearch;
import net.yacy.peers.graphics.ProfilingGraph;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.repository.FilterEngine;
import net.yacy.search.EventTracker;
import net.yacy.search.Switchboard;
import net.yacy.search.index.Segment;
@ -706,6 +708,29 @@ public final class RWIProcess extends Thread
this.sortout++;
continue;
}
// content control
if (Switchboard.getSwitchboard().getConfigBool(
"contentcontrol.enabled", false) == true) {
// check global network filter from bookmark list
if (!Switchboard.getSwitchboard()
.getConfig("contentcontrol.mandatoryfilterlist", "")
.equals("")) {
FilterEngine f = ContentControlFilterUpdateThread.getNetworkFilter();
if (f != null) {
if (!f.isListed(page.url(), null)) {
this.sortout++;
continue;
}
}
}
}
final String pageurl = page.url().toNormalform(true, true);
final String pageauthor = page.dc_creator();

Loading…
Cancel
Save