blocking of blacklisted urls in indexReceive and small changes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@397 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 2f0d7ea8d3
commit 311e627363

@ -182,8 +182,8 @@ public class Blacklist_p {
}else{
prop.put("status", 1);//removed
prop.put("status_item", line);
if (httpdProxyHandler.blackListURLs != null)
httpdProxyHandler.blackListURLs.remove(line);
if (listManager.switchboard.blackListURLs != null)
listManager.switchboard.blackListURLs.remove(line);
}
}
prop.put("Itemlist", numItems);
@ -215,8 +215,8 @@ public class Blacklist_p {
prop.put("status_item", newItem);//added
//add to blacklist
if (httpdProxyHandler.blackListURLs != null)
httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
if (listManager.switchboard.blackListURLs != null)
listManager.switchboard.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
}
listManager.writeList(new File(listManager.listsPath, filename), out);

@ -60,12 +60,20 @@ The local index currently consists of (at least) #[wcount]# reverse word indexes
This enables automated, DHT-ruled Index Transmission to other peers. This is currently only activated for junior peers.</td>
<td><input type="submit" name="setIndexDistribute" value="set"></td>
</tr>
</table>
<table border="0" cellpadding="5" cellspacing="0">
<tr valign="top" class="TableCellLight">
<td width="100">Index Receive:</td>
<td><input type="checkbox" name="indexReceive" align="top" #[indexReceiveChecked]#>
Accept remote Index Transmissions. This works only if you are a senior peer. The DHT-rules do not work without this function.</td>
<td><input type="submit" name="setIndexReceive" value="set"></td>
</tr>
<tr valign="top" class="TableCellLight">
<td></td><td>&nbsp;&nbsp;<input type="checkbox" name="indexReceiveBlockBlacklist" align="top" #[indexReceiveBlockBlacklistChecked]#>
If checked, your peer silently ignores transmitted URLs that match your blacklist</td>
</tr>
<tr valign="top" class="TableCellLight">
<td></td><td><input type="submit" name="setIndexReceive" value="set"></td>
</tr>
</table>
</form>

@ -81,6 +81,7 @@ public class IndexControl_p {
prop.put("otherHosts", "");
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");
return prop; // be save
}
@ -113,6 +114,8 @@ public class IndexControl_p {
boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on");
switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? "true" : "false");
yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex);
boolean indexReceiveBlockBlacklist = ((String) post.get("indexReceiveBlockBlacklist", "")).equals("on");
switchboard.setConfig("indexReceiveBlockBlacklist", (indexReceiveBlockBlacklist) ? "true" : "false");
}
if (post.containsKey("keyhashdeleteall")) {
@ -293,6 +296,7 @@ public class IndexControl_p {
prop.put("ucount", "" + switchboard.urlPool.loadedURL.size());
prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");
// return rewrite properties
return prop;
}

@ -52,7 +52,7 @@
</tr>
#{/table}#
<tr class="TableCellLight">
<td class="small" align="left" colspan="16">
<td class="small" align="left" colspan="18">
<input type="submit" name="delaysubmit" value="Submit New Delay Values">
Changes take effect immediately</td>
</tr>

@ -241,8 +241,8 @@ public class sharedBlacklist_p {
out += newItem+"\n";
prop.put("status_list_"+count+"_entry", newItem);
count++;
if (httpdProxyHandler.blackListURLs != null)
httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
if (switchboard.blackListURLs != null)
switchboard.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
//write the list
try{

@ -43,7 +43,11 @@
// javac -classpath .:../classes transferRWI.java
import java.net.URL;
import java.net.MalformedURLException;
import de.anomic.http.httpHeader;
import de.anomic.http.httpdProxyHandler;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -64,10 +68,12 @@ public class transferURL {
String key = (String) post.get("key", ""); // transmission key
int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls
boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true");
boolean blockBlacklist = switchboard.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
// response values
String result = "";
String doublevalues = "0";
URL url;
if (granted) {
int received = 0;
@ -76,11 +82,29 @@ public class transferURL {
String urls;
for (int i = 0; i < urlc; i++) {
urls = (String) post.get("url" + i);
if (urls == null) {
yacyCore.log.logDebug("transferURL: got null url-String from peer " + youare);
} else {
try {
url = new URL(urls);
} catch (MalformedURLException e) {
yacyCore.log.logDebug("transferURL: got malformed url-String '" + urls + "' from peer " + youare);
urls = null;
url = null;
}
if ((urls != null) && (blockBlacklist)) {
if (switchboard.blacklistedURL(url.getHost().toLowerCase(), url.getPath())) {
yacyCore.log.logDebug("transferURL: blocked blacklisted url '" + urls + "' from peer " + youare);
urls = null;
}
}
if (urls != null) {
switchboard.urlPool.loadedURL.newEntry(urls, true, iam, iam, 3);
yacyCore.log.logDebug("transferURL: received url '" + urls + "' from peer " + youare);
received++;
}
}
}
yacyCore.seedDB.mySeed.incRU(received);

@ -234,9 +234,9 @@ public class listManager {
/**
 * Re-reads the blacklist maps from the list files named in the
 * "proxyBlackListsActive" configuration value. When that value is empty,
 * fresh empty maps are installed instead.
 */
public static void reloadBlacklists(){
    String f = switchboard.getConfig("proxyBlackListsActive", "");
    // Compare by content: "!=" on Strings only tests object identity, so an
    // empty value that is not the very same "" instance would be treated as
    // a file list.
    if ((f != null) && (f.length() > 0)){
        httpdProxyHandler.blackListURLs = httpdProxyHandler.loadBlacklist("black", f, "/");
        switchboard.blackListURLs = switchboard.loadBlacklist("black", f, "/");
    }else{
        httpdProxyHandler.blackListURLs = new TreeMap();
        switchboard.blackListURLs = new TreeMap();
    }
}

@ -106,12 +106,9 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
private static plasmaSwitchboard switchboard = null;
private static plasmaHTCache cacheManager = null;
public static HashSet yellowList = null;
public static TreeMap blackListURLs = null;
private static int timeout = 30000;
private static boolean yacyTrigger = true;
public static boolean isTransparentProxy = false;
public static boolean remoteProxyUse = false;
public static String remoteProxyHost = "";
public static int remoteProxyPort = -1;
@ -195,65 +192,13 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
String f;
// load the yellow-list
f = switchboard.getConfig("proxyYellowList", null);
if (f != null) yellowList = loadSet("yellow", f); else yellowList = new HashSet();
// load the black-list / inspired by [AS]
f = switchboard.getConfig("proxyBlackListsActive", null);
if (f != null) blackListURLs = loadBlacklist("black", f, "/"); else blackListURLs = new TreeMap();
this.theLogger.logSystem("Proxy Handler Initialized");
}
}
/**
 * Reads a text file into a set of lower-cased entries, one per line.
 * Blank lines and lines starting with '#' are skipped. If the file cannot
 * be read, the entries collected so far (possibly none) are returned.
 *
 * @param setname  label used only in the log message
 * @param filename path of the file to read
 * @return the set of trimmed, lower-cased lines
 */
private static HashSet loadSet(String setname, String filename) {
    final HashSet entries = new HashSet();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
        for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
            final String entry = raw.trim();
            if (entry.length() == 0) continue;  // blank line
            if (entry.startsWith("#")) continue; // comment line
            entries.add(entry.toLowerCase());
        }
        reader.close();
        serverLog.logInfo("PROXY", "read " + setname + " set from file " + filename);
    } catch (IOException e) {
        // best effort: an unreadable file simply contributes no entries
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                // nothing useful can be done if closing fails
            }
        }
    }
    return entries;
}
private static TreeMap loadMap(String mapname, String filename, String sep) {
TreeMap map = new TreeMap();
BufferedReader br = null;
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
String line;
int pos;
while ((line = br.readLine()) != null) {
line = line.trim();
if ((line.length() > 0) && (!(line.startsWith("#"))) && ((pos = line.indexOf(sep)) > 0))
map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
}
serverLog.logInfo("PROXY", "read " + mapname + " map from file " + filename);
} catch (IOException e) {
} finally {
if (br != null) try { br.close(); } catch (Exception e) {}
if (f != null) {
yellowList = serverFileUtils.loadSet("yellow", f);
this.theLogger.logSystem("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries");
} else {
yellowList = new HashSet();
}
return map;
}
/**
 * Merges several blacklist files into one map.
 *
 * @param mapname   label passed through to loadMap
 * @param filenames comma-separated file names, resolved against the
 *                  configured "listsPath" directory (default "DATA/LISTS")
 * @param sep       separator between key and value on each line
 * @return the merged map; empty when the switchboard is not set yet
 */
public static TreeMap loadBlacklist(String mapname, String filenames, String sep) {
    final TreeMap merged = new TreeMap();
    if (switchboard == null) {
        return merged; // not initialized yet
    }
    final File listsDir = new File(switchboard.getRootPath(), switchboard.getConfig("listsPath", "DATA/LISTS"));
    final String[] names = filenames.split(",");
    for (int n = 0; n < names.length; n++) {
        final String listFile = (new File(listsDir, names[n])).toString();
        merged.putAll(loadMap(mapname, listFile, sep));
    }
    return merged;
}
private static String domain(String host) {
@ -271,31 +216,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
return domain;
}
/**
 * Tests whether a host/path pair is covered by the blacklist.
 *
 * Lookup order: host prefixes ending in ".*" (shortest first), then host
 * suffixes starting with "*." (shortest first), then the exact host. The
 * first blacklist entry found decides the result: its path pattern is
 * either "*" (everything) or a regular expression that must match the
 * path without its first character.
 *
 * @param hostlow lower-cased host name
 * @param path    request path; may be empty
 * @return true if the URL is blacklisted
 */
private boolean blacklistedURL(String hostlow, String path) {
    if (blackListURLs == null) return false;
    String pp; // path-pattern

    // first try to match the domain with wildcard '*'
    // [TL] While "." are found within the string
    int index = 0;
    while ((index = hostlow.indexOf('.', index + 1)) != -1) {
        if ((pp = (String) blackListURLs.get(hostlow.substring(0, index + 1) + "*")) != null) {
            return pathMatchesPattern(pp, path);
        }
    }
    index = hostlow.length();
    while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
        if ((pp = (String) blackListURLs.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
            return pathMatchesPattern(pp, path);
        }
    }

    // try to match without wildcard in domain
    pp = (String) blackListURLs.get(hostlow);
    return (pp != null) && pathMatchesPattern(pp, path);
}

/**
 * Applies one blacklist path pattern to a path. An empty or null path is
 * matched as the empty string instead of failing in substring(1).
 */
private static boolean pathMatchesPattern(String pp, String path) {
    if (pp.equals("*")) return true;
    String relative = ((path == null) || (path.length() == 0)) ? "" : path.substring(1);
    return relative.matches(pp);
}
public void handleOutgoingCookies(httpHeader requestHeader, String targethost, String clienthost) {
/*
The syntax for the header is:
@ -391,7 +311,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase();
if (blacklistedURL(hostlow, path)) {
if (switchboard.blacklistedURL(hostlow, path)) {
httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -877,7 +797,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers
String hostlow = host.toLowerCase();
if (blacklistedURL(hostlow, path)) {
if (switchboard.blacklistedURL(hostlow, path)) {
try {
byte[] errorMsg = ("404 (generated): URL '" + hostlow + "' blocked by yacy proxy (blacklisted)\r\n").getBytes();
httpd.sendRespondHeader(conProp,respond,httpVer,404,"Not Found (AGIS)",0);

@ -132,12 +132,17 @@ package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import de.anomic.server.serverCodings;
public class kelondroHashtable {
kelondroArray hashArray;
int offset;
int maxk;
int maxrehash;
private kelondroArray hashArray;
private int offset;
private int maxk;
private int maxrehash;
private byte[][] dummyRow;
private static final byte[] dummyKey = serverCodings.enhancedCoder.encodeBase64Long(0, 5).getBytes();
public kelondroHashtable(File file, int[] columns, int offset, int maxsize, int maxrehash) throws IOException {
// this creates a new hashtable
@ -149,18 +154,25 @@ public class kelondroHashtable {
// this number is needed to omit grow of the table in case of re-hashing
// the maxsize is re-computed to a virtual folding height and will result in a tablesize
// less than the given maxsize. The actual maxsize can be retrieved by maxsize()
hashArray = new kelondroArray(file, extCol(columns), 6);
this.hashArray = new kelondroArray(file, extCol(columns), 6);
this.offset = offset;
this.maxk = kelondroMSetTools.log2a(maxsize); // equal to log2(maxsize) + 1
this.maxk = kelondroMSetTools.log2a(maxsize); // equal to |log2(maxsize)| + 1
if (this.maxk >= kelondroMSetTools.log2a(maxsize + power2(offset + 1) + 1) - 1) this.maxk--;
this.maxrehash = maxrehash;
hashArray.seti(0, this.offset);
hashArray.seti(1, this.maxk);
hashArray.seti(1, this.maxk);
hashArray.seti(2, this.maxrehash);
dummyRow = new byte[hashArray.columns()][];
dummyRow[0] = dummyKey;
for (int i = 0; i < hashArray.columns(); i++) dummyRow[i] = new byte[0];
}
public kelondroHashtable(File file) throws IOException{
// this opens a file with an existing hashtable
hashArray = new kelondroArray(file);
this.hashArray = new kelondroArray(file);
this.offset = hashArray.geti(0);
this.maxk = hashArray.geti(1);
this.maxrehash = hashArray.geti(2);
}
private int[] extCol(int[] columns) {
@ -175,15 +187,51 @@ public class kelondroHashtable {
while (x > 0) {p = p << 1; x--;}
return p;
}
/*
public synchronized byte[][] get(int key) throws IOException {
public synchronized byte[][] get(int key) throws IOException {
Object[] search = search(new Hash(key));
if (search[1] == null) return null;
byte[][] row = (byte[][]) search[1];
byte[][] result = new byte[row.length - 1][];
System.arraycopy(row, 1, result, 0, row.length - 1);
return result;
}
public synchronized byte[][] put(int key, byte[][] newrow) throws IOException {
public synchronized byte[][] put(int key, byte[][] row) throws IOException {
Hash hash = new Hash(key);
// find row
Object[] search = search(hash);
byte[][] oldrow;
int rowNumber = ((Integer) search[0]).intValue();
if (search[1] == null) {
oldrow = null;
} else {
oldrow = (byte[][]) search[1];
}
// make space
while (rowNumber >= hashArray.size()) hashArray.set(hashArray.size(), dummyRow);
// write row
byte[][] newrow = new byte[hashArray.columns()][];
newrow[0] = serverCodings.enhancedCoder.encodeBase64Long(hash.key(), 5).getBytes();
System.arraycopy(row, 0, newrow, 1, row.length);
hashArray.set(rowNumber, row);
return oldrow;
}
private Object[] search(Hash hash) throws IOException {
byte[][] row;
int rowKey;
int rowNumber;
do {
rowNumber = hash.node();
if (rowNumber >= hashArray.size()) return new Object[]{new Integer(rowNumber), null};
row = hashArray.get(rowNumber);
rowKey = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(row[0]));
if (rowKey == 0) return new Object[]{new Integer(rowNumber), null};
hash.rehash();
} while (rowKey != hash.key());
return new Object[]{new Integer(rowNumber), row};
}
*/
private class Hash {

@ -117,6 +117,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.TreeMap;
import java.util.Vector;
import de.anomic.data.messageBoard;
@ -134,6 +135,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSemaphore;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.server.serverFileUtils;
import de.anomic.tools.bitfield;
import de.anomic.tools.crypt;
import de.anomic.yacy.yacyClient;
@ -152,6 +154,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// couloured list management
public static TreeSet blueList = null;
public static TreeSet stopwords = null;
public static TreeMap blackListURLs = null;
// storage management
private File cachePath;
@ -217,6 +220,16 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (f != null) blueList = loadList(new File(f)); else blueList= new TreeSet();
}
// load the black-list / inspired by [AS]
String f = getConfig("proxyBlackListsActive", null);
if (f != null) {
blackListURLs = loadBlacklist("black", f, "/");
log.logSystem("loaded black-list from file " + f + ", " + blackListURLs.size() + " entries");
} else {
blackListURLs = new TreeMap();
}
log.logSystem("Proxy Handler Initialized");
// load stopwords
if (stopwords == null) {
stopwords = loadList(new File(rootPath, "yacy.stopwords"));
@ -389,6 +402,42 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
/**
 * Merges several blacklist files into one map.
 *
 * @param mapname   label passed through to serverFileUtils.loadMap
 * @param filenames comma-separated file names, resolved against the
 *                  configured "listsPath" directory (default "DATA/LISTS")
 * @param sep       separator between key and value on each line
 * @return the merged map of all listed files
 */
public TreeMap loadBlacklist(String mapname, String filenames, String sep) {
    final TreeMap merged = new TreeMap();
    final File listsDir = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
    final String[] names = filenames.split(",");
    for (int n = 0; n < names.length; n++) {
        final String listFile = (new File(listsDir, names[n])).toString();
        merged.putAll(serverFileUtils.loadMap(mapname, listFile, sep));
    }
    return merged;
}
/**
 * Tests whether a host/path pair is covered by the blacklist.
 *
 * Lookup order: host prefixes ending in ".*" (shortest first), then host
 * suffixes starting with "*." (shortest first), then the exact host. The
 * first blacklist entry found decides the result: its path pattern is
 * either "*" (everything) or a regular expression that must match the
 * path without its first character.
 *
 * @param hostlow lower-cased host name
 * @param path    URL path; may be empty (e.g. for "http://host")
 * @return true if the URL is blacklisted
 */
public boolean blacklistedURL(String hostlow, String path) {
    if (blackListURLs == null) return false;
    String pp; // path-pattern

    // first try to match the domain with wildcard '*'
    // [TL] While "." are found within the string
    int index = 0;
    while ((index = hostlow.indexOf('.', index + 1)) != -1) {
        if ((pp = (String) blackListURLs.get(hostlow.substring(0, index + 1) + "*")) != null) {
            return pathMatchesPattern(pp, path);
        }
    }
    index = hostlow.length();
    while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
        if ((pp = (String) blackListURLs.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
            return pathMatchesPattern(pp, path);
        }
    }

    // try to match without wildcard in domain
    pp = (String) blackListURLs.get(hostlow);
    return (pp != null) && pathMatchesPattern(pp, path);
}

/**
 * Applies one blacklist path pattern to a path. An empty or null path is
 * matched as the empty string instead of failing in substring(1).
 */
private static boolean pathMatchesPattern(String pp, String path) {
    if (pp.equals("*")) return true;
    String relative = ((path == null) || (path.length() == 0)) ? "" : path.substring(1);
    return relative.matches(pp);
}
private static String ppRamString(int bytes) {
if (bytes < 1024) return bytes + " KByte";
bytes = bytes / 1024;

@ -48,7 +48,11 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.zip.GZIPOutputStream;
import java.util.TreeMap;
import java.util.HashSet;
public final class serverFileUtils {
@ -142,4 +146,41 @@ public final class serverFileUtils {
copy(new ByteArrayInputStream(source), dest);
}
/**
 * Reads a text file into a set of lower-cased entries, one per line.
 * Blank lines and lines starting with '#' are skipped. If the file cannot
 * be read, the entries collected so far (possibly none) are returned.
 *
 * @param setname  descriptive label; not used by this method
 * @param filename path of the file to read
 * @return the set of trimmed, lower-cased lines
 */
public static HashSet loadSet(String setname, String filename) {
    final HashSet entries = new HashSet();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
        for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
            final String entry = raw.trim();
            if (entry.length() == 0) continue;  // blank line
            if (entry.startsWith("#")) continue; // comment line
            entries.add(entry.toLowerCase());
        }
    } catch (IOException e) {
        // best effort: an unreadable file simply contributes no entries
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                // nothing useful can be done if closing fails
            }
        }
    }
    return entries;
}
/**
 * Reads a text file of "key&lt;sep&gt;value" lines into a sorted map.
 * Blank lines, lines starting with '#', and lines where the separator is
 * missing or at position 0 are skipped. Keys are trimmed and lower-cased;
 * values are trimmed. If the file cannot be read, the entries collected so
 * far (possibly none) are returned.
 *
 * @param mapname  descriptive label; not used by this method
 * @param filename path of the file to read
 * @param sep      separator between key and value
 * @return the map of parsed entries
 */
public static TreeMap loadMap(String mapname, String filename, String sep) {
    final TreeMap entries = new TreeMap();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
        for (String raw = reader.readLine(); raw != null; raw = reader.readLine()) {
            final String entry = raw.trim();
            if (entry.length() == 0) continue;  // blank line
            if (entry.startsWith("#")) continue; // comment line
            final int split = entry.indexOf(sep);
            if (split <= 0) continue;            // no separator, or empty key
            final String key = entry.substring(0, split).trim().toLowerCase();
            final String value = entry.substring(split + sep.length()).trim();
            entries.put(key, value);
        }
    } catch (IOException e) {
        // best effort: an unreadable file simply contributes no entries
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                // nothing useful can be done if closing fails
            }
        }
    }
    return entries;
}
}

@ -163,7 +163,7 @@ proxyBlueList=yacy.blue
# if several ip's are allowed then they must be separated by a ','
# any ip may contain the wildcard-sign '*'
#proxyClient=192.168.0.4
proxyClient=localhost,127.0.0.1,192.168*,10*
proxyClient=localhost,127.0.0.1,192.168.*,10.*
# serverClient: client-ip's that may connect to the web server,
# thus are allowed to use the search service
@ -325,6 +325,7 @@ yacyDB=DATA/YACYDB
# local indexing, you may switch this off
allowDistributeIndex=true
allowReceiveIndex=true
indexReceiveBlockBlacklist=false
# the frequency is the number of links per minute that the peer allows
# _every_ other peer to send to this peer

Loading…
Cancel
Save