diff --git a/htroot/Blacklist_p.java b/htroot/Blacklist_p.java
index 1d2e37310..b853b9b73 100644
--- a/htroot/Blacklist_p.java
+++ b/htroot/Blacklist_p.java
@@ -182,8 +182,8 @@ public class Blacklist_p {
             }else{
                 prop.put("status", 1);//removed
                 prop.put("status_item", line);
-                if (httpdProxyHandler.blackListURLs != null)
-                    httpdProxyHandler.blackListURLs.remove(line);
+                if (listManager.switchboard.blackListURLs != null)
+                    listManager.switchboard.blackListURLs.remove(line);
             }
         }
         prop.put("Itemlist", numItems);
@@ -215,8 +215,8 @@ public class Blacklist_p {
                 prop.put("status_item", newItem);//added

                 //add to blacklist
-                if (httpdProxyHandler.blackListURLs != null)
-                    httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
+                if (listManager.switchboard.blackListURLs != null)
+                    listManager.switchboard.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
             }
             listManager.writeList(new File(listManager.listsPath, filename), out);
diff --git a/htroot/IndexControl_p.html b/htroot/IndexControl_p.html
index 5eef646d8..33c6f8055 100644
--- a/htroot/IndexControl_p.html
+++ b/htroot/IndexControl_p.html
@@ -60,12 +60,20 @@
 The local index currently consists of (at least) #[wcount]# reverse word indexes
 This enables automated, DHT-ruled Index Transmission to other peers.
 This is currently only activated for junior peers.
 [table markup of this hunk was lost in extraction; only the visible text is preserved]
 Index Receive: Accept remote Index Transmissions.
 This works only if you are a senior peer. The DHT-rules do not work without this function.
+If checked, your peer silently ignores transmitted URLs that match your blacklist
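The blacklist these settings refer to is held in memory as a plain host-to-path-pattern map. A minimal sketch of that shape with invented entries (the class name, hosts, and patterns are illustrative only, not part of the patch):

    import java.util.TreeMap;

    public class BlacklistFormatSketch {
        public static void main(String[] args) {
            // Sketch only: blackListURLs maps a lowercase host (optionally carrying a '*'
            // wildcard at the start or end) to a path pattern. A pattern of "*" blocks every
            // path on that host; any other pattern is matched as a regular expression against
            // the request path without its leading '/'.
            TreeMap blackListURLs = new TreeMap();
            blackListURLs.put("ads.example.com", "*");        // hypothetical entry: block the whole host
            blackListURLs.put("*.example.org", "banner/.*");  // hypothetical entry: wildcard host, regex path
            System.out.println(blackListURLs);
        }
    }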
diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 72cdd622b..ae8e49208 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -81,6 +81,7 @@ public class IndexControl_p {
         prop.put("otherHosts", "");
         prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
         prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
+        prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");
         return prop; // be save
     }

@@ -113,6 +114,8 @@ public class IndexControl_p {
             boolean allowReceiveIndex = ((String) post.get("indexReceive", "")).equals("on");
             switchboard.setConfig("allowReceiveIndex", (allowReceiveIndex) ? "true" : "false");
             yacyCore.seedDB.mySeed.setFlagAcceptRemoteIndex(allowReceiveIndex);
+            boolean indexReceiveBlockBlacklist = ((String) post.get("indexReceiveBlockBlacklist", "")).equals("on");
+            switchboard.setConfig("indexReceiveBlockBlacklist", (indexReceiveBlockBlacklist) ? "true" : "false");
         }

         if (post.containsKey("keyhashdeleteall")) {
@@ -293,6 +296,7 @@ public class IndexControl_p {
         prop.put("ucount", "" + switchboard.urlPool.loadedURL.size());
         prop.put("indexDistributeChecked", (switchboard.getConfig("allowDistributeIndex", "true").equals("true")) ? "checked" : "");
         prop.put("indexReceiveChecked", (switchboard.getConfig("allowReceiveIndex", "true").equals("true")) ? "checked" : "");
+        prop.put("indexReceiveBlockBlacklistChecked", (switchboard.getConfig("indexReceiveBlockBlacklist", "true").equals("true")) ? "checked" : "");

         // return rewrite properties
         return prop;
     }
diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html
index cc8792e05..2e05ce9ad 100644
--- a/htroot/Performance_p.html
+++ b/htroot/Performance_p.html
@@ -52,7 +52,7 @@
 #{/table}#
 [one changed table row; markup lost in extraction]
 Changes take effect immediately
diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java
index f3b1e224f..e00bdcf5f 100644
--- a/htroot/sharedBlacklist_p.java
+++ b/htroot/sharedBlacklist_p.java
@@ -241,8 +241,8 @@ public class sharedBlacklist_p {
                     out += newItem+"\n";
                     prop.put("status_list_"+count+"_entry", newItem);
                     count++;
-                    if (httpdProxyHandler.blackListURLs != null)
-                        httpdProxyHandler.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));
+                    if (switchboard.blackListURLs != null)
+                        switchboard.blackListURLs.put(newItem.substring(0, pos), newItem.substring(pos + 1));

                     //write the list
                     try{
diff --git a/htroot/yacy/transferURL.java b/htroot/yacy/transferURL.java
index cec81faed..55911e315 100644
--- a/htroot/yacy/transferURL.java
+++ b/htroot/yacy/transferURL.java
@@ -43,7 +43,11 @@

 // javac -classpath .:../classes transferRWI.java

+import java.net.URL;
+import java.net.MalformedURLException;
+
 import de.anomic.http.httpHeader;
+import de.anomic.http.httpdProxyHandler;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -64,10 +68,12 @@ public class transferURL {
         String key = (String) post.get("key", "");                  // transmission key
         int urlc = Integer.parseInt((String) post.get("urlc", "")); // number of transported urls
         boolean granted = switchboard.getConfig("allowReceiveIndex", "false").equals("true");
-
+        boolean blockBlacklist = switchboard.getConfig("indexReceiveBlockBlacklist", "false").equals("true");
+
         // response values
         String result = "";
         String doublevalues = "0";
+        URL url;

         if (granted) {
             int received = 0;
@@ -76,9 +82,27 @@ public class transferURL {
             String urls;
             for (int i = 0; i < urlc; i++) {
                 urls = (String) post.get("url" + i);
-                if (urls != null) {
-                    switchboard.urlPool.loadedURL.newEntry(urls, true, iam, iam, 3);
-                    received++;
+                if (urls == null) {
+                    yacyCore.log.logDebug("transferURL: got null url-String from peer " + youare);
+                } else {
+                    try {
+                        url = new URL(urls);
+                    } catch (MalformedURLException e) {
+                        yacyCore.log.logDebug("transferURL: got malformed url-String '" + urls + "' from peer " + youare);
+                        urls = null;
+                        url = null;
+                    }
+                    if ((urls != null) && (blockBlacklist)) {
+                        if (switchboard.blacklistedURL(url.getHost().toLowerCase(), url.getPath())) {
+                            yacyCore.log.logDebug("transferURL: blocked blacklisted url '" + urls + "' from peer " + youare);
+                            urls = null;
+                        }
+                    }
+                    if (urls != null) {
+                        switchboard.urlPool.loadedURL.newEntry(urls, true, iam, iam, 3);
+                        yacyCore.log.logDebug("transferURL: received url '" + urls + "' from peer " + youare);
+                        received++;
+                    }
                 }
             }
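Stripped of logging, the guard added to transferURL.java reduces to: parse the transmitted string, optionally test it against the local blacklist, and only then store it. A hypothetical helper that restates that decision (the class and method names are not part of the patch):

    import java.net.MalformedURLException;
    import java.net.URL;
    import de.anomic.plasma.plasmaSwitchboard;

    public class ReceiveCheckSketch {
        // Hypothetical restatement of the accept/reject decision made inline in transferURL.java.
        static boolean acceptTransferredURL(plasmaSwitchboard switchboard, boolean blockBlacklist, String urls) {
            URL url;
            try {
                url = new URL(urls);                 // malformed url-Strings are rejected
            } catch (MalformedURLException e) {
                return false;
            }
            if (blockBlacklist && switchboard.blacklistedURL(url.getHost().toLowerCase(), url.getPath())) {
                return false;                        // transmitted URL matches the local blacklist
            }
            return true;                             // caller may store it via urlPool.loadedURL.newEntry(...)
        }
    }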
diff --git a/source/de/anomic/data/listManager.java b/source/de/anomic/data/listManager.java
index 0a9daf4ab..8271bf785 100644
--- a/source/de/anomic/data/listManager.java
+++ b/source/de/anomic/data/listManager.java
@@ -234,9 +234,9 @@ public class listManager {
     public static void reloadBlacklists(){
         String f = switchboard.getConfig("proxyBlackListsActive", "");
         if (f != ""){
-            httpdProxyHandler.blackListURLs = httpdProxyHandler.loadBlacklist("black", f, "/");
+            switchboard.blackListURLs = switchboard.loadBlacklist("black", f, "/");
         }else{
-            httpdProxyHandler.blackListURLs = new TreeMap();
+            switchboard.blackListURLs = new TreeMap();
         }
     }

diff --git a/source/de/anomic/http/httpdProxyHandler.java b/source/de/anomic/http/httpdProxyHandler.java
index aada07309..7c1a049d3 100644
--- a/source/de/anomic/http/httpdProxyHandler.java
+++ b/source/de/anomic/http/httpdProxyHandler.java
@@ -106,12 +106,9 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
     private static plasmaSwitchboard switchboard = null;
     private static plasmaHTCache cacheManager = null;
     public static HashSet yellowList = null;
-    public static TreeMap blackListURLs = null;
     private static int timeout = 30000;
     private static boolean yacyTrigger = true;
-
     public static boolean isTransparentProxy = false;
-
     public static boolean remoteProxyUse = false;
     public static String remoteProxyHost = "";
     public static int remoteProxyPort = -1;
@@ -195,65 +192,13 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
             String f;
             // load the yellow-list
             f = switchboard.getConfig("proxyYellowList", null);
-            if (f != null) yellowList = loadSet("yellow", f); else yellowList = new HashSet();
-
-            // load the black-list / inspired by [AS]
-            f = switchboard.getConfig("proxyBlackListsActive", null);
-            if (f != null) blackListURLs = loadBlacklist("black", f, "/"); else blackListURLs = new TreeMap();
-            this.theLogger.logSystem("Proxy Handler Initialized");
-        }
-    }
-
-
-    private static HashSet loadSet(String setname, String filename) {
-        HashSet set = new HashSet();
-        BufferedReader br = null;
-        try {
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
-            String line;
-            while ((line = br.readLine()) != null) {
-                line = line.trim();
-                if ((line.length() > 0) && (!(line.startsWith("#")))) set.add(line.trim().toLowerCase());
-            }
-            br.close();
-            serverLog.logInfo("PROXY", "read " + setname + " set from file " + filename);
-        } catch (IOException e) {
-        } finally {
-            if (br != null) try { br.close(); } catch (Exception e) {}
-        }
-        return set;
-    }
-
-    private static TreeMap loadMap(String mapname, String filename, String sep) {
-        TreeMap map = new TreeMap();
-        BufferedReader br = null;
-        try {
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
-            String line;
-            int pos;
-            while ((line = br.readLine()) != null) {
-                line = line.trim();
-                if ((line.length() > 0) && (!(line.startsWith("#"))) && ((pos = line.indexOf(sep)) > 0))
-                    map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
+            if (f != null) {
+                yellowList = serverFileUtils.loadSet("yellow", f);
+                this.theLogger.logSystem("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries");
+            } else {
+                yellowList = new HashSet();
             }
-            serverLog.logInfo("PROXY", "read " + mapname + " map from file " + filename);
-        } catch (IOException e) {
-        } finally {
-            if (br != null) try { br.close(); } catch (Exception e) {}
         }
-        return map;
-    }
-
-    public static TreeMap loadBlacklist(String mapname, String filenames, String sep) {
-        TreeMap map = new TreeMap();
-        if (switchboard == null) return map; // not initialized yet
-        File listsPath = new File(switchboard.getRootPath(), switchboard.getConfig("listsPath", "DATA/LISTS"));
-        String filenamesarray[] = filenames.split(",");
-
-        if(filenamesarray.length >0)
-            for(int i = 0; i < filenamesarray.length; i++)
-                map.putAll(loadMap(mapname, (new File(listsPath, filenamesarray[i])).toString(), sep));
-        return map;
     }

     private static String domain(String host) {
@@ -271,31 +216,6 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
         return domain;
     }
-
-    private boolean blacklistedURL(String hostlow, String path) {
-        if (blackListURLs == null) return false;
-
-        String pp = ""; // path-pattern
-
-        // first try to match the domain with wildcard '*'
-        // [TL] While "." are found within the string
-        int index = 0;
-        while ((index = hostlow.indexOf('.', index + 1)) != -1) {
-            if ((pp = (String) blackListURLs.get(hostlow.substring(0, index + 1) + "*")) != null) {
-                return ((pp.equals("*")) || (path.substring(1).matches(pp)));
-            }
-        }
-        index = hostlow.length();
-        while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
-            if ((pp = (String) blackListURLs.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
-                return ((pp.equals("*")) || (path.substring(1).matches(pp)));
-            }
-        }
-
-        // try to match without wildcard in domain
-        return (((pp = (String) blackListURLs.get(hostlow)) != null) &&
-                ((pp.equals("*")) || (path.substring(1).matches(pp))));
-    }
-
     public void handleOutgoingCookies(httpHeader requestHeader, String targethost, String clienthost) {
         /*
         The syntax for the header is:
@@ -391,7 +311,7 @@
         // blacklist idea inspired by [AS]:
         // respond a 404 for all AGIS ("all you get is shit") servers
         String hostlow = host.toLowerCase();
-        if (blacklistedURL(hostlow, path)) {
+        if (switchboard.blacklistedURL(hostlow, path)) {
             httpd.sendRespondError(conProp,respond,4,403,null,
                     "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
             this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@@ -877,7 +797,7 @@
         // check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers
         String hostlow = host.toLowerCase();
-        if (blacklistedURL(hostlow, path)) {
+        if (switchboard.blacklistedURL(hostlow, path)) {
             try {
                 byte[] errorMsg = ("404 (generated): URL '" + hostlow + "' blocked by yacy proxy (blacklisted)\r\n").getBytes();
                 httpd.sendRespondHeader(conProp,respond,httpVer,404,"Not Found (AGIS)",0);
diff --git a/source/de/anomic/kelondro/kelondroHashtable.java b/source/de/anomic/kelondro/kelondroHashtable.java
index ecfe002f8..2c85ae01a 100644
--- a/source/de/anomic/kelondro/kelondroHashtable.java
+++ b/source/de/anomic/kelondro/kelondroHashtable.java
@@ -132,13 +132,18 @@ package de.anomic.kelondro;

 import java.io.File;
 import java.io.IOException;

+import de.anomic.server.serverCodings;
+
 public class kelondroHashtable {
-    kelondroArray hashArray;
-    int offset;
-    int maxk;
-    int maxrehash;
+    private kelondroArray hashArray;
+    private int offset;
+    private int maxk;
+    private int maxrehash;
+    private byte[][] dummyRow;
+    private static final byte[] dummyKey = serverCodings.enhancedCoder.encodeBase64Long(0, 5).getBytes();
+
     public kelondroHashtable(File file, int[] columns, int offset, int maxsize, int maxrehash) throws IOException {
         // this creates a new hashtable
         // the key element is not part of the columns array
@@ -149,18 +154,25 @@ public class kelondroHashtable {
         // this number is needed to omit grow of the table in case of re-hashing
         // the maxsize is re-computed to a virtual folding height and will result in a tablesize
         // less than the given maxsize. The actual maxsize can be retrieved by maxsize()
-        hashArray = new kelondroArray(file, extCol(columns), 6);
+        this.hashArray = new kelondroArray(file, extCol(columns), 6);
         this.offset = offset;
-        this.maxk = kelondroMSetTools.log2a(maxsize); // equal to log2(maxsize) + 1
+        this.maxk = kelondroMSetTools.log2a(maxsize); // equal to |log2(maxsize)| + 1
         if (this.maxk >= kelondroMSetTools.log2a(maxsize + power2(offset + 1) + 1) - 1) this.maxk--;
+        this.maxrehash = maxrehash;
         hashArray.seti(0, this.offset);
         hashArray.seti(1, this.maxk);
-        hashArray.seti(1, this.maxk);
+        hashArray.seti(2, this.maxrehash);
+        dummyRow = new byte[hashArray.columns()][];
+        dummyRow[0] = dummyKey;
+        for (int i = 0; i < hashArray.columns(); i++) dummyRow[i] = new byte[0];
     }

     public kelondroHashtable(File file) throws IOException{
         // this opens a file with an existing hashtable
-        hashArray = new kelondroArray(file);
+        this.hashArray = new kelondroArray(file);
+        this.offset = hashArray.geti(0);
+        this.maxk = hashArray.geti(1);
+        this.maxrehash = hashArray.geti(2);
     }

     private int[] extCol(int[] columns) {
@@ -175,15 +187,51 @@ public class kelondroHashtable {
         while (x > 0) {p = p << 1; x--;}
         return p;
     }
-    /*
+
     public synchronized byte[][] get(int key) throws IOException {
-
+        Object[] search = search(new Hash(key));
+        if (search[1] == null) return null;
+        byte[][] row = (byte[][]) search[1];
+        byte[][] result = new byte[row.length - 1][];
+        System.arraycopy(row, 1, result, 0, row.length - 1);
+        return result;
     }

-    public synchronized byte[][] put(int key, byte[][] newrow) throws IOException {
-
+    public synchronized byte[][] put(int key, byte[][] row) throws IOException {
+        Hash hash = new Hash(key);
+        // find row
+        Object[] search = search(hash);
+        byte[][] oldrow;
+        int rowNumber = ((Integer) search[0]).intValue();
+        if (search[1] == null) {
+            oldrow = null;
+        } else {
+            oldrow = (byte[][]) search[1];
+        }
+        // make space
+        while (rowNumber >= hashArray.size()) hashArray.set(hashArray.size(), dummyRow);
+        // write row
+        byte[][] newrow = new byte[hashArray.columns()][];
+        newrow[0] = serverCodings.enhancedCoder.encodeBase64Long(hash.key(), 5).getBytes();
+        System.arraycopy(row, 0, newrow, 1, row.length);
+        hashArray.set(rowNumber, row);
+        return oldrow;
+    }
+
+    private Object[] search(Hash hash) throws IOException {
+        byte[][] row;
+        int rowKey;
+        int rowNumber;
+        do {
+            rowNumber = hash.node();
+            if (rowNumber >= hashArray.size()) return new Object[]{new Integer(rowNumber), null};
+            row = hashArray.get(rowNumber);
+            rowKey = (int) serverCodings.enhancedCoder.decodeBase64Long(new String(row[0]));
+            if (rowKey == 0) return new Object[]{new Integer(rowNumber), null};
+            hash.rehash();
+        } while (rowKey != hash.key());
+        return new Object[]{new Integer(rowNumber), row};
     }
-    */

     private class Hash {
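For orientation, a hypothetical usage of the get/put API that the hunks above fill in; the file name, column widths, and key are invented, and the class below is not part of the patch:

    import java.io.File;
    import java.io.IOException;
    import de.anomic.kelondro.kelondroHashtable;

    public class HashtableSketch {
        public static void main(String[] args) throws IOException {
            // Invented column layout {16, 64}; the hash-key column is prepended internally,
            // so callers pass and receive only their own payload columns.
            kelondroHashtable table = new kelondroHashtable(new File("test.hashtable"),
                                                            new int[]{16, 64}, 8, 1000, 3);
            byte[][] row = new byte[][]{"somekey".getBytes(), "somevalue".getBytes()};
            byte[][] previous = table.put(1234, row);  // intended to return the old row, or null for a fresh slot
            byte[][] stored   = table.get(1234);       // intended to return the payload columns, without the key column
            System.out.println(previous == null ? "new entry" : "replaced entry");
            System.out.println(stored == null ? "miss" : "hit");
        }
    }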
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 1ef2ccbdb..075aa3cdb 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -117,6 +117,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.TreeMap;
 import java.util.Vector;

 import de.anomic.data.messageBoard;
@@ -134,6 +135,7 @@ import de.anomic.server.serverObjects;
 import de.anomic.server.serverSemaphore;
 import de.anomic.server.serverSwitch;
 import de.anomic.server.logging.serverLog;
+import de.anomic.server.serverFileUtils;
 import de.anomic.tools.bitfield;
 import de.anomic.tools.crypt;
 import de.anomic.yacy.yacyClient;
@@ -152,6 +154,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     // couloured list management
     public static TreeSet blueList = null;
     public static TreeSet stopwords = null;
+    public static TreeMap blackListURLs = null;

     // storage management
     private File cachePath;
@@ -216,7 +219,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             String f = getConfig("plasmaBlueList", null);
             if (f != null) blueList = loadList(new File(f)); else blueList= new TreeSet();
         }
-
+
+        // load the black-list / inspired by [AS]
+        String f = getConfig("proxyBlackListsActive", null);
+        if (f != null) {
+            blackListURLs = loadBlacklist("black", f, "/");
+            log.logSystem("loaded black-list from file " + f + ", " + blackListURLs.size() + " entries");
+        } else {
+            blackListURLs = new TreeMap();
+        }
+        log.logSystem("Proxy Handler Initialized");
+
         // load stopwords
         if (stopwords == null) {
             stopwords = loadList(new File(rootPath, "yacy.stopwords"));
@@ -389,6 +402,42 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     }

+    public TreeMap loadBlacklist(String mapname, String filenames, String sep) {
+        TreeMap map = new TreeMap();
+        File listsPath = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
+        String filenamesarray[] = filenames.split(",");
+
+        if(filenamesarray.length >0)
+            for(int i = 0; i < filenamesarray.length; i++)
+                map.putAll(serverFileUtils.loadMap(mapname, (new File(listsPath, filenamesarray[i])).toString(), sep));
+        return map;
+    }
+
+    public boolean blacklistedURL(String hostlow, String path) {
+        if (blackListURLs == null) return false;
+
+        String pp = ""; // path-pattern
+
+        // first try to match the domain with wildcard '*'
+        // [TL] While "." are found within the string
+        int index = 0;
+        while ((index = hostlow.indexOf('.', index + 1)) != -1) {
+            if ((pp = (String) blackListURLs.get(hostlow.substring(0, index + 1) + "*")) != null) {
+                return ((pp.equals("*")) || (path.substring(1).matches(pp)));
+            }
+        }
+        index = hostlow.length();
+        while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
+            if ((pp = (String) blackListURLs.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
+                return ((pp.equals("*")) || (path.substring(1).matches(pp)));
+            }
+        }
+
+        // try to match without wildcard in domain
+        return (((pp = (String) blackListURLs.get(hostlow)) != null) &&
+                ((pp.equals("*")) || (path.substring(1).matches(pp))));
+    }
+
     private static String ppRamString(int bytes) {
         if (bytes < 1024) return bytes + " KByte";
         bytes = bytes / 1024;
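The probe order of blacklistedURL is easier to see with concrete, invented data; the class below is only an illustration, not part of the patch:

    import java.util.TreeMap;
    import de.anomic.plasma.plasmaSwitchboard;

    public class BlacklistMatchSketch {
        // Walkthrough with invented data. For host "www.shop.example.com" the method probes, in order:
        //   prefix wildcards : "www.*", "www.shop.*", "www.shop.example.*"
        //   suffix wildcards : "*.com", "*.example.com", "*.shop.example.com"
        //   exact host       : "www.shop.example.com"
        // The stored pattern is applied to the path without its leading '/';
        // "*" blocks every path, anything else is treated as a regular expression.
        static boolean isBlocked(plasmaSwitchboard switchboard) {
            plasmaSwitchboard.blackListURLs = new TreeMap();
            plasmaSwitchboard.blackListURLs.put("*.example.com", "ads/.*");     // hypothetical entry
            return switchboard.blacklistedURL("www.shop.example.com", "/ads/banner.gif"); // -> true
        }
    }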
diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java
index cd19d0d72..85cb50f13 100644
--- a/source/de/anomic/server/serverFileUtils.java
+++ b/source/de/anomic/server/serverFileUtils.java
@@ -48,7 +48,11 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
 import java.util.zip.GZIPOutputStream;
+import java.util.TreeMap;
+import java.util.HashSet;

 public final class serverFileUtils {
@@ -142,4 +146,41 @@ public final class serverFileUtils {
         copy(new ByteArrayInputStream(source), dest);
     }

+    public static HashSet loadSet(String setname, String filename) {
+        HashSet set = new HashSet();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
+            String line;
+            while ((line = br.readLine()) != null) {
+                line = line.trim();
+                if ((line.length() > 0) && (!(line.startsWith("#")))) set.add(line.trim().toLowerCase());
+            }
+            br.close();
+        } catch (IOException e) {
+        } finally {
+            if (br != null) try { br.close(); } catch (Exception e) {}
+        }
+        return set;
+    }
+
+    public static TreeMap loadMap(String mapname, String filename, String sep) {
+        TreeMap map = new TreeMap();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
+            String line;
+            int pos;
+            while ((line = br.readLine()) != null) {
+                line = line.trim();
+                if ((line.length() > 0) && (!(line.startsWith("#"))) && ((pos = line.indexOf(sep)) > 0))
+                    map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
+            }
+        } catch (IOException e) {
+        } finally {
+            if (br != null) try { br.close(); } catch (Exception e) {}
+        }
+        return map;
+    }
+
 }
diff --git a/yacy.init b/yacy.init
index 81b6af2a5..de4cc37ae 100644
--- a/yacy.init
+++ b/yacy.init
@@ -163,7 +163,7 @@ proxyBlueList=yacy.blue
 # if several ip's are allowed then they must be separated by a ','
 # any ip may contain the wildcard-sign '*'
 #proxyClient=192.168.0.4
-proxyClient=localhost,127.0.0.1,192.168*,10*
+proxyClient=localhost,127.0.0.1,192.168.*,10.*

 # serverClient: client-ip's that may connect to the web server,
 # thus are allowed to use the search service
@@ -325,6 +325,7 @@ yacyDB=DATA/YACYDB
 # local indexing, you may switch this off
 allowDistributeIndex=true
 allowReceiveIndex=true
+indexReceiveBlockBlacklist=false

 # the frequency is the number of links per minute, that the peer allowes
 # _every_ other peer to send to this peer
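To close the loop, a hypothetical call of the relocated list loaders; the file names below are examples only, while the real code resolves them via listsPath and the files named in proxyBlackListsActive:

    import java.util.HashSet;
    import java.util.TreeMap;
    import de.anomic.server.serverFileUtils;

    public class ListLoadingSketch {
        public static void main(String[] args) {
            // yellow list: one entry per line; lines starting with '#' are ignored
            HashSet yellow = serverFileUtils.loadSet("yellow", "DATA/LISTS/yacy.yellow");
            // blacklist: one "host/path-pattern" entry per line, e.g. a line
            // "ads.example.com/*" becomes the map entry "ads.example.com" -> "*"
            TreeMap black = serverFileUtils.loadMap("black", "DATA/LISTS/yacy.black", "/");
            System.out.println(yellow.size() + " yellow entries, " + black.size() + " blacklist entries");
        }
    }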