*) It is now possible to configure the YaCy blacklist separately for DHT, search, proxy, and crawler.

See: http://www.yacy-forum.de/viewtopic.php?t=2541
        http://www.yacy-forum.de/viewtopic.php?p=24516

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2389 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 9ae9062bd3
commit d2e8e76218

@ -14,151 +14,172 @@ You may also provide your blacklist to other peers by sharing them; in return yo
collect blacklist entries from other peers.</p> collect blacklist entries from other peers.</p>
<table border="0" cellspacing="1" cellpadding="0"> <table border="1" cellspacing="1" cellpadding="0">
<tr> <!-- blacklist selection -->
<td colspan="3" valign="top" class="TableHeader"> <form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
&nbsp; <tr>
</td> <td colspan="3" valign="top" class="TableHeader">&nbsp;</td>
</tr> </tr>
<tr> <tr>
<td colspan="3"> <td colspan="3">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellspacing="1" cellpadding="0" width="100%"> <table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr> <tr>
<td class="TableCellDark"> <td class="TableCellDark">
Edit list: Edit list:
<select name="blackLists" size="1"> <select name="selectedListName" size="1">
#{blackLists}# #{blackLists}#
<option value="#[name]#" #(selected)#::selected#(/selected)#>#[name]# #(active)#not active::active#(/active)# #(shared)#not shared::shared#(/shared)#</option> <option value="#[name]#" #(selected)#::selected#(/selected)#>#[name]# [#(shared)#not shared::shared#(/shared)#] #{active}# #[blTypeName]##{/active}#</option>
#{/blackLists}# #{/blackLists}#
</select> </select>
</td> </td>
<td class="TableCellDark"><center> <td class="TableCellDark"><center>
<input type="submit" name="changelistbutton" value="select"></center> <input type="submit" name="selectList" value="select" />
</td> </td>
<td valign="top" align="right" rowspan="2" class="TableCellDark"> <td valign="top" align="left" rowspan="2" class="TableCellDark">
<input type="submit" name="activatelistbutton" value="Enable/disable this list"><br> <fieldset>
<input type="submit" name="sharelistbutton" value="Share/don't share this list"><br> <legend>Activate this list for ...</legend>
<input type="submit" name="dellistbutton" value="Delete this list"><br> <table>
</td> #{currentActiveFor}#
<tr>
<td>#[blTypeName]#</td><td><input type="checkbox" name="activateList4#[blTypeName]#" value="on" #(checked)#checked::#(/checked)#/></td>
</tr>
#{/currentActiveFor}#
</table>
<input type="submit" name="activateList" value="Enable/disable this list">
</fieldset>
<input type="submit" name="shareList" value="Share/don't share this list" /><br />
<input type="submit" name="deleteList" value="Delete this list" /><br />
</td>
</tr> </tr>
<tr> <tr>
<td class="TableCellDark"> <td class="TableCellDark">
New list: New list:
<input type="text" name="newlist"> <input type="text" name="newListName">
</td> </td>
<td class="TableCellDark"><center> <td class="TableCellDark"><center>
<input type="submit" name="newlistbutton" value="create"></center> <input type="submit" name="createNewList" value="create"></center>
</td> </td>
<!--<input type="checkbox" name="proxylist" value="on" />active<br />
<input type="checkbox" name="sharedlist" value="on" />shared<br />
<input type="submit" name="changebutton" value="Change"><br />-->
</td>
</tr> </tr>
</table> </table>
</td>
</tr>
</form>
</form>
</td> <!-- Blacklist configuration -->
<tr>
<td colspan="3" valign="top" class="TableHeader">
<h3>Active list: <i>#[currentBlacklist]#</i></h3>
</td>
</tr>
<tr>
<td rowspan="6">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr>
<td class="TableCellDark">
These are the domain name / path patterns in this blacklist:<br>
You can select them here for deletion
</td>
</tr> </tr>
<tr> <tr>
<td colspan="3" valign="top" class="TableHeader"> <td class="TableCellLight">
<h3>Active list: #[filename]#</h3> <input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
</td> <select name="selectedEntry" size="8">
<!--<option disabled>blocked Sites</option>-->
#{Itemlist}#
::
<option value="#[item]#">#[item]#</option>
#{/Itemlist}#
</select>
<p />
<input type="submit" name="deleteBlacklistEntry" value="Delete URL pattern">
</td>
</tr> </tr>
<tr> <tr>
<td rowspan="6"> <td class="TableCellDark">
<table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr>
<td class="TableCellDark">
These are the domain name / path patterns in this blacklist:<br>
You can select them here for deletion
</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<input type="hidden" name="filename" value="#[filename]#">
<select name="Itemlist" size="8">
<!--<option disabled>blocked Sites</option>-->
#{Itemlist}#
::
<option value="#[item]#">#[item]#</option>
#{/Itemlist}#
</select>
<p>
<input type="submit" name="delbutton" value="Delete URL pattern">
</td>
</tr>
<tr>
<td class="TableCellDark">
Enter new domain name / path pattern in the form: Enter new domain name / path pattern in the form:
<ul type="square"> <ul type="square">
<li>domain/fullpath</li> <li>domain/fullpath</li>
<li>domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li> <li>domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li> <li>*.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li> <li>domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.sub.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li> <li>*.sub.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>sub.domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li></ul> <li>sub.domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
</td> </ul>
</tr> </td>
<tr> </tr>
<td class="TableCellLight">
<input type="text" name="newItem" size="50"><p>
<input type="submit" name="addbutton" value="Add URL pattern"></p>
</form>
</td>
</tr>
</table>
<td class="TableHeader" rowspan="6">&nbsp;</td>
<td class="TableCellDark">
Import blacklist items from other YaCy peers:</td>
<tr> <tr>
<td class="TableCellLight"> <td class="TableCellLight">
<input type="text" name="newEntry" size="50"><p>
<input type="submit" name="addBlacklistEntry" value="Add URL pattern"></p>
</td>
</tr>
</table>
</form>
</td>
</tr>
<tr>
<!-- spacer between the two columns -->
<td class="TableHeader" rowspan="6">&nbsp;</td>
</tr>
<!-- Blacklist import from other peer -->
<tr>
<td class="TableCellDark">Import blacklist items from other YaCy peers:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get"> <form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#"> <input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
Host: <select name="hash"> Host: <select name="hash">
#{otherHosts}# #{otherHosts}#
<option value="#[hash]#">#[name]#</option> <option value="#[hash]#">#[name]#</option>
#{/otherHosts}# #{/otherHosts}#
</select> </select>
<p> <p />
<input type="submit" value="Load new blacklist items"> <input type="submit" value="Load new blacklist items">
</form> </form>
</tr> </td>
<tr> </tr>
<td class="TableCellDark">
Import blacklist items from URL:</td>
</tr> <!-- blacklist import from url -->
<tr> <tr>
<td class="TableCellLight"> <td class="TableCellDark">Import blacklist items from URL:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get"> <form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#"> <input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
URL: <input type="text" name="url"> URL: <input type="text" name="url">
<p> <p>
<input type="submit" value="Load new blacklist items"> <input type="submit" value="Load new blacklist items">
</form> </form>
</tr> </td>
<tr> </tr>
<td class="TableCellDark">
<p>Import blacklist items from file:</td>
</tr> <!-- blacklist import from file -->
<tr> <tr>
<td class="TableCellLight"> <td class="TableCellDark"><p>Import blacklist items from file:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get"> <form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#"> <input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
File: <input type="text" name="file"> File: <input type="text" name="file">
<p> <p>
<input type="submit" value="Load new blacklist items"> <input type="submit" value="Load new blacklist items">
</form> </form>
</td> </td>
</tr> </tr>
</table> </table>
<p> <p>
#(status)#
#(status)# <!-- 0: -->
:: ::
<b>#[item]#</b> was removed from blacklist <b>#[item]#</b> was removed from blacklist
:: ::

@ -49,14 +49,15 @@
// if the shell's current path is HTROOT // if the shell's current path is HTROOT
import java.io.File; import java.io.File;
import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Enumeration; import java.util.Enumeration;
import de.anomic.data.listManager; import de.anomic.data.listManager;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
@ -64,196 +65,273 @@ import de.anomic.yacy.yacySeed;
public class Blacklist_p { public class Blacklist_p {
private final static String BLACKLIST = "blackLists_"; private final static String BLACKLIST = "blackLists_";
private final static String BLACKLIST_ALL = "proxyBlackLists"; private final static String BLACKLIST_SHARED = "BlackLists.Shared";
private final static String BLACKLIST_ACTIVE = "proxyBlackListsActive";
private final static String BLACKLIST_SHARED = "proxyBlackListsShared";
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
// initialize the list manager
listManager.switchboard = (plasmaSwitchboard) env; listManager.switchboard = (plasmaSwitchboard) env;
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS")); listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final serverObjects prop = new serverObjects();
String line;
// String HTMLout = "";
String removeItem = "removeme"; // getting the list of supported blacklist types
int numItems = 0; String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
int i; // need below String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
String[] filenames = listManager.getListslistArray(BLACKLIST_ALL); String blacklistToUse = null;
String filename = ""; serverObjects prop = new serverObjects();
// do all post operations
if (post != null) { if (post != null) {
if (post.containsKey("blackLists")) { // Blacklist selected
filename = (String)post.get("blackLists"); if (post.containsKey("selectList")) {
} else if (post.containsKey("filename")) { blacklistToUse = (String)post.get("selectedListName");
filename = (String)post.get("filename");
} else if (filenames.length > 0){ // first BlackList
filename = filenames[0];
// } else { //No BlackList
// System.out.println("DEBUG: No Blacklist found");
} }
prop.put("status", 0); // nothing if (post.containsKey("createNewList")) {
/* ===========================================================
* Creation of a new blacklist
* =========================================================== */
blacklistToUse = (String)post.get("newListName");
if (!blacklistToUse.endsWith(".black")) blacklistToUse += ".black";
try {
final File newFile = new File(listManager.listsPath, blacklistToUse);
newFile.createNewFile();
// share the newly created blacklist
listManager.addListToListslist(BLACKLIST_SHARED, blacklistToUse);
// activate it for all known blacklist types
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
listManager.addListToListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
}
} catch (IOException e) {/* */}
} else if (post.containsKey("deleteList")) {
/* ===========================================================
* Delete a blacklist
* =========================================================== */
// del list blacklistToUse = (String)post.get("selectedListName");
if (post.containsKey("dellistbutton")) {
final File BlackListFile = new File(listManager.listsPath, filename); File BlackListFile = new File(listManager.listsPath, blacklistToUse);
BlackListFile.delete(); BlackListFile.delete();
// remove from all BlackLists Lists for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
listManager.removeListFromListslist(BLACKLIST_ALL, filename); listManager.removeListFromListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
listManager.removeListFromListslist(BLACKLIST_ACTIVE, filename); }
listManager.removeListFromListslist(BLACKLIST_SHARED, filename);
// remove it from the shared list
listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistToUse);
blacklistToUse = null;
// reload Blacklists // reload Blacklists
listManager.reloadBlacklists(); listManager.reloadBlacklists();
filenames = listManager.getListslistArray(BLACKLIST_ALL);
if (filenames.length > 0) {
filename = filenames[0];
}
// new list } else if (post.containsKey("activateList")) {
} else if (post.containsKey("newlistbutton")) {
String newList = (String)post.get("newlist"); /* ===========================================================
if (!newList.endsWith(".black")) { * Activate/Deactivate a blacklist
newList += ".black"; * =========================================================== */
blacklistToUse = (String)post.get("selectedListName");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (post.containsKey("activateList4" + supportedBlacklistTypes[blTypes])) {
listManager.addListToListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
} else {
listManager.removeListFromListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
}
} }
filename = newList; //to select it in the returnes Document
try {
final File newFile = new File(listManager.listsPath, newList);
newFile.createNewFile();
listManager.addListToListslist(BLACKLIST_ALL, newList);
listManager.addListToListslist(BLACKLIST_ACTIVE, newList);
listManager.addListToListslist(BLACKLIST_SHARED, newList);
} catch (IOException e) {}
listManager.reloadBlacklists();
} else if (post.containsKey("activatelistbutton")) { } else if (post.containsKey("shareList")) {
if( listManager.ListInListslist(BLACKLIST_ACTIVE, filename) ) {
listManager.removeListFromListslist(BLACKLIST_ACTIVE, filename); /* ===========================================================
} else { // inactive list -> enable * Share a blacklist
listManager.addListToListslist(BLACKLIST_ACTIVE, filename); * =========================================================== */
}
listManager.reloadBlacklists(); blacklistToUse = (String)post.get("selectedListName");
} else if (post.containsKey("sharelistbutton")) { if (listManager.ListInListslist(BLACKLIST_SHARED, blacklistToUse)) {
if (listManager.ListInListslist(BLACKLIST_SHARED, filename)) {
// Remove from shared BlackLists // Remove from shared BlackLists
listManager.removeListFromListslist(BLACKLIST_SHARED, filename); listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistToUse);
} else { // inactive list -> enable } else { // inactive list -> enable
listManager.addListToListslist(BLACKLIST_SHARED, filename); listManager.addListToListslist(BLACKLIST_SHARED, blacklistToUse);
} }
} // List Management End } else if (post.containsKey("deleteBlacklistEntry")) {
// remove a Item? /* ===========================================================
if (post.containsKey("delbutton") && * Delete a blacklist entry
post.containsKey("Itemlist") && * =========================================================== */
!((String)post.get("Itemlist")).equals("") ) {
removeItem = (String)post.get("Itemlist");
}
} // post != null
// Read the List // get the current selected blacklist name
final ArrayList list = listManager.getListArray(new File(listManager.listsPath, filename)); blacklistToUse = (String)post.get("currentBlacklist");
final StringBuffer out = new StringBuffer(list.size() * 64);
String[] sortedlist = new String[list.size()];
Arrays.sort(list.toArray(sortedlist));
for (int j=0;j<sortedlist.length;++j){
line = sortedlist[j];
if (!(line.length() == 0 || line.charAt(0) == '#' || line.equals(removeItem))) { //Not the item to remove // get the entry that should be deleted
prop.put("Itemlist_" + numItems + "_item", line); String oldEntry = (String)post.get("selectedEntry");
numItems++;
} // load blacklist data from file
ArrayList list = listManager.getListArray(new File(listManager.listsPath, blacklistToUse));
// delete the old entry from file
if (list != null) {
for (int i=0; i < list.size(); i++) {
if (((String)list.get(i)).equals(oldEntry)) {
list.remove(i);
break;
}
}
listManager.writeList(new File(listManager.listsPath, blacklistToUse), (String[])list.toArray(new String[list.size()]));
}
if (line.equals(removeItem)) { // remove the entry from the running blacklist engine
prop.put("status", 1);//removed int pos = oldEntry.indexOf("/");
prop.put("status_item", line); if (pos < 0) {
// if (listManager.switchboard.urlBlacklist != null) { // add default empty path pattern
// listManager.switchboard.urlBlacklist.remove(line); pos = oldEntry.length();
if (plasmaSwitchboard.urlBlacklist != null) { oldEntry = oldEntry + "/.*";
plasmaSwitchboard.urlBlacklist.remove(line); }
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],oldEntry.substring(0, pos), oldEntry.substring(pos + 1));
}
}
} else if (post.containsKey("addBlacklistEntry")) {
/* ===========================================================
* Add a new blacklist entry
* =========================================================== */
blacklistToUse = (String)post.get("currentBlacklist");
String newEntry = (String)post.get("newEntry");
// TODO: ignore empty entries
if (newEntry.startsWith("http://") ){
newEntry = newEntry.substring(7);
}
int pos = newEntry.indexOf("/");
if (pos < 0) {
// add default empty path pattern
pos = newEntry.length();
newEntry = newEntry + "/.*";
}
// append the line to the file
PrintWriter pw = null;
try {
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklistToUse), true));
pw.println(newEntry);
pw.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (pw != null) try { pw.close(); } catch (Exception e){ /* */}
}
// add to blacklist
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],newEntry.substring(0, pos), newEntry.substring(pos + 1));
}
} }
} else {
out.append(line).append(serverCore.crlfString); //full list
} }
} }
prop.put("Itemlist", numItems);
// Add a new Item // loading all blacklist files located in the directory
if (post != null && post.containsKey("addbutton") && !((String)post.get("newItem")).equals("")) { String[] dirlist = listManager.getDirListing(listManager.listsPath);
String newItem = (String)post.get("newItem");
//clean http:// // if we have not chosen a blacklist until yet we use the first file
if ( newItem.startsWith("http://") ){ if (blacklistToUse == null && dirlist != null && dirlist.length > 0) {
newItem = newItem.substring(7); blacklistToUse = dirlist[0];
} }
//append "/.*"
int pos = newItem.indexOf("/");
if (pos < 0) {
// add default empty path pattern
pos = newItem.length();
newItem = newItem + "/.*";
}
out.append(newItem).append(serverCore.crlfString); // Read the blacklist items from file
final ArrayList list = listManager.getListArray(new File(listManager.listsPath, blacklistToUse));
prop.put("Itemlist_"+numItems+"_item", newItem); // sort them
numItems++; String[] sortedlist = new String[list.size()];
prop.put("Itemlist", numItems); Arrays.sort(list.toArray(sortedlist));
prop.put("status", 2);//added // display them
prop.put("status_item", newItem);//added int entryCount = 0;
for (int j=0;j<sortedlist.length;++j){
String nextEntry = sortedlist[j];
// add to blacklist if (nextEntry.length() == 0) continue;
// if (listManager.switchboard.urlBlacklist != null) if (nextEntry.startsWith("#")) continue;
// listManager.switchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1));
if (plasmaSwitchboard.urlBlacklist != null) { prop.put("Itemlist_" + entryCount + "_item", nextEntry);
plasmaSwitchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1)); entryCount++;
}
} }
listManager.writeList(new File(listManager.listsPath, filename), out.toString()); prop.put("Itemlist", entryCount);
// List known hosts for BlackList retrieval // List known hosts for BlackList retrieval
yacySeed seed; yacySeed seed;
if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) { // no nullpointer error if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) { // no nullpointer error
final Enumeration e = yacyCore.seedDB.seedsConnected(true, false, null); final Enumeration e = yacyCore.seedDB.seedsConnected(true, false, null);
i = 0; int peerCount = 0;
while (e.hasMoreElements()) { while (e.hasMoreElements()) {
seed = (yacySeed) e.nextElement(); seed = (yacySeed) e.nextElement();
if (seed != null) { if (seed != null) {
final String Hash = seed.hash; final String Hash = seed.hash;
final String Name = seed.get(yacySeed.NAME, "nameless"); final String Name = seed.get(yacySeed.NAME, "nameless");
prop.put("otherHosts_" + i + "_hash", Hash); prop.put("otherHosts_" + peerCount + "_hash", Hash);
prop.put("otherHosts_" + i + "_name", Name); prop.put("otherHosts_" + peerCount + "_name", Name);
i++; peerCount++;
} }
} }
prop.put("otherHosts", i); prop.put("otherHosts", peerCount);
// } else {
// System.out.println("BlackList_p: yacy seed not loaded!"); // DEBUG:
} }
// List BlackLists // List BlackLists
final String[] BlackLists = listManager.getListslistArray(BLACKLIST_ALL); int blacklistCount = 0;
for (i = 0; i <= BlackLists.length - 1; i++) { if (dirlist != null) {
prop.put(BLACKLIST + i + "_name", BlackLists[i]); for (int i = 0; i <= dirlist.length - 1; i++) {
prop.put(BLACKLIST + i + "_active", 0); prop.put(BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(BLACKLIST + i + "_shared", 0); prop.put(BLACKLIST + blacklistCount + "_shared", 0);
prop.put(BLACKLIST + i + "_selected", 0);
if (BlackLists[i].equals(filename)) { //current List if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(BLACKLIST + i + "_selected", 1); prop.put(BLACKLIST + blacklistCount + "_selected", 1);
}
if (listManager.ListInListslist(BLACKLIST_ACTIVE, BlackLists[i])) { for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.put(BLACKLIST + i + "_active", 1); prop.put("currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
} prop.put("currentActiveFor_" + blTypes + "_checked",
if (listManager.ListInListslist(BLACKLIST_SHARED, BlackLists[i])) { listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i])?0:1);
prop.put(BLACKLIST + i + "_shared", 1); }
prop.put("currentActiveFor",supportedBlacklistTypes.length);
}
if (listManager.ListInListslist(BLACKLIST_SHARED, dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_shared", 1);
} else {
prop.put(BLACKLIST + blacklistCount + "_selected", 0);
}
int activeCount = 0;
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName",supportedBlacklistTypes[blTypes]);
activeCount++;
}
}
prop.put(BLACKLIST + blacklistCount + "_active",activeCount);
blacklistCount++;
} }
} }
prop.put("blackLists", i); prop.put("blackLists", blacklistCount);
prop.put("filename", filename);
prop.put("currentBlacklist", blacklistToUse);
return prop; return prop;
} }

@ -63,6 +63,7 @@ import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL; import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL; import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex; import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -462,7 +463,7 @@ public class IndexControl_p {
} else { } else {
url = new URL(us); url = new URL(us);
if (plasmaSwitchboard.urlBlacklist.isListed(url)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" checked value=\"").append(uh[0]).append("\" align=\"top\">"); result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" checked value=\"").append(uh[0]).append("\" align=\"top\">");
} else { } else {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" value=\"").append(uh[0]).append("\" align=\"top\">"); result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" value=\"").append(uh[0]).append("\" align=\"top\">");

@ -61,6 +61,7 @@ import java.util.ArrayList;
import java.util.Enumeration; import java.util.Enumeration;
import java.util.HashSet; import java.util.HashSet;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader; import de.anomic.http.httpHeader;
import de.anomic.http.httpc; import de.anomic.http.httpc;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
@ -250,8 +251,16 @@ public class sharedBlacklist_p {
out += newItem+"\n"; out += newItem+"\n";
prop.put("status_list_"+count+"_entry", newItem); prop.put("status_list_"+count+"_entry", newItem);
count++; count++;
if (plasmaSwitchboard.urlBlacklist != null) if (plasmaSwitchboard.urlBlacklist != null) {
plasmaSwitchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1)); String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",filename)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],newItem.substring(0, pos), newItem.substring(pos + 1));
}
}
}
//write the list //write the list
try{ try{

@ -54,6 +54,7 @@ import de.anomic.http.httpHeader;
import de.anomic.index.indexEntry; import de.anomic.index.indexEntry;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch; import de.anomic.server.serverSwitch;
@ -163,7 +164,7 @@ public final class transferRWI {
wordhashes[received] = wordHash; wordhashes[received] = wordHash;
iEntry = new indexURLEntry(estring.substring(p)); iEntry = new indexURLEntry(estring.substring(p));
urlHash = iEntry.urlHash(); urlHash = iEntry.urlHash();
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) { if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(plasmaURLPattern.BLACKLIST_DHT, urlHash))) {
//int deleted = sb.wordIndex.tryRemoveURLs(urlHash); //int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs"); yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs");
blocked++; blocked++;

@ -155,11 +155,13 @@ public class listManager {
// overloaded function to write an array // overloaded function to write an array
public static boolean writeList(File listFile, String[] list){ public static boolean writeList(File listFile, String[] list){
String out = ""; StringBuffer out = new StringBuffer();
for(int i=0;i <= list.length; i++){ for(int i=0;i < list.length; i++){
out += list[i] + serverCore.crlfString; out
.append(list[i])
.append(serverCore.crlfString);
} }
return writeList(listFile, out); //(File, String) return writeList(listFile, out.toString()); //(File, String)
} }
public static String getListString(String filename, boolean withcomments){ public static String getListString(String filename, boolean withcomments){
@ -194,6 +196,12 @@ public class listManager {
String[] fileListString; String[] fileListString;
File[] fileList; File[] fileList;
final File dir = new File(dirname); final File dir = new File(dirname);
return getDirListing(dir);
}
public static String[] getDirListing(File dir){
String[] fileListString;
File[] fileList;
if (dir != null ) { if (dir != null ) {
if (!dir.exists()) { if (!dir.exists()) {
@ -321,11 +329,21 @@ public class listManager {
// load all active Blacklists in the Proxy // load all active Blacklists in the Proxy
public static void reloadBlacklists(){ public static void reloadBlacklists(){
final String f = switchboard.getConfig("proxyBlackListsActive", ""); String supportedBlacklistTypesStr = switchboard.getConfig("BlackLists.types", "");
de.anomic.plasma.plasmaSwitchboard.urlBlacklist.clear(); String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
if (f != "") {
de.anomic.plasma.plasmaSwitchboard.urlBlacklist.loadList(f, "/"); ArrayList blacklistFiles = new ArrayList(supportedBlacklistTypes.length);
} for (int i=0; i < supportedBlacklistTypes.length; i++) {
String[] blacklistFile = new String[]{
supportedBlacklistTypes[i],
switchboard.getConfig(supportedBlacklistTypes[i] + ".BlackLists", "")
};
blacklistFiles.add(blacklistFile);
}
de.anomic.plasma.plasmaSwitchboard.urlBlacklist.clear();
de.anomic.plasma.plasmaSwitchboard.urlBlacklist.loadList((String[][])blacklistFiles.toArray(new String[blacklistFiles.size()][]), "/");
// switchboard.urlBlacklist.clear(); // switchboard.urlBlacklist.clear();
// if (f != "") switchboard.urlBlacklist.loadLists("black", f, "/"); // if (f != "") switchboard.urlBlacklist.loadLists("black", f, "/");
} }

@ -96,6 +96,7 @@ import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaHTCache; import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard; import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.server.serverCore; import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils; import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects; import de.anomic.server.serverObjects;
@ -389,7 +390,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// respond a 404 for all AGIS ("all you get is shit") servers // respond a 404 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase(); String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; } if (args != null) { path = path + "?" + args; }
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
httpd.sendRespondError(conProp,respond,4,403,null, httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -915,7 +916,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// re-calc the url path // re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args); String remotePath = (args == null) ? path : (path + "?" + args);
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, remotePath)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, remotePath)) {
httpd.sendRespondError(conProp,respond,4,403,null, httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -1128,7 +1129,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// blacklist idea inspired by [AS]: // blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers // respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase(); final String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
httpd.sendRespondError(conProp,clientOut,4,403,null, httpd.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'"); this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");

@ -898,7 +898,7 @@ public final class plasmaCrawlLURL extends indexURL {
plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next(); plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
totalSearchedUrls++; totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url())==true) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true) {
lastBlacklistedUrl = entry.url().toString(); lastBlacklistedUrl = entry.url().toString();
lastBlacklistedHash = entry.hash(); lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url()); serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());

@ -283,7 +283,7 @@ public final class plasmaCrawlStacker {
} }
// check blacklist // check blacklist
if (plasmaSwitchboard.urlBlacklist.isListed(nexturl)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,nexturl)) {
reason = plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST; reason = plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST;
this.log.logFine("URL '" + nexturlString + "' is in blacklist. " + this.log.logFine("URL '" + nexturlString + "' is in blacklist. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms"); "Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");

@ -311,7 +311,7 @@ public final class plasmaCrawlWorker extends Thread {
// check if url is in blacklist // check if url is in blacklist
String hostlow = host.toLowerCase(); String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) { if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, hostlow, path)) {
log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist."); log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
addURLtoErrorDB(url, refererURLString, initiator, name, plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST, new bitfield(indexURL.urlFlagLength)); addURLtoErrorDB(url, refererURLString, initiator, name, plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST, new bitfield(indexURL.urlFlagLength));
return null; return null;

@ -125,6 +125,7 @@ import java.util.logging.Level;
import de.anomic.data.blogBoard; import de.anomic.data.blogBoard;
import de.anomic.data.bookmarksDB; import de.anomic.data.bookmarksDB;
import de.anomic.data.listManager;
import de.anomic.data.messageBoard; import de.anomic.data.messageBoard;
import de.anomic.data.wikiBoard; import de.anomic.data.wikiBoard;
import de.anomic.data.userDB; import de.anomic.data.userDB;
@ -303,13 +304,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load the black-list / inspired by [AS] // load the black-list / inspired by [AS]
File ulrBlackListFile = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS")); File ulrBlackListFile = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
urlBlacklist = new plasmaURLPattern(ulrBlackListFile); urlBlacklist = new plasmaURLPattern(ulrBlackListFile);
String f = getConfig("proxyBlackListsActive", null); listManager.switchboard = this;
if (f != null) { listManager.listsPath = ulrBlackListFile;
urlBlacklist.loadList(f, "/"); listManager.reloadBlacklists();
this.log.logConfig("loaded black-list from file " + ulrBlackListFile.getName() + ", " +
urlBlacklist.size() + " entries, " +
ppRamString(ulrBlackListFile.length()/1024));
}
// load badwords (to filter the topwords) // load badwords (to filter the topwords)
if (badwords == null) { if (badwords == null) {

@ -43,78 +43,158 @@ package de.anomic.plasma;
import java.io.File; import java.io.File;
import de.anomic.net.URL; import de.anomic.net.URL;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.Set; import java.util.Set;
import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroMSetTools;
public class plasmaURLPattern { public class plasmaURLPattern {
private Set cachedUrlHashs = Collections.synchronizedSet(new HashSet()); public static final String BLACKLIST_CRAWLER = "crawler";
private File rootPath = null; public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_SEARCH = "search";
public static final HashSet BLACKLIST_TYPES = new HashSet(Arrays.asList(new String[]{
BLACKLIST_CRAWLER,
BLACKLIST_PROXY,
BLACKLIST_DHT,
BLACKLIST_SEARCH
}));
private File blacklistRootPath = null;
private HashMap cachedUrlHashs = null;
private HashMap hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here private HashMap hostpaths = null; // key=host, value=path; mapped url is http://host/path; path does not start with '/' here
public plasmaURLPattern(File rootPath) { public plasmaURLPattern(File rootPath) {
super(); super();
this.rootPath = rootPath; this.blacklistRootPath = rootPath;
// prepare the data structure
this.hostpaths = new HashMap(); this.hostpaths = new HashMap();
this.cachedUrlHashs = new HashMap();
Iterator iter = BLACKLIST_TYPES.iterator();
while (iter.hasNext()) {
String blacklistType = (String) iter.next();
this.hostpaths.put(blacklistType, new HashMap());
this.cachedUrlHashs.put(blacklistType, Collections.synchronizedSet(new HashSet()));
}
} }
public void clear() { public void clear() {
this.hostpaths = new HashMap(); Iterator iter = this.hostpaths.keySet().iterator();
while (iter.hasNext()) {
HashMap blacklistMap = (HashMap) this.hostpaths.get(iter.next());
blacklistMap.clear();
}
} }
public int size() { public int size() {
return hostpaths.size(); int size = 0;
Iterator iter = this.hostpaths.keySet().iterator();
while (iter.hasNext()) {
HashMap blacklistMap = (HashMap) this.hostpaths.get(iter.next());
size += blacklistMap.size();
}
return size;
} }
public void loadList(String filenames, String sep) { public void loadList(String blacklistType, String filenames, String sep) {
// File listsPath = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS")); if (blacklistType == null) throw new IllegalArgumentException();
final String[] filenamesarray = filenames.split(","); if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
String[] filenamesarray = filenames.split(",");
if( filenamesarray.length > 0) { if( filenamesarray.length > 0) {
for (int i = 0; i < filenamesarray.length; i++) { for (int i = 0; i < filenamesarray.length; i++) {
hostpaths.putAll(kelondroMSetTools.loadMap(new File(rootPath, filenamesarray[i]).toString(), sep)); blacklistMap.putAll(kelondroMSetTools.loadMap(new File(this.blacklistRootPath, filenamesarray[i]).toString(), sep));
} }
} }
} }
public void remove(String host) { public void loadList(String[][] filenames, String sep) {
hostpaths.remove(host); for (int j = 0; j < filenames.length; j++) {
String[] nextFile = filenames[j];
String blacklistType = nextFile[0];
String fileName = nextFile[1];
this.loadList(blacklistType, fileName, sep);
}
} }
public void add(String host, String path) { public void remove(String blacklistType, String host) {
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
blacklistMap.remove(host);
}
public void add(String blacklistType, String host, String path) {
if (host == null) throw new NullPointerException();
if (path == null) throw new NullPointerException();
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1); if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
hostpaths.put(host.toLowerCase(), path);
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
blacklistMap.put(host.toLowerCase(), path);
} }
public int blacklistCacheSize() { public int blacklistCacheSize() {
return cachedUrlHashs.size(); int size = 0;
Iterator iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) {
Set blacklistMap = (Set) this.cachedUrlHashs.get(iter.next());
size += blacklistMap.size();
}
return size;
} }
public boolean hashInBlacklistedCache(String urlHash) { public boolean hashInBlacklistedCache(String blacklistType, String urlHash) {
return cachedUrlHashs.contains(urlHash); if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
Set urlHashCache = (Set) this.cachedUrlHashs.get(blacklistType);
return urlHashCache.contains(urlHash);
} }
public boolean isListed(String urlHash, URL url) { public boolean isListed(String blacklistType, String urlHash, URL url) {
if (!cachedUrlHashs.contains(urlHash)) { if (blacklistType == null) throw new IllegalArgumentException();
boolean temp = isListed(url.getHost().toLowerCase(), url.getFile()); if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
if (temp)
{ Set urlHashCache = (Set) this.cachedUrlHashs.get(blacklistType);
cachedUrlHashs.add(urlHash); if (!urlHashCache.contains(urlHash)) {
} boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
if (temp) {
urlHashCache.add(urlHash);
}
return temp; return temp;
} }
return true; return true;
} }
public boolean isListed(URL url) { public boolean isListed(String blacklistType, URL url) {
return isListed(url.getHost().toLowerCase(), url.getFile()); return isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
} }
public boolean isListed(String hostlow, String path) { public boolean isListed(String blacklistType, String hostlow, String path) {
if (hostlow == null) throw new NullPointerException();
if (path == null) throw new NullPointerException();
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
// getting the proper blacklist
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1); if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
String pp = ""; // path-pattern String pp = ""; // path-pattern
@ -122,19 +202,19 @@ public class plasmaURLPattern {
// [TL] While "." are found within the string // [TL] While "." are found within the string
int index = 0; int index = 0;
while ((index = hostlow.indexOf('.', index + 1)) != -1) { while ((index = hostlow.indexOf('.', index + 1)) != -1) {
if ((pp = (String) hostpaths.get(hostlow.substring(0, index + 1) + "*")) != null) { if ((pp = (String) blacklistMap.get(hostlow.substring(0, index + 1) + "*")) != null) {
return ((pp.equals("*")) || (path.matches(pp))); return ((pp.equals("*")) || (path.matches(pp)));
} }
} }
index = hostlow.length(); index = hostlow.length();
while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) { while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
if ((pp = (String) hostpaths.get("*" + hostlow.substring(index, hostlow.length()))) != null) { if ((pp = (String) blacklistMap.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
return ((pp.equals("*")) || (path.matches(pp))); return ((pp.equals("*")) || (path.matches(pp)));
} }
} }
// try to match without wildcard in domain // try to match without wildcard in domain
return (((pp = (String) hostpaths.get(hostlow)) != null) && return (((pp = (String) blacklistMap.get(hostlow)) != null) &&
((pp.equals("*")) || (path.matches(pp)))); ((pp.equals("*")) || (path.matches(pp))));
} }

@ -708,7 +708,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
// "+entry.getUrlHash()); // "+entry.getUrlHash());
try { try {
url = lurl.getEntry(entry.urlHash(), null).url(); url = lurl.getEntry(entry.urlHash(), null).url();
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url) == true)) { if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) {
urlHashs.add(entry.urlHash()); urlHashs.add(entry.urlHash());
} }
} catch (IOException e) { } catch (IOException e) {

@ -478,7 +478,7 @@ public final class yacyClient {
for (int n = 0; n < results; n++) { for (int n = 0; n < results; n++) {
// get one single search result // get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n), true); urlEntry = urlManager.newEntry((String) result.get("resource" + n), true);
if ((urlEntry == null) || (blacklist.isListed(urlEntry.url()))) { continue; } // block with backlist if ((urlEntry == null) || (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, urlEntry.url()))) { continue; } // block with backlist
urlEntry.store(); urlEntry.store();
int urlLength = urlEntry.url().toString().length(); int urlLength = urlEntry.url().toString().length();
int urlComps = htmlFilterContentScraper.urlComps(urlEntry.url().toString()).length; int urlComps = htmlFilterContentScraper.urlComps(urlEntry.url().toString()).length;

@ -245,6 +245,16 @@ public class migration {
sb.setConfig("portForwarding.sch.HostUser", sb.getConfig("portForwardingHostUser","")); sb.setConfig("portForwarding.sch.HostUser", sb.getConfig("portForwardingHostUser",""));
sb.setConfig("portForwarding.sch.HostPwd", sb.getConfig("portForwardingHostPwd","")); sb.setConfig("portForwarding.sch.HostPwd", sb.getConfig("portForwardingHostPwd",""));
} }
// migration for blacklists
if ((value = sb.getConfig("proxyBlackLists","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value);
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
}
} }
} }

@ -205,9 +205,14 @@ proxyYellowList=yacy.yellow
# the black-list; URLs appearing in this list will not be loaded; # the black-list; URLs appearing in this list will not be loaded;
# instead always a 404 is returned # instead always a 404 is returned
# all these files will be placed in the listsPath # all these files will be placed in the listsPath
proxyBlackLists=url.default.black BlackLists.types=proxy,crawler,dht,search
proxyBlackListsActive=url.default.black BlackLists.Shared=url.default.black
proxyBlackListsShared=url.default.black
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
proxyCookieBlackList=cookie.default.black proxyCookieBlackList=cookie.default.black
proxyCookieWhiteList=cookie.default.black proxyCookieWhiteList=cookie.default.black

Loading…
Cancel
Save