*) It is now possible to configure the YaCy blacklists separately for DHT, search, proxy and crawler

See: http://www.yacy-forum.de/viewtopic.php?t=2541
        http://www.yacy-forum.de/viewtopic.php?p=24516

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2389 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 9ae9062bd3
commit d2e8e76218

@ -14,151 +14,172 @@ You may also provide your blacklist to other peers by sharing them; in return yo
collect blacklist entries from other peers.</p>
<table border="0" cellspacing="1" cellpadding="0">
<tr>
<td colspan="3" valign="top" class="TableHeader">
&nbsp;
</td>
</tr>
<tr>
<td colspan="3">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<table border="1" cellspacing="1" cellpadding="0">
<!-- blacklist selection -->
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<tr>
<td colspan="3" valign="top" class="TableHeader">&nbsp;</td>
</tr>
<tr>
<td colspan="3">
<table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr>
<td class="TableCellDark">
Edit list:
<select name="blackLists" size="1">
#{blackLists}#
<option value="#[name]#" #(selected)#::selected#(/selected)#>#[name]# #(active)#not active::active#(/active)# #(shared)#not shared::shared#(/shared)#</option>
#{/blackLists}#
</select>
</td>
<td class="TableCellDark"><center>
<input type="submit" name="changelistbutton" value="select"></center>
</td>
<td valign="top" align="right" rowspan="2" class="TableCellDark">
<input type="submit" name="activatelistbutton" value="Enable/disable this list"><br>
<input type="submit" name="sharelistbutton" value="Share/don't share this list"><br>
<input type="submit" name="dellistbutton" value="Delete this list"><br>
</td>
<td class="TableCellDark">
Edit list:
<select name="selectedListName" size="1">
#{blackLists}#
<option value="#[name]#" #(selected)#::selected#(/selected)#>#[name]# [#(shared)#not shared::shared#(/shared)#] #{active}# #[blTypeName]##{/active}#</option>
#{/blackLists}#
</select>
</td>
<td class="TableCellDark"><center>
<input type="submit" name="selectList" value="select" />
</td>
<td valign="top" align="left" rowspan="2" class="TableCellDark">
<fieldset>
<legend>Activate this list for ...</legend>
<table>
#{currentActiveFor}#
<tr>
<td>#[blTypeName]#</td><td><input type="checkbox" name="activateList4#[blTypeName]#" value="on" #(checked)#checked::#(/checked)#/></td>
</tr>
#{/currentActiveFor}#
</table>
<input type="submit" name="activateList" value="Enable/disable this list">
</fieldset>
<input type="submit" name="shareList" value="Share/don't share this list" /><br />
<input type="submit" name="deleteList" value="Delete this list" /><br />
</td>
</tr>
<tr>
<td class="TableCellDark">
New list:
<input type="text" name="newlist">
</td>
<td class="TableCellDark"><center>
<input type="submit" name="newlistbutton" value="create"></center>
</td>
<!--<input type="checkbox" name="proxylist" value="on" />active<br />
<input type="checkbox" name="sharedlist" value="on" />shared<br />
<input type="submit" name="changebutton" value="Change"><br />-->
</td>
<td class="TableCellDark">
New list:
<input type="text" name="newListName">
</td>
<td class="TableCellDark"><center>
<input type="submit" name="createNewList" value="create"></center>
</td>
</tr>
</table>
</form>
</td>
</tr>
<tr>
<td colspan="3" valign="top" class="TableHeader">
<h3>Active list: #[filename]#</h3>
</td>
</tr>
<tr>
<td rowspan="6">
</td>
</tr>
</form>
<table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr>
<td class="TableCellDark">
<!-- Blacklist configuration -->
<tr>
<td colspan="3" valign="top" class="TableHeader">
<h3>Active list: <i>#[currentBlacklist]#</i></h3>
</td>
</tr>
<tr>
<td rowspan="6">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<table border="0" cellspacing="1" cellpadding="0" width="100%">
<tr>
<td class="TableCellDark">
These are the domain name / path patterns in this blacklist:<br>
You can select them here for deletion
</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="Blacklist_p.html" method="post" enctype="multipart/form-data">
<input type="hidden" name="filename" value="#[filename]#">
<select name="Itemlist" size="8">
<!--<option disabled>blocked Sites</option>-->
#{Itemlist}#
::
<option value="#[item]#">#[item]#</option>
#{/Itemlist}#
</select>
<p>
<input type="submit" name="delbutton" value="Delete URL pattern">
</td>
</tr>
<tr>
<td class="TableCellDark">
</td>
</tr>
<tr>
<td class="TableCellLight">
<input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
<select name="selectedEntry" size="8">
<!--<option disabled>blocked Sites</option>-->
#{Itemlist}#
::
<option value="#[item]#">#[item]#</option>
#{/Itemlist}#
</select>
<p />
<input type="submit" name="deleteBlacklistEntry" value="Delete URL pattern">
</td>
</tr>
<tr>
<td class="TableCellDark">
Enter new domain name / path pattern in the form:
<ul type="square">
<li>domain/fullpath</li>
<li>domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.sub.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>sub.domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li></ul>
</td>
</tr>
<tr>
<td class="TableCellLight">
<input type="text" name="newItem" size="50"><p>
<input type="submit" name="addbutton" value="Add URL pattern"></p>
</form>
</td>
</tr>
</table>
<td class="TableHeader" rowspan="6">&nbsp;</td>
<td class="TableCellDark">
Import blacklist items from other YaCy peers:</td>
<li>domain/fullpath</li>
<li>domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>*.sub.domain/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
<li>sub.domain.*/.* or <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">regexpr</a></li>
</ul>
</td>
</tr>
<tr>
<td class="TableCellLight">
<td class="TableCellLight">
<input type="text" name="newEntry" size="50"><p>
<input type="submit" name="addBlacklistEntry" value="Add URL pattern"></p>
</td>
</tr>
</table>
</form>
</td>
</tr>
<tr>
<!-- spacer between the two columns -->
<td class="TableHeader" rowspan="6">&nbsp;</td>
</tr>
<!-- Blacklist import from other peer -->
<tr>
<td class="TableCellDark">Import blacklist items from other YaCy peers:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#">
<input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
Host: <select name="hash">
#{otherHosts}#
<option value="#[hash]#">#[name]#</option>
#{/otherHosts}#
</select>
<p>
<p />
<input type="submit" value="Load new blacklist items">
</form>
</tr>
<tr>
<td class="TableCellDark">
Import blacklist items from URL:</td>
</tr>
<tr>
<td class="TableCellLight">
</td>
</tr>
<!-- blacklist import from url -->
<tr>
<td class="TableCellDark">Import blacklist items from URL:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#">
<input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
URL: <input type="text" name="url">
<p>
<input type="submit" value="Load new blacklist items">
</form>
</tr>
<tr>
<td class="TableCellDark">
<p>Import blacklist items from file:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="filename" value="#[filename]#">
</form>
</td>
</tr>
<!-- blacklist import from file -->
<tr>
<td class="TableCellDark"><p>Import blacklist items from file:</td>
</tr>
<tr>
<td class="TableCellLight">
<form action="sharedBlacklist_p.html" method="get">
<input type="hidden" name="currentBlacklist" value="#[currentBlacklist]#">
File: <input type="text" name="file">
<p>
<input type="submit" value="Load new blacklist items">
<input type="submit" value="Load new blacklist items">
</form>
</td>
</tr>
</table>
</td>
</tr>
</table>
<p>
#(status)#
#(status)# <!-- 0: -->
::
<b>#[item]#</b> was removed from blacklist
::

@ -49,14 +49,15 @@
// if the shell's current path is HTROOT
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyCore;
@ -64,196 +65,273 @@ import de.anomic.yacy.yacySeed;
public class Blacklist_p {
private final static String BLACKLIST = "blackLists_";
private final static String BLACKLIST_ALL = "proxyBlackLists";
private final static String BLACKLIST_ACTIVE = "proxyBlackListsActive";
private final static String BLACKLIST_SHARED = "proxyBlackListsShared";
private final static String BLACKLIST_SHARED = "BlackLists.Shared";
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch env) {
// return variable that accumulates replacements
// initialize the list manager
listManager.switchboard = (plasmaSwitchboard) env;
listManager.listsPath = new File(listManager.switchboard.getRootPath(),listManager.switchboard.getConfig("listManager.listsPath", "DATA/LISTS"));
final serverObjects prop = new serverObjects();
String line;
// String HTMLout = "";
String removeItem = "removeme";
int numItems = 0;
int i; // need below
String[] filenames = listManager.getListslistArray(BLACKLIST_ALL);
String filename = "";
// getting the list of supported blacklist types
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
String blacklistToUse = null;
serverObjects prop = new serverObjects();
// do all post operations
if (post != null) {
if (post.containsKey("blackLists")) { // Blacklist selected
filename = (String)post.get("blackLists");
} else if (post.containsKey("filename")) {
filename = (String)post.get("filename");
} else if (filenames.length > 0){ // first BlackList
filename = filenames[0];
// } else { //No BlackList
// System.out.println("DEBUG: No Blacklist found");
if (post.containsKey("selectList")) {
blacklistToUse = (String)post.get("selectedListName");
}
prop.put("status", 0); // nothing
if (post.containsKey("createNewList")) {
/* ===========================================================
* Creation of a new blacklist
* =========================================================== */
blacklistToUse = (String)post.get("newListName");
if (!blacklistToUse.endsWith(".black")) blacklistToUse += ".black";
// del list
if (post.containsKey("dellistbutton")) {
final File BlackListFile = new File(listManager.listsPath, filename);
try {
final File newFile = new File(listManager.listsPath, blacklistToUse);
newFile.createNewFile();
// share the newly created blacklist
listManager.addListToListslist(BLACKLIST_SHARED, blacklistToUse);
// activate it for all known blacklist types
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
listManager.addListToListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
}
} catch (IOException e) {/* */}
} else if (post.containsKey("deleteList")) {
/* ===========================================================
* Delete a blacklist
* =========================================================== */
blacklistToUse = (String)post.get("selectedListName");
File BlackListFile = new File(listManager.listsPath, blacklistToUse);
BlackListFile.delete();
// remove from all BlackLists Lists
listManager.removeListFromListslist(BLACKLIST_ALL, filename);
listManager.removeListFromListslist(BLACKLIST_ACTIVE, filename);
listManager.removeListFromListslist(BLACKLIST_SHARED, filename);
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
listManager.removeListFromListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
}
// remove it from the shared list
listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistToUse);
blacklistToUse = null;
// reload Blacklists
listManager.reloadBlacklists();
filenames = listManager.getListslistArray(BLACKLIST_ALL);
if (filenames.length > 0) {
filename = filenames[0];
}
// new list
} else if (post.containsKey("newlistbutton")) {
String newList = (String)post.get("newlist");
if (!newList.endsWith(".black")) {
newList += ".black";
}
filename = newList; //to select it in the returnes Document
try {
final File newFile = new File(listManager.listsPath, newList);
newFile.createNewFile();
listManager.addListToListslist(BLACKLIST_ALL, newList);
listManager.addListToListslist(BLACKLIST_ACTIVE, newList);
listManager.addListToListslist(BLACKLIST_SHARED, newList);
} catch (IOException e) {}
} else if (post.containsKey("activateList")) {
/* ===========================================================
* Activate/Deactivate a blacklist
* =========================================================== */
blacklistToUse = (String)post.get("selectedListName");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (post.containsKey("activateList4" + supportedBlacklistTypes[blTypes])) {
listManager.addListToListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
} else {
listManager.removeListFromListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse);
}
}
} else if (post.containsKey("activatelistbutton")) {
if( listManager.ListInListslist(BLACKLIST_ACTIVE, filename) ) {
listManager.removeListFromListslist(BLACKLIST_ACTIVE, filename);
} else { // inactive list -> enable
listManager.addListToListslist(BLACKLIST_ACTIVE, filename);
}
listManager.reloadBlacklists();
listManager.reloadBlacklists();
} else if (post.containsKey("shareList")) {
} else if (post.containsKey("sharelistbutton")) {
if (listManager.ListInListslist(BLACKLIST_SHARED, filename)) {
/* ===========================================================
* Share a blacklist
* =========================================================== */
blacklistToUse = (String)post.get("selectedListName");
if (listManager.ListInListslist(BLACKLIST_SHARED, blacklistToUse)) {
// Remove from shared BlackLists
listManager.removeListFromListslist(BLACKLIST_SHARED, filename);
listManager.removeListFromListslist(BLACKLIST_SHARED, blacklistToUse);
} else { // inactive list -> enable
listManager.addListToListslist(BLACKLIST_SHARED, filename);
listManager.addListToListslist(BLACKLIST_SHARED, blacklistToUse);
}
} else if (post.containsKey("deleteBlacklistEntry")) {
/* ===========================================================
* Delete a blacklist entry
* =========================================================== */
// get the current selected blacklist name
blacklistToUse = (String)post.get("currentBlacklist");
// get the entry that should be deleted
String oldEntry = (String)post.get("selectedEntry");
// load blacklist data from file
ArrayList list = listManager.getListArray(new File(listManager.listsPath, blacklistToUse));
// delete the old entry from file
if (list != null) {
for (int i=0; i < list.size(); i++) {
if (((String)list.get(i)).equals(oldEntry)) {
list.remove(i);
break;
}
}
listManager.writeList(new File(listManager.listsPath, blacklistToUse), (String[])list.toArray(new String[list.size()]));
}
// remove the entry from the running blacklist engine
int pos = oldEntry.indexOf("/");
if (pos < 0) {
// add default empty path pattern
pos = oldEntry.length();
oldEntry = oldEntry + "/.*";
}
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],oldEntry.substring(0, pos), oldEntry.substring(pos + 1));
}
}
} else if (post.containsKey("addBlacklistEntry")) {
/* ===========================================================
* Add a new blacklist entry
* =========================================================== */
blacklistToUse = (String)post.get("currentBlacklist");
String newEntry = (String)post.get("newEntry");
// TODO: ignore empty entries
if (newEntry.startsWith("http://") ){
newEntry = newEntry.substring(7);
}
int pos = newEntry.indexOf("/");
if (pos < 0) {
// add default empty path pattern
pos = newEntry.length();
newEntry = newEntry + "/.*";
}
// append the line to the file
PrintWriter pw = null;
try {
pw = new PrintWriter(new FileWriter(new File(listManager.listsPath, blacklistToUse), true));
pw.println(newEntry);
pw.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (pw != null) try { pw.close(); } catch (Exception e){ /* */}
}
} // List Management End
// remove a Item?
if (post.containsKey("delbutton") &&
post.containsKey("Itemlist") &&
!((String)post.get("Itemlist")).equals("") ) {
removeItem = (String)post.get("Itemlist");
// add to blacklist
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",blacklistToUse)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],newEntry.substring(0, pos), newEntry.substring(pos + 1));
}
}
}
} // post != null
}
// loading all blacklist files located in the directory
String[] dirlist = listManager.getDirListing(listManager.listsPath);
// if we have not chosen a blacklist until yet we use the first file
if (blacklistToUse == null && dirlist != null && dirlist.length > 0) {
blacklistToUse = dirlist[0];
}
// Read the List
final ArrayList list = listManager.getListArray(new File(listManager.listsPath, filename));
final StringBuffer out = new StringBuffer(list.size() * 64);
// Read the blacklist items from file
final ArrayList list = listManager.getListArray(new File(listManager.listsPath, blacklistToUse));
// sort them
String[] sortedlist = new String[list.size()];
Arrays.sort(list.toArray(sortedlist));
// display them
int entryCount = 0;
for (int j=0;j<sortedlist.length;++j){
line = sortedlist[j];
String nextEntry = sortedlist[j];
if (nextEntry.length() == 0) continue;
if (nextEntry.startsWith("#")) continue;
if (!(line.length() == 0 || line.charAt(0) == '#' || line.equals(removeItem))) { //Not the item to remove
prop.put("Itemlist_" + numItems + "_item", line);
numItems++;
}
if (line.equals(removeItem)) {
prop.put("status", 1);//removed
prop.put("status_item", line);
// if (listManager.switchboard.urlBlacklist != null) {
// listManager.switchboard.urlBlacklist.remove(line);
if (plasmaSwitchboard.urlBlacklist != null) {
plasmaSwitchboard.urlBlacklist.remove(line);
}
} else {
out.append(line).append(serverCore.crlfString); //full list
}
prop.put("Itemlist_" + entryCount + "_item", nextEntry);
entryCount++;
}
prop.put("Itemlist", numItems);
// Add a new Item
if (post != null && post.containsKey("addbutton") && !((String)post.get("newItem")).equals("")) {
String newItem = (String)post.get("newItem");
//clean http://
if ( newItem.startsWith("http://") ){
newItem = newItem.substring(7);
}
//append "/.*"
int pos = newItem.indexOf("/");
if (pos < 0) {
// add default empty path pattern
pos = newItem.length();
newItem = newItem + "/.*";
}
out.append(newItem).append(serverCore.crlfString);
prop.put("Itemlist_"+numItems+"_item", newItem);
numItems++;
prop.put("Itemlist", numItems);
prop.put("Itemlist", entryCount);
prop.put("status", 2);//added
prop.put("status_item", newItem);//added
// add to blacklist
// if (listManager.switchboard.urlBlacklist != null)
// listManager.switchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1));
if (plasmaSwitchboard.urlBlacklist != null) {
plasmaSwitchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1));
}
}
listManager.writeList(new File(listManager.listsPath, filename), out.toString());
// List known hosts for BlackList retrieval
yacySeed seed;
if (yacyCore.seedDB != null && yacyCore.seedDB.sizeConnected() > 0) { // no nullpointer error
final Enumeration e = yacyCore.seedDB.seedsConnected(true, false, null);
i = 0;
int peerCount = 0;
while (e.hasMoreElements()) {
seed = (yacySeed) e.nextElement();
if (seed != null) {
final String Hash = seed.hash;
final String Name = seed.get(yacySeed.NAME, "nameless");
prop.put("otherHosts_" + i + "_hash", Hash);
prop.put("otherHosts_" + i + "_name", Name);
i++;
prop.put("otherHosts_" + peerCount + "_hash", Hash);
prop.put("otherHosts_" + peerCount + "_name", Name);
peerCount++;
}
}
prop.put("otherHosts", i);
// } else {
// System.out.println("BlackList_p: yacy seed not loaded!"); // DEBUG:
prop.put("otherHosts", peerCount);
}
// List BlackLists
final String[] BlackLists = listManager.getListslistArray(BLACKLIST_ALL);
for (i = 0; i <= BlackLists.length - 1; i++) {
prop.put(BLACKLIST + i + "_name", BlackLists[i]);
prop.put(BLACKLIST + i + "_active", 0);
prop.put(BLACKLIST + i + "_shared", 0);
prop.put(BLACKLIST + i + "_selected", 0);
if (BlackLists[i].equals(filename)) { //current List
prop.put(BLACKLIST + i + "_selected", 1);
}
if (listManager.ListInListslist(BLACKLIST_ACTIVE, BlackLists[i])) {
prop.put(BLACKLIST + i + "_active", 1);
}
if (listManager.ListInListslist(BLACKLIST_SHARED, BlackLists[i])) {
prop.put(BLACKLIST + i + "_shared", 1);
int blacklistCount = 0;
if (dirlist != null) {
for (int i = 0; i <= dirlist.length - 1; i++) {
prop.put(BLACKLIST + blacklistCount + "_name", dirlist[i]);
prop.put(BLACKLIST + blacklistCount + "_shared", 0);
if (dirlist[i].equals(blacklistToUse)) { //current List
prop.put(BLACKLIST + blacklistCount + "_selected", 1);
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
prop.put("currentActiveFor_" + blTypes + "_blTypeName",supportedBlacklistTypes[blTypes]);
prop.put("currentActiveFor_" + blTypes + "_checked",
listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i])?0:1);
}
prop.put("currentActiveFor",supportedBlacklistTypes.length);
}
if (listManager.ListInListslist(BLACKLIST_SHARED, dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_shared", 1);
} else {
prop.put(BLACKLIST + blacklistCount + "_selected", 0);
}
int activeCount = 0;
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",dirlist[i])) {
prop.put(BLACKLIST + blacklistCount + "_active_" + activeCount + "_blTypeName",supportedBlacklistTypes[blTypes]);
activeCount++;
}
}
prop.put(BLACKLIST + blacklistCount + "_active",activeCount);
blacklistCount++;
}
}
prop.put("blackLists", i);
prop.put("filename", filename);
prop.put("blackLists", blacklistCount);
prop.put("currentBlacklist", blacklistToUse);
return prop;
}

@ -63,6 +63,7 @@ import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -462,7 +463,7 @@ public class IndexControl_p {
} else {
url = new URL(us);
if (plasmaSwitchboard.urlBlacklist.isListed(url)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_DHT, url)) {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" checked value=\"").append(uh[0]).append("\" align=\"top\">");
} else {
result.append("<input type=\"checkbox\" name=\"urlhx").append(i++).append("\" value=\"").append(uh[0]).append("\" align=\"top\">");

@ -61,6 +61,7 @@ import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import de.anomic.data.listManager;
import de.anomic.http.httpHeader;
import de.anomic.http.httpc;
import de.anomic.plasma.plasmaSwitchboard;
@ -250,8 +251,16 @@ public class sharedBlacklist_p {
out += newItem+"\n";
prop.put("status_list_"+count+"_entry", newItem);
count++;
if (plasmaSwitchboard.urlBlacklist != null)
plasmaSwitchboard.urlBlacklist.add(newItem.substring(0, pos), newItem.substring(pos + 1));
if (plasmaSwitchboard.urlBlacklist != null) {
String supportedBlacklistTypesStr = env.getConfig("BlackLists.types", "");
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
for (int blTypes=0; blTypes < supportedBlacklistTypes.length; blTypes++) {
if (listManager.ListInListslist(supportedBlacklistTypes[blTypes] + ".BlackLists",filename)) {
plasmaSwitchboard.urlBlacklist.add(supportedBlacklistTypes[blTypes],newItem.substring(0, pos), newItem.substring(pos + 1));
}
}
}
//write the list
try{

@ -54,6 +54,7 @@ import de.anomic.http.httpHeader;
import de.anomic.index.indexEntry;
import de.anomic.index.indexURLEntry;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -163,7 +164,7 @@ public final class transferRWI {
wordhashes[received] = wordHash;
iEntry = new indexURLEntry(estring.substring(p));
urlHash = iEntry.urlHash();
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(urlHash))) {
if ((blockBlacklist) && (plasmaSwitchboard.urlBlacklist.hashInBlacklistedCache(plasmaURLPattern.BLACKLIST_DHT, urlHash))) {
//int deleted = sb.wordIndex.tryRemoveURLs(urlHash);
yacyCore.log.logFine("transferRWI: blocked blacklisted URLHash '" + urlHash + "' from peer " + otherPeerName + "; deleted 1 URL entries from RWIs");
blocked++;

@ -155,11 +155,13 @@ public class listManager {
// overloaded function to write an array
public static boolean writeList(File listFile, String[] list){
String out = "";
for(int i=0;i <= list.length; i++){
out += list[i] + serverCore.crlfString;
StringBuffer out = new StringBuffer();
for(int i=0;i < list.length; i++){
out
.append(list[i])
.append(serverCore.crlfString);
}
return writeList(listFile, out); //(File, String)
return writeList(listFile, out.toString()); //(File, String)
}
public static String getListString(String filename, boolean withcomments){
@ -194,6 +196,12 @@ public class listManager {
String[] fileListString;
File[] fileList;
final File dir = new File(dirname);
return getDirListing(dir);
}
public static String[] getDirListing(File dir){
String[] fileListString;
File[] fileList;
if (dir != null ) {
if (!dir.exists()) {
@ -207,7 +215,7 @@ public class listManager {
return fileListString;
}
return null;
}
}
public static ArrayList getDirsRecursive(File dir, String notdir){
return getDirsRecursive(dir, notdir, true);
@ -321,11 +329,21 @@ public class listManager {
/**
 * Reloads the active blacklists of every supported blacklist type.
 *
 * The types are read from the config value <code>BlackLists.types</code>
 * (comma separated); for each type the config value
 * <code>&lt;type&gt;.BlackLists</code> holds the list files that are active
 * for it. The running blacklist engine is cleared once and then loaded with
 * one {type, files} pair per type.
 */
public static void reloadBlacklists(){
    String supportedBlacklistTypesStr = switchboard.getConfig("BlackLists.types", "");
    String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");

    // one row per type: [0] = type name, [1] = configured list files
    String[][] blacklistFiles = new String[supportedBlacklistTypes.length][];
    for (int i=0; i < supportedBlacklistTypes.length; i++) {
        blacklistFiles[i] = new String[]{
            supportedBlacklistTypes[i],
            switchboard.getConfig(supportedBlacklistTypes[i] + ".BlackLists", "")
        };
    }

    de.anomic.plasma.plasmaSwitchboard.urlBlacklist.clear();
    de.anomic.plasma.plasmaSwitchboard.urlBlacklist.loadList(blacklistFiles, "/");
}

@ -96,6 +96,7 @@ import de.anomic.index.indexURL;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
import de.anomic.server.serverObjects;
@ -389,7 +390,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// respond a 404 for all AGIS ("all you get is shit") servers
String hostlow = host.toLowerCase();
if (args != null) { path = path + "?" + args; }
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -915,7 +916,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// re-calc the url path
String remotePath = (args == null) ? path : (path + "?" + args);
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, remotePath)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, remotePath)) {
httpd.sendRespondError(conProp,respond,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");
@ -1128,7 +1129,7 @@ public final class httpdProxyHandler extends httpdAbstractHandler implements htt
// blacklist idea inspired by [AS]:
// respond a 404 for all AGIS ("all you get is shit") servers
final String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_PROXY, hostlow, path)) {
httpd.sendRespondError(conProp,clientOut,4,403,null,
"URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null);
this.theLogger.logInfo("AGIS blocking of host '" + hostlow + "'");

@ -898,7 +898,7 @@ public final class plasmaCrawlLURL extends indexURL {
plasmaCrawlLURL.Entry entry = (plasmaCrawlLURL.Entry) eiter.next();
totalSearchedUrls++;
if (plasmaSwitchboard.urlBlacklist.isListed(entry.url())==true) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,entry.url())==true) {
lastBlacklistedUrl = entry.url().toString();
lastBlacklistedHash = entry.hash();
serverLog.logFine("URLDBCLEANER", ++blacklistedUrls + " blacklisted (" + ((double)blacklistedUrls/totalSearchedUrls)*100 + "%): " + entry.hash() + " " + entry.url());

@ -283,7 +283,7 @@ public final class plasmaCrawlStacker {
}
// check blacklist
if (plasmaSwitchboard.urlBlacklist.isListed(nexturl)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER,nexturl)) {
reason = plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST;
this.log.logFine("URL '" + nexturlString + "' is in blacklist. " +
"Stack processing time: " + (System.currentTimeMillis()-startTime) + "ms");

@ -311,7 +311,7 @@ public final class plasmaCrawlWorker extends Thread {
// check if url is in blacklist
String hostlow = host.toLowerCase();
if (plasmaSwitchboard.urlBlacklist.isListed(hostlow, path)) {
if (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, hostlow, path)) {
log.logInfo("CRAWLER Rejecting URL '" + url.toString() + "'. URL is in blacklist.");
addURLtoErrorDB(url, refererURLString, initiator, name, plasmaCrawlEURL.DENIED_URL_IN_BLACKLIST, new bitfield(indexURL.urlFlagLength));
return null;

@ -125,6 +125,7 @@ import java.util.logging.Level;
import de.anomic.data.blogBoard;
import de.anomic.data.bookmarksDB;
import de.anomic.data.listManager;
import de.anomic.data.messageBoard;
import de.anomic.data.wikiBoard;
import de.anomic.data.userDB;
@ -302,14 +303,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load the black-list / inspired by [AS]
File ulrBlackListFile = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
urlBlacklist = new plasmaURLPattern(ulrBlackListFile);
String f = getConfig("proxyBlackListsActive", null);
if (f != null) {
urlBlacklist.loadList(f, "/");
this.log.logConfig("loaded black-list from file " + ulrBlackListFile.getName() + ", " +
urlBlacklist.size() + " entries, " +
ppRamString(ulrBlackListFile.length()/1024));
}
urlBlacklist = new plasmaURLPattern(ulrBlackListFile);
listManager.switchboard = this;
listManager.listsPath = ulrBlackListFile;
listManager.reloadBlacklists();
// load badwords (to filter the topwords)
if (badwords == null) {

@ -43,78 +43,158 @@ package de.anomic.plasma;
import java.io.File;
import de.anomic.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import de.anomic.kelondro.kelondroMSetTools;
public class plasmaURLPattern {
// identifiers of the blacklist types; every type gets its own host map and its own url-hash cache
public static final String BLACKLIST_CRAWLER = "crawler";
public static final String BLACKLIST_PROXY = "proxy";
public static final String BLACKLIST_DHT = "dht";
public static final String BLACKLIST_SEARCH = "search";
// set of all valid type identifiers; the typed methods of this class reject any value not contained here
public static final HashSet BLACKLIST_TYPES = new HashSet(Arrays.asList(new String[]{
BLACKLIST_CRAWLER,
BLACKLIST_PROXY,
BLACKLIST_DHT,
BLACKLIST_SEARCH
}));
// NOTE(review): the next two declarations appear to be the removed pre-change fields shown by the diff — confirm
private Set cachedUrlHashs = Collections.synchronizedSet(new HashSet());
private File rootPath = null;
// directory against which loadList resolves the blacklist file names
private File blacklistRootPath = null;
// key=blacklist type, value=synchronized Set of url hashes already found to be listed for that type
private HashMap cachedUrlHashs = null;
private HashMap hostpaths = null; // key=blacklist type, value=HashMap with key=host, value=path; mapped url is http://host/path; path does not start with '/' here
/**
 * Creates an empty blacklist holding one host map and one url-hash cache
 * for every entry of BLACKLIST_TYPES.
 *
 * @param rootPath directory stored as blacklistRootPath; loadList resolves
 *                 blacklist file names against it
 */
public plasmaURLPattern(File rootPath) {
super();
// NOTE(review): the next assignment targets the field rootPath, which appears to be removed by this diff — confirm
this.rootPath = rootPath;
this.blacklistRootPath = rootPath;
// prepare the data structure
this.hostpaths = new HashMap();
this.cachedUrlHashs = new HashMap();
// register an empty host map and an empty synchronized hash cache per known type,
// so the typed methods can fetch both without a null check
Iterator iter = BLACKLIST_TYPES.iterator();
while (iter.hasNext()) {
String blacklistType = (String) iter.next();
this.hostpaths.put(blacklistType, new HashMap());
this.cachedUrlHashs.put(blacklistType, Collections.synchronizedSet(new HashSet()));
}
}
/**
 * Empties the host map of every blacklist type in place.
 * The url-hash caches held in cachedUrlHashs are not touched by this method.
 */
public void clear() {
// NOTE(review): the next line appears to be the removed pre-change statement shown by the diff;
// if it were kept, the loop below would iterate an empty map and the per-type maps would be dropped — confirm
this.hostpaths = new HashMap();
// walk over all registered types and clear each per-type host map
Iterator iter = this.hostpaths.keySet().iterator();
while (iter.hasNext()) {
HashMap blacklistMap = (HashMap) this.hostpaths.get(iter.next());
blacklistMap.clear();
}
}
/**
 * Counts the blacklist entries of all types.
 *
 * @return sum of the sizes of all per-type host maps
 */
public int size() {
// NOTE(review): the next return appears to be the removed pre-change line shown by the diff — confirm
return hostpaths.size();
int size = 0;
// add up the number of host entries of each blacklist type
Iterator iter = this.hostpaths.keySet().iterator();
while (iter.hasNext()) {
HashMap blacklistMap = (HashMap) this.hostpaths.get(iter.next());
size += blacklistMap.size();
}
return size;
}
// NOTE(review): the untyped signature and the two lines after it appear to be the removed pre-change code shown by the diff — confirm
public void loadList(String filenames, String sep) {
// File listsPath = new File(getRootPath(), getConfig("listsPath", "DATA/LISTS"));
final String[] filenamesarray = filenames.split(",");
/**
 * Loads one or more blacklist files into the host map of the given type.
 * Entries are merged with putAll, so existing keys are overwritten and
 * nothing is removed.
 *
 * @param blacklistType one of BLACKLIST_TYPES
 * @param filenames     comma-separated file names, each resolved against blacklistRootPath
 * @param sep           passed unchanged to kelondroMSetTools.loadMap
 *                      (presumably the key/value separator inside the file — confirm)
 * @throws IllegalArgumentException if blacklistType is null or not in BLACKLIST_TYPES
 */
public void loadList(String blacklistType, String filenames, String sep) {
if (blacklistType == null) throw new IllegalArgumentException();
// NOTE(review): the message text reads "backlist"; probably "blacklist" was meant
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
// the host map that receives the loaded entries
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
String[] filenamesarray = filenames.split(",");
if( filenamesarray.length > 0) {
for (int i = 0; i < filenamesarray.length; i++) {
// NOTE(review): the next line appears to be the removed pre-change statement shown by the diff — confirm
hostpaths.putAll(kelondroMSetTools.loadMap(new File(rootPath, filenamesarray[i]).toString(), sep));
blacklistMap.putAll(kelondroMSetTools.loadMap(new File(this.blacklistRootPath, filenamesarray[i]).toString(), sep));
}
}
}
/**
 * Loads blacklist files for several blacklist types in one call.
 *
 * @param filenames array of pairs; element 0 of a pair is the blacklist type,
 *                  element 1 the file name string handed to the typed loadList
 * @param sep       passed unchanged to the typed loadList for every pair
 */
public void loadList(String[][] filenames, String sep) {
    for (int idx = 0; idx < filenames.length; idx++) {
        // each entry is a {blacklistType, fileName} pair
        final String[] pair = filenames[idx];
        final String type = pair[0];
        final String files = pair[1];
        this.loadList(type, files, sep);
    }
}
// NOTE(review): the untyped signature and the statement after it appear to be the removed pre-change code shown by the diff — confirm
public void remove(String host) {
hostpaths.remove(host);
/**
 * Removes the entry stored under host from the host map of the given type.
 * The key is used as passed in; it is not lower-cased here, whereas add()
 * stores hosts lower-cased.
 *
 * @param blacklistType one of BLACKLIST_TYPES
 * @param host          key of the entry to remove
 * @throws IllegalArgumentException if blacklistType is null or not in BLACKLIST_TYPES
 */
public void remove(String blacklistType, String host) {
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
blacklistMap.remove(host);
}
// NOTE(review): the untyped signature on the next line appears to be the removed pre-change line shown by the diff — confirm
public void add(String host, String path) {
/**
 * Adds (or overwrites) an entry in the host map of the given type.
 * The host is stored lower-cased and a single leading '/' is stripped from path.
 *
 * @param blacklistType one of BLACKLIST_TYPES
 * @param host          host key of the entry
 * @param path          path value of the entry
 * @throws NullPointerException     if host or path is null
 * @throws IllegalArgumentException if blacklistType is null or not in BLACKLIST_TYPES
 */
public void add(String blacklistType, String host, String path) {
if (host == null) throw new NullPointerException();
if (path == null) throw new NullPointerException();
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
// paths are kept without the leading slash
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
// NOTE(review): the next line appears to be the removed pre-change statement shown by the diff — confirm
hostpaths.put(host.toLowerCase(), path);
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
blacklistMap.put(host.toLowerCase(), path);
}
/**
 * Counts the cached url hashes of all blacklist types.
 *
 * @return sum of the sizes of all per-type url-hash caches
 */
public int blacklistCacheSize() {
// NOTE(review): the next return appears to be the removed pre-change line shown by the diff — confirm
return cachedUrlHashs.size();
int size = 0;
// add up the number of cached hashes of each blacklist type
Iterator iter = this.cachedUrlHashs.keySet().iterator();
while (iter.hasNext()) {
Set blacklistMap = (Set) this.cachedUrlHashs.get(iter.next());
size += blacklistMap.size();
}
return size;
}
// NOTE(review): the untyped signature and the return after it appear to be the removed pre-change code shown by the diff — confirm
public boolean hashInBlacklistedCache(String urlHash) {
return cachedUrlHashs.contains(urlHash);
/**
 * Tells whether a url hash is present in the url-hash cache of the given type.
 * Only the cache is consulted; the host map is not evaluated.
 *
 * @param blacklistType one of BLACKLIST_TYPES
 * @param urlHash       hash to look up
 * @return true if the cache of that type contains urlHash
 * @throws IllegalArgumentException if blacklistType is null or not in BLACKLIST_TYPES
 */
public boolean hashInBlacklistedCache(String blacklistType, String urlHash) {
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
Set urlHashCache = (Set) this.cachedUrlHashs.get(blacklistType);
return urlHashCache.contains(urlHash);
}
// NOTE(review): the untyped signature and the six lines after it appear to be the removed pre-change code shown by the diff — confirm
public boolean isListed(String urlHash, URL url) {
if (!cachedUrlHashs.contains(urlHash)) {
boolean temp = isListed(url.getHost().toLowerCase(), url.getFile());
if (temp)
{
cachedUrlHashs.add(urlHash);
}
/**
 * Checks a url against the blacklist of the given type, using the url-hash
 * cache of that type as a shortcut.
 *
 * @param blacklistType one of BLACKLIST_TYPES
 * @param urlHash       hash used as cache key for url
 * @param url           url to test when the hash is not cached yet
 * @return true if urlHash is already cached or the url matches the blacklist
 * @throws IllegalArgumentException if blacklistType is null or not in BLACKLIST_TYPES
 */
public boolean isListed(String blacklistType, String urlHash, URL url) {
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
Set urlHashCache = (Set) this.cachedUrlHashs.get(blacklistType);
// cache miss: evaluate host and file of the url against the host map
if (!urlHashCache.contains(urlHash)) {
boolean temp = isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
// only positive results are remembered
if (temp) {
urlHashCache.add(urlHash);
}
return temp;
}
// cache hit: the hash was found listed by an earlier call
return true;
}
// NOTE(review): the untyped signature and the return after it appear to be the removed pre-change code shown by the diff — confirm
public boolean isListed(URL url) {
return isListed(url.getHost().toLowerCase(), url.getFile());
/**
 * Checks a url against the blacklist of the given type without using the
 * url-hash cache; delegates to the host/path variant with the lower-cased
 * host and the file part of the url.
 *
 * @param blacklistType blacklist type, handed on unchanged to the host/path variant
 * @param url           url to test
 * @return result of the host/path variant of isListed
 */
public boolean isListed(String blacklistType, URL url) {
return isListed(blacklistType, url.getHost().toLowerCase(), url.getFile());
}
public boolean isListed(String hostlow, String path) {
public boolean isListed(String blacklistType, String hostlow, String path) {
if (hostlow == null) throw new NullPointerException();
if (path == null) throw new NullPointerException();
if (blacklistType == null) throw new IllegalArgumentException();
if (!BLACKLIST_TYPES.contains(blacklistType)) throw new IllegalArgumentException("Unknown backlist type.");
// getting the proper blacklist
HashMap blacklistMap = (HashMap) this.hostpaths.get(blacklistType);
if (path.length() > 0 && path.charAt(0) == '/') path = path.substring(1);
String pp = ""; // path-pattern
@ -122,19 +202,19 @@ public class plasmaURLPattern {
// [TL] for every "." found in the host name, try the prefix up to and including that dot followed by "*"
int index = 0;
while ((index = hostlow.indexOf('.', index + 1)) != -1) {
if ((pp = (String) hostpaths.get(hostlow.substring(0, index + 1) + "*")) != null) {
if ((pp = (String) blacklistMap.get(hostlow.substring(0, index + 1) + "*")) != null) {
return ((pp.equals("*")) || (path.matches(pp)));
}
}
index = hostlow.length();
while ((index = hostlow.lastIndexOf('.', index - 1)) != -1) {
if ((pp = (String) hostpaths.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
if ((pp = (String) blacklistMap.get("*" + hostlow.substring(index, hostlow.length()))) != null) {
return ((pp.equals("*")) || (path.matches(pp)));
}
}
// try to match without wildcard in domain
return (((pp = (String) hostpaths.get(hostlow)) != null) &&
return (((pp = (String) blacklistMap.get(hostlow)) != null) &&
((pp.equals("*")) || (path.matches(pp))));
}

@ -708,7 +708,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
// "+entry.getUrlHash());
try {
url = lurl.getEntry(entry.urlHash(), null).url();
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(url) == true)) {
if ((url == null) || (plasmaSwitchboard.urlBlacklist.isListed(plasmaURLPattern.BLACKLIST_CRAWLER, url) == true)) {
urlHashs.add(entry.urlHash());
}
} catch (IOException e) {

@ -478,7 +478,7 @@ public final class yacyClient {
for (int n = 0; n < results; n++) {
// get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n), true);
if ((urlEntry == null) || (blacklist.isListed(urlEntry.url()))) { continue; } // block with backlist
if ((urlEntry == null) || (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, urlEntry.url()))) { continue; } // block with backlist
urlEntry.store();
int urlLength = urlEntry.url().toString().length();
int urlComps = htmlFilterContentScraper.urlComps(urlEntry.url().toString()).length;

@ -245,6 +245,16 @@ public class migration {
sb.setConfig("portForwarding.sch.HostUser", sb.getConfig("portForwardingHostUser",""));
sb.setConfig("portForwarding.sch.HostPwd", sb.getConfig("portForwardingHostPwd",""));
}
// migration for blacklists
if ((value = sb.getConfig("proxyBlackLists","")).length() > 0) {
sb.setConfig("proxy.BlackLists", value);
sb.setConfig("crawler.BlackLists", value);
sb.setConfig("dht.BlackLists", value);
sb.setConfig("search.BlackLists", value);
sb.setConfig("BlackLists.Shared",sb.getConfig("proxyBlackListsShared",""));
}
}
}

@ -205,9 +205,14 @@ proxyYellowList=yacy.yellow
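# the black-lists; URLs appearing in these lists will not be loaded;
# instead, the proxy always returns an error response (403)
# all these files will be placed in the listsPath
# BlackLists.types names the available blacklist types; <type>.BlackLists holds
# the list files used for that type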
proxyBlackLists=url.default.black
proxyBlackListsActive=url.default.black
proxyBlackListsShared=url.default.black
BlackLists.types=proxy,crawler,dht,search
BlackLists.Shared=url.default.black
proxy.BlackLists=url.default.black
crawler.BlackLists=url.default.black
dht.BlackLists=url.default.black
search.BlackLists=url.default.black
proxyCookieBlackList=cookie.default.black
proxyCookieWhiteList=cookie.default.black

Loading…
Cancel
Save