Harmonize loading/reading blacklist

between init and servlet so that both use the same procedures
- added BlacklistHelper.blacklistToSortedArray to simplify use in the servlet
pull/461/head
reger24 3 years ago
parent 11c4a1b45c
commit 18dddb74c9

@ -395,12 +395,8 @@ public class Blacklist_p {
// Read the blacklist items from file
if (blacklistToUse != null) {
int entryCount = 0;
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));
// sort them
final String[] sortedlist = new String[list.size()];
Arrays.sort(list.toArray(sortedlist));
// we should use the same load method as during internal init (even if we only need a simple array to display content)
String[] sortedlist = BlacklistHelper.blacklistToSortedArray(blacklistToUse);
// display them
boolean dark = true;
int offset = 0;

@ -258,7 +258,6 @@ public final class SetTools {
/**
* test if one set is totally included in another set
* @param <A>
* @param small
* @param large
* @return true if the small set is completely included in the large set
@ -285,7 +284,6 @@ public final class SetTools {
/**
* test if the intersection of two sets is not empty
* @param <A>
* @param set1
* @param set2
* @return true if any element of the first set is part of the second set or vice-versa
@ -490,6 +488,17 @@ public final class SetTools {
return map;
}
/**
* Used to load blacklist entries from blacklist file (text file). The
* blacklist entry is expected to have a host and a path part with standard
* separator "/". To deal with possibly modified lists the procedure is
* error tolerant if the path part is missing and adds the default "/.*".
*
* Lines starting with the '#' character are interpreted as comment lines.
*
* @param filename filename to load
* @param sep separator between host and path part (typically '/')
*/
public static SortedMap<String, List<String>> loadMapMultiValsPerKey(final String filename, final String sep) {
final SortedMap<String, List<String>> map = new TreeMap<String, List<String>>();
BufferedReader br = null;
@ -499,11 +508,17 @@ public final class SetTools {
int pos;
while ((line = br.readLine()) != null) {
line = line.trim();
if ((!line.isEmpty() && line.charAt(0) != '#') && ((pos = line.indexOf(sep)) > 0)) {
if ((!line.isEmpty() && line.charAt(0) != '#')) {
// old statement 2022-02-05: if ((!line.isEmpty() && line.charAt(0) != '#') && ((pos = line.indexOf(sep)) > 0)) {
pos = line.indexOf(sep); // deal with missing path part
if (pos <= 0) {
line = line + sep + ".*"; // fix missing path part
pos = line.length() - sep.length() - 2;
}
key = line.substring(0, pos).trim().toLowerCase();
value = line.substring(pos + sep.length()).trim();
if (!map.containsKey(key)) map.put(key, new ArrayList<String>());
map.get(key).add(value);
map.get(key).add(value); // add value to the list stored as value for the map
}
}
} catch (final IOException e) {

@ -1,5 +1,9 @@
package net.yacy.repository;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.SortedMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@ -8,6 +12,7 @@ import net.yacy.cora.document.id.Punycode.PunycodeException;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.data.ListManager;
import static net.yacy.kelondro.util.SetTools.loadMapMultiValsPerKey;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SearchEventCache;
@ -139,4 +144,34 @@ public final class BlacklistHelper {
return null;
}
/**
 * Reads a blacklist file and returns all of its entries as a sorted String
 * array. This uses the same read/load method as the normal init and should
 * be used in servlets (in preference to creating a private list or array).
 *
 * The loader groups all path patterns of one host under a single map key.
 * Each host/path pair is turned back into its own "host/path" entry, so a
 * host that is listed with several paths yields several array elements
 * (one per path) rather than a single concatenated string.
 *
 * @param blacklistToUse filename of the blacklist file to use (e.g.
 * url.default.black), resolved below ListManager.listsPath
 *
 * @return array with one "host/path" string per loaded entry, sorted in
 * natural String order
 */
public static String[] blacklistToSortedArray(String blacklistToUse) {
    final SortedMap<String, List<String>> blklist = loadMapMultiValsPerKey(ListManager.listsPath + "/" + blacklistToUse, "/");
    final List<String> list = new ArrayList<String>();
    // flatten the host -> paths map into one entry per host/path pair
    for (final String host : blklist.keySet()) {
        for (final String path : blklist.get(host)) {
            list.add(host + "/" + path);
        }
    }
    // sort them (map order alone is not enough: paths of one host are in file order)
    final String[] sortedlist = list.toArray(new String[list.size()]);
    Arrays.sort(sortedlist);
    return sortedlist;
}
}

Loading…
Cancel
Save