You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
403 lines
14 KiB
403 lines
14 KiB
// listManager.java
|
|
// -------------------------------------
|
|
// part of YACY
|
|
//
|
|
// (C) 2005, 2006 by Alexander Schier
|
|
// (C) 2007 by Bjoern 'Fuchs' Krombholz; fox.box@gmail.com
|
|
//
|
|
// last change: $LastChangedDate$ by $LastChangedBy$
|
|
// $LastChangedRevision$
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
//
|
|
// Using this software in any meaning (reading, learning, copying, compiling,
|
|
// running) means that you agree that the Author(s) is (are) not responsible
|
|
// for cost, loss of data or any harm that may be caused directly or indirectly
|
|
// by usage of this softare or this documentation. The usage of this software
|
|
// is on your own risk. The installation and usage (starting/running) of this
|
|
// software may allow other people or application to access your computer and
|
|
// any attached devices and is highly dependent on the configuration of the
|
|
// software which must be done by the user of the software; the author(s) is
|
|
// (are) also not responsible for proper configuration and usage of the
|
|
// software, even if provoked by documentation provided together with
|
|
// the software.
|
|
//
|
|
// Any changes to this file according to the GPL as documented in the file
|
|
// gpl.txt aside this file in the shipment you received can be done to the
|
|
// lines that follows this copyright notice here, but changes must not be
|
|
// done inside the copyright notive above. A re-distribution must contain
|
|
// the intact and unchanged copyright notice.
|
|
// Contributions and changes to the program code must be marked as such.
|
|
|
|
package de.anomic.data;
|
|
|
|
import java.io.BufferedReader;
|
|
import java.io.BufferedWriter;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileWriter;
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.io.PrintWriter;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Collection;
|
|
import java.util.HashSet;
|
|
import java.util.Iterator;
|
|
import java.util.Set;
|
|
import java.util.Vector;
|
|
|
|
import de.anomic.index.indexAbstractReferenceBlacklist;
|
|
import de.anomic.index.indexReferenceBlacklist.blacklistFile;
|
|
import de.anomic.plasma.plasmaSwitchboard;
|
|
import de.anomic.server.serverCore;
|
|
|
|
// The Naming of the functions is a bit strange...
|
|
|
|
public class listManager {
|
|
public static plasmaSwitchboard switchboard = null;
|
|
public static File listsPath = null;
|
|
|
|
/**
|
|
* Get ListSet from configuration file and return it as a unified Set.
|
|
*
|
|
* <b>Meaning of ListSet</b>: There are various "lists" in YaCy which are
|
|
* actually disjunct (pairwise unequal) sets which themselves can be seperated
|
|
* into different subsets. E.g., there can be more than one blacklist of a type.
|
|
* A ListSet is the set of all those "lists" (subsets) of an equal type.
|
|
*
|
|
* @param setName name of the ListSet
|
|
* @return a ListSet from configuration file
|
|
*/
|
|
public static Set<String> getListSet(String setName) {
|
|
return string2set(switchboard.getConfig(setName, ""));
|
|
}
|
|
|
|
/**
|
|
* Removes an element from a ListSet and updates the configuration file
|
|
* accordingly. If the element doesn't exist, then nothing will be changed.
|
|
*
|
|
* @param setName name of the ListSet.
|
|
* @param listName name of the element to remove from the ListSet.
|
|
*/
|
|
public static void removeFromListSet(String setName, String listName) {
|
|
Set<String> listSet = getListSet(setName);
|
|
|
|
if (listSet.size() > 0) {
|
|
listSet.remove(listName);
|
|
switchboard.setConfig(setName, collection2string(listSet));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Adds an element to an existing ListSet. If the ListSet doesn't exist yet,
|
|
* a new one will be added. If the ListSet already contains an identical element,
|
|
* then nothing happens.
|
|
*
|
|
* The new list will be written to the configuartion file.
|
|
*
|
|
* @param setName
|
|
* @param newListName
|
|
*/
|
|
public static void updateListSet(String setName, String newListName) {
|
|
Set<String> listSet = getListSet(setName);
|
|
listSet.add(newListName);
|
|
|
|
switchboard.setConfig(setName, collection2string(listSet));
|
|
}
|
|
|
|
/**
|
|
* @param setName ListSet in which to search for an element.
|
|
* @param listName the element to search for.
|
|
* @return <code>true</code> if the ListSet "setName" contains an element
|
|
* "listName", <code>false</code> otherwise.
|
|
*/
|
|
public static boolean listSetContains(String setName, String listName) {
|
|
Set<String> Lists = getListSet(setName);
|
|
|
|
return Lists.contains(listName);
|
|
}
|
|
|
|
|
|
//================general Lists==================
|
|
|
|
/**
|
|
* Read lines of a file into an ArrayList.
|
|
*
|
|
* @param listFile the file
|
|
* @return the resulting array as an ArrayList
|
|
*/
|
|
public static ArrayList<String> getListArray(File listFile){
|
|
String line;
|
|
ArrayList<String> list = new ArrayList<String>();
|
|
int count = 0;
|
|
BufferedReader br = null;
|
|
try {
|
|
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile),"UTF-8"));
|
|
|
|
while((line = br.readLine()) != null){
|
|
list.add(line);
|
|
count++;
|
|
}
|
|
br.close();
|
|
} catch(IOException e) {
|
|
// list is empty
|
|
} finally {
|
|
if (br!=null) try { br.close(); } catch (Exception e) {}
|
|
}
|
|
return list;
|
|
}
|
|
|
|
/**
|
|
* Write a String to a file (used for string representation of lists).
|
|
*
|
|
* @param listFile the file to write to
|
|
* @param out the String to write
|
|
* @return returns <code>true</code> if successful, <code>false</code> otherwise
|
|
*/
|
|
public static boolean writeList(File listFile, String out) {
|
|
BufferedWriter bw = null;
|
|
try {
|
|
bw = new BufferedWriter(new PrintWriter(new FileWriter(listFile)));
|
|
bw.write(out);
|
|
bw.close();
|
|
return true;
|
|
} catch(IOException e) {
|
|
return false;
|
|
} finally {
|
|
if (bw!=null) try { bw.close(); } catch (Exception e) {}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Write elements of an Array of Strings to a file (one element per line).
|
|
*
|
|
* @param listFile the file to write to
|
|
* @param list the Array to write
|
|
* @return returns <code>true</code> if successful, <code>false</code> otherwise
|
|
*/
|
|
public static boolean writeList(File listFile, String[] list){
|
|
StringBuffer out = new StringBuffer();
|
|
for(int i=0;i < list.length; i++){
|
|
out
|
|
.append(list[i])
|
|
.append(serverCore.CRLF_STRING);
|
|
}
|
|
return writeList(listFile, new String(out)); //(File, String)
|
|
}
|
|
|
|
// same as below
|
|
public static String getListString(String filename, boolean withcomments) {
|
|
File listFile = new File(listsPath ,filename);
|
|
return getListString(listFile, withcomments);
|
|
}
|
|
|
|
/**
|
|
* Read lines of a text file into a String, optionally ignoring comments.
|
|
*
|
|
* @param listFile the File to read from.
|
|
* @param withcomments If <code>false</code> ignore lines starting with '#'.
|
|
* @return String representation of the file content.
|
|
*/
|
|
public static String getListString(File listFile, boolean withcomments){
|
|
StringBuffer temp = new StringBuffer();
|
|
|
|
BufferedReader br = null;
|
|
try{
|
|
br = new BufferedReader(new InputStreamReader(new FileInputStream(listFile)));
|
|
temp.ensureCapacity((int) listFile.length());
|
|
|
|
// Read the List
|
|
String line = "";
|
|
while ((line = br.readLine()) != null) {
|
|
if ((!line.startsWith("#") || withcomments) || !line.equals("")) {
|
|
//temp += line + serverCore.CRLF_STRING;
|
|
temp.append(line)
|
|
.append(serverCore.CRLF_STRING);
|
|
}
|
|
}
|
|
br.close();
|
|
} catch (IOException e) {
|
|
} finally {
|
|
if (br!=null) try { br.close(); } catch (Exception e) {}
|
|
}
|
|
|
|
return new String(temp);
|
|
}
|
|
|
|
// get a Directory Listing as a String Array
|
|
public static String[] getDirListing(String dirname){
|
|
final File dir = new File(dirname);
|
|
return getDirListing(dir);
|
|
}
|
|
|
|
/**
|
|
* Read content of a directory into a String array of file names.
|
|
*
|
|
* @param dir The directory to get the file listing from. If it doesn't exist yet,
|
|
* it will be created.
|
|
* @return array of file names
|
|
*/
|
|
public static String[] getDirListing(File dir){
|
|
String[] fileListString;
|
|
File[] fileList;
|
|
|
|
if (dir != null ) {
|
|
if (!dir.exists()) {
|
|
dir.mkdir();
|
|
}
|
|
fileList = dir.listFiles();
|
|
fileListString = new String[fileList.length];
|
|
for (int i=0; i<= fileList.length-1; i++) {
|
|
fileListString[i]=fileList[i].getName();
|
|
}
|
|
return fileListString;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
// same as below
|
|
public static ArrayList<File> getDirsRecursive(File dir, String notdir){
|
|
return getDirsRecursive(dir, notdir, true);
|
|
}
|
|
|
|
/**
|
|
* Returns a List of all dirs and subdirs as File Objects
|
|
*
|
|
* Warning: untested
|
|
*/
|
|
public static ArrayList<File> getDirsRecursive(File dir, String notdir, boolean excludeDotfiles){
|
|
final File[] dirList = dir.listFiles();
|
|
final ArrayList<File> resultList = new ArrayList<File>();
|
|
ArrayList<File> recursive;
|
|
Iterator<File> iter;
|
|
for (int i=0;i<dirList.length;i++) {
|
|
if (dirList[i].isDirectory() && (!excludeDotfiles || !dirList[i].getName().startsWith(".")) && !dirList[i].getName().equals(notdir)) {
|
|
resultList.add(dirList[i]);
|
|
recursive = getDirsRecursive(dirList[i], notdir, excludeDotfiles);
|
|
iter=recursive.iterator();
|
|
while (iter.hasNext()) {
|
|
resultList.add(iter.next());
|
|
}
|
|
}
|
|
}
|
|
return resultList;
|
|
}
|
|
|
|
|
|
//================Helper functions for collection conversion==================
|
|
|
|
/**
|
|
* Simple conversion of a Collection of Strings to a comma separated String.
|
|
* If the implementing Collection subclass guaranties an order of its elements,
|
|
* the substrings of the result will have the same order.
|
|
*
|
|
* @param col a Collection of Strings.
|
|
* @return String with elements from set separated by comma.
|
|
*/
|
|
public static String collection2string(Collection<String> col){
|
|
StringBuffer str = new StringBuffer();
|
|
|
|
if (col != null && (col.size() > 0)) {
|
|
Iterator<String> it = col.iterator();
|
|
str.append(it.next());
|
|
while(it.hasNext()) {
|
|
str.append(",").append(it.next());
|
|
}
|
|
}
|
|
|
|
return str.toString();
|
|
}
|
|
|
|
/**
|
|
* @see listManager#string2vector(String)
|
|
*/
|
|
public static ArrayList<String> string2arraylist(String string){
|
|
ArrayList<String> l;
|
|
|
|
if (string != null && string.length() > 0) {
|
|
l = new ArrayList<String>(Arrays.asList(string.split(",")));
|
|
} else {
|
|
l = new ArrayList<String>();
|
|
}
|
|
|
|
return l;
|
|
}
|
|
|
|
/**
|
|
* Simple conversion of a comma separated list to a unified Set.
|
|
*
|
|
* @param string list of comma separated Strings
|
|
* @return resulting Set or empty Set if string is <code>null</code>
|
|
*/
|
|
public static Set<String> string2set(String string){
|
|
HashSet<String> set;
|
|
|
|
if (string != null) {
|
|
set = new HashSet<String>(Arrays.asList(string.split(",")));
|
|
} else {
|
|
set = new HashSet<String>();
|
|
}
|
|
|
|
return set;
|
|
}
|
|
|
|
/**
|
|
* Simple conversion of a comma separated list to a Vector containing
|
|
* the order of the substrings.
|
|
*
|
|
* @param string list of comma separated Strings
|
|
* @return resulting Vector or empty Vector if string is <code>null</code>
|
|
*/
|
|
public static Vector<String> string2vector(String string){
|
|
Vector<String> v;
|
|
|
|
if (string != null) {
|
|
v = new Vector<String>(Arrays.asList(string.split(",")));
|
|
} else {
|
|
v = new Vector<String>();
|
|
}
|
|
|
|
return v;
|
|
}
|
|
|
|
//=============Blacklist specific================
|
|
|
|
/**
|
|
* Load or reload all active Blacklists
|
|
*/
|
|
public static void reloadBlacklists(){
|
|
String supportedBlacklistTypesStr = indexAbstractReferenceBlacklist.BLACKLIST_TYPES_STRING;
|
|
String[] supportedBlacklistTypes = supportedBlacklistTypesStr.split(",");
|
|
|
|
ArrayList<blacklistFile> blacklistFiles = new ArrayList<blacklistFile>(supportedBlacklistTypes.length);
|
|
for (int i=0; i < supportedBlacklistTypes.length; i++) {
|
|
blacklistFile blFile = new blacklistFile(
|
|
switchboard.getConfig(
|
|
supportedBlacklistTypes[i] + ".BlackLists", switchboard.getConfig("BlackLists.DefaultList", "url.default.black")),
|
|
supportedBlacklistTypes[i]);
|
|
blacklistFiles.add(blFile);
|
|
}
|
|
|
|
plasmaSwitchboard.urlBlacklist.clear();
|
|
plasmaSwitchboard.urlBlacklist.loadList(
|
|
blacklistFiles.toArray(new blacklistFile[blacklistFiles.size()]),
|
|
"/");
|
|
|
|
// switchboard.urlBlacklist.clear();
|
|
// if (f != "") switchboard.urlBlacklist.loadLists("black", f, "/");
|
|
}
|
|
}
|