*) Classes to handle import of lists (especially blacklists) from XML files, not used yet, but will be used soon.
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5780 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
596e6215dc
commit
9180617dd9
@ -0,0 +1,142 @@
|
|||||||
|
// ListAccumulator.java
|
||||||
|
// -------------------------------------
|
||||||
|
// part of YACY
|
||||||
|
//
|
||||||
|
// (C) 2009 by Marc Nause
|
||||||
|
//
|
||||||
|
// $LastChangedDate: $
|
||||||
|
// $LastChangedRevision: $
|
||||||
|
// $LastChangedBy: $
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
|
package de.anomic.data.list;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
 * Stores the content and the properties of several named lists (for example
 * blacklists).
 *
 * Each list is identified by its name and consists of an ordered list of
 * entries plus a map of properties. Lists are kept in the order in which they
 * were added; the entry list and the property map of one list share the same
 * index in {@link #getEntryLists()} and {@link #getPropertyMaps()}.
 *
 * The list which was added most recently is the "current" list and can be
 * filled without repeating its name. This class does no synchronization of
 * its own.
 */
public class ListAccumulator {

    /** Maps the name of a list to its index in {@link #entries} and {@link #properties}. */
    private final Map<String,Integer> names = new HashMap<String,Integer>();

    /** Entries of all lists, in the order the lists were added. */
    private final List<List<String>> entries = new ArrayList<List<String>>();

    /** Properties of all lists, in the order the lists were added. */
    private final List<Map<String,String>> properties = new ArrayList<Map<String,String>>();

    /** Entries of the most recently added list, null as long as no list has been added. */
    private List<String> currentEntries;

    /** Properties of the most recently added list, null as long as no list has been added. */
    private Map<String,String> currentProperties;

    /**
     * Adds a new, empty list if a list by that name does not exist yet. A list
     * which has been added becomes the current list; if the name is already
     * taken the current list is left unchanged.
     * @param name The name of the list to be added.
     * @return True if the new list has been added, else false (if list by name exists already).
     */
    public boolean addList(final String name) {
        if (names.containsKey(name)) {
            return false;
        }

        final List<String> newEntries = new ArrayList<String>();
        final Map<String,String> newProperties = new HashMap<String,String>();

        // The index of the new list is the number of lists stored so far.
        names.put(name, Integer.valueOf(entries.size()));
        entries.add(newEntries);
        properties.add(newProperties);

        currentEntries = newEntries;
        currentProperties = newProperties;
        return true;
    }

    /**
     * Adds a new entry to a list identified by a given name.
     * @param list The name of the list the entry is to be added to.
     * @param entry The new entry.
     * @return True if the entry has been added, else false (if list does not exist).
     */
    public boolean addEntry(final String list, final String entry) {
        // Stored indices are never null, so null means "no such list".
        final Integer index = names.get(list);
        if (index == null) {
            return false;
        }
        entries.get(index.intValue()).add(entry);
        return true;
    }

    /**
     * Adds a new entry to the list which has been added as the latest.
     * @param entry The new entry.
     * @return True if the entry has been added, else false (if no list has been added yet).
     */
    public boolean addEntryToCurrent(final String entry) {
        if (currentEntries == null) {
            return false;
        }
        currentEntries.add(entry);
        return true;
    }

    /**
     * Adds a new property to a list identified by a given name. An existing
     * property of the same name is overwritten.
     * @param list The name of the list.
     * @param property The name of the property.
     * @param value The value of the property.
     * @return True if the property has been added, else false (if list does not exist).
     */
    public boolean addProperty(final String list, final String property, final String value) {
        // Stored indices are never null, so null means "no such list".
        final Integer index = names.get(list);
        if (index == null) {
            return false;
        }
        properties.get(index.intValue()).put(property, value);
        return true;
    }

    /**
     * Adds a new property to the list which has been added as the latest. An
     * existing property of the same name is overwritten.
     * @param property The name of the property.
     * @param value The value of the property.
     * @return True if the property has been added, else false (if no list has been added yet).
     */
    public boolean addPropertyToCurrent(final String property, final String value) {
        if (currentProperties == null) {
            return false;
        }
        currentProperties.put(property, value);
        return true;
    }

    /**
     * Returns a {@link List} which contains all the {@link List Lists} of entries,
     * in the order in which the lists have been added. The returned object is
     * the internal list, not a copy.
     * @return list of lists.
     */
    public List<List<String>> getEntryLists() {
        return entries;
    }

    /**
     * Returns a {@link List} which contains all the {@link Map Maps} of properties,
     * in the order in which the lists have been added. The returned object is
     * the internal list, not a copy.
     * @return list of property maps.
     */
    public List<Map<String,String>> getPropertyMaps() {
        return properties;
    }
}
|
@ -0,0 +1,176 @@
|
|||||||
|
// XMLBlacklistImporter.java
|
||||||
|
// -------------------------------------
|
||||||
|
// part of YACY
|
||||||
|
//
|
||||||
|
// (C) 2009 by Marc Nause
|
||||||
|
//
|
||||||
|
// $LastChangedDate: $
|
||||||
|
// $LastChangedRevision: $
|
||||||
|
// $LastChangedBy: $
|
||||||
|
//
|
||||||
|
// This program is free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation; either version 2 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU General Public License
|
||||||
|
// along with this program; if not, write to the Free Software
|
||||||
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
|
||||||
|
package de.anomic.data.list;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.Reader;
|
||||||
|
import org.apache.xerces.parsers.SAXParser;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.InputSource;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.XMLReader;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class provides methods to import blacklists from an XML file (see
|
||||||
|
* http://www.yacy-websuche.de/wiki/index.php/Dev:APIblacklists
|
||||||
|
* for examples) and to return this data as a {@link ListAccumulator} object.
|
||||||
|
*/
|
||||||
|
public class XMLBlacklistImporter extends DefaultHandler {
|
||||||
|
|
||||||
|
private ListAccumulator ba = null;
|
||||||
|
private String currentListName = null;
|
||||||
|
private String lastText = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the input data and turns it into a {@link ListAccumulator} which can
|
||||||
|
* be used for further processing.
|
||||||
|
* @param input the XML data
|
||||||
|
* @return the data from the XML
|
||||||
|
* @throws java.io.IOException if input can't be read
|
||||||
|
* @throws org.xml.sax.SAXException if XML can't be parsed
|
||||||
|
*/
|
||||||
|
public synchronized ListAccumulator parse(InputSource input) throws IOException, SAXException {
|
||||||
|
|
||||||
|
XMLReader reader = new SAXParser();
|
||||||
|
reader.setContentHandler(this);
|
||||||
|
reader.parse(input);
|
||||||
|
|
||||||
|
return ba;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the input data and turns it into a {@link ListAccumulator} which can
|
||||||
|
* be used for further processing.
|
||||||
|
* @param input the XML data
|
||||||
|
* @return the data from the XML
|
||||||
|
* @throws java.io.IOException if input can't be read
|
||||||
|
* @throws org.xml.sax.SAXException if XML can't be parsed
|
||||||
|
*/
|
||||||
|
public synchronized ListAccumulator parse(Reader input) throws IOException, SAXException {
|
||||||
|
return this.parse(new InputSource(input));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the input data and turns it into a {@link ListAccumulator} which can
|
||||||
|
* be used for further processing.
|
||||||
|
* @param input the XML data
|
||||||
|
* @return the data from the XML
|
||||||
|
* @throws java.io.IOException if input can't be read
|
||||||
|
* @throws org.xml.sax.SAXException if XML can't be parsed
|
||||||
|
*/
|
||||||
|
public synchronized ListAccumulator parse(String input) throws IOException, SAXException {
|
||||||
|
return this.parse(new InputSource(input));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes the input data and turns it into a {@link ListAccumulator} which can
|
||||||
|
* be used for further processing.
|
||||||
|
* @param input The XML data.
|
||||||
|
* @return The data from the XML.
|
||||||
|
* @throws java.io.IOException if input can't be read
|
||||||
|
* @throws org.xml.sax.SAXException if XML can't be parsed
|
||||||
|
*/
|
||||||
|
public synchronized ListAccumulator parse(InputStream input) throws IOException, SAXException {
|
||||||
|
return this.parse(new InputSource(input));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* At the start of the document a new {@link ListAccumulator} is created.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startDocument() {
|
||||||
|
ba = new ListAccumulator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If the <list> tag is encountered a new list will be addedto the
|
||||||
|
* {@link ListAccumulator} and the properties of the list will be set
|
||||||
|
* if provided in the XML.
|
||||||
|
* @param uri The Namespace URI, or the empty string if the
|
||||||
|
* element has no Namespace URI or if Namespace
|
||||||
|
* processing is not being performed.
|
||||||
|
* @param localName The local name (without prefix), or the
|
||||||
|
* empty string if Namespace processing is not being
|
||||||
|
* performed.
|
||||||
|
* @param qName The qualified name (with prefix), or the
|
||||||
|
* empty string if qualified names are not available.
|
||||||
|
* @param attributes The attributes attached to the element. If
|
||||||
|
* there are no attributes, it shall be an empty
|
||||||
|
* Attributes object.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
|
||||||
|
|
||||||
|
if (qName.equalsIgnoreCase("list")) {
|
||||||
|
currentListName = attributes.getValue("name");
|
||||||
|
ba.addList(currentListName);
|
||||||
|
|
||||||
|
int attributesLength = 0;
|
||||||
|
|
||||||
|
if ((attributesLength = attributes.getLength()) > 1) {
|
||||||
|
for (int i = 0; i < attributesLength; i++) {
|
||||||
|
if (!attributes.getQName(i).equals("name")) {
|
||||||
|
ba.addPropertyToCurrent(attributes.getQName(i), attributes.getValue(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a new item to the current list in the {@link ListAccumulator}.
|
||||||
|
* @param uri The Namespace URI, or the empty string if the
|
||||||
|
* element has no Namespace URI or if Namespace
|
||||||
|
* processing is not being performed.
|
||||||
|
* @param localName The local name (without prefix), or the
|
||||||
|
* empty string if Namespace processing is not being
|
||||||
|
* performed.
|
||||||
|
* @param qName The qualified name (with prefix), or the
|
||||||
|
* empty string if qualified names are not available.
|
||||||
|
* @throws org.xml.sax.SAXException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
|
||||||
|
if (qName.equalsIgnoreCase("item")) {
|
||||||
|
ba.addEntryToCurrent(lastText);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes characters to a String which might be used by endElement() later.
|
||||||
|
* @param ch The characters.
|
||||||
|
* @param start The start position in the character array.
|
||||||
|
* @param lengthThe number of characters to use from the character array.
|
||||||
|
* @throws org.xml.sax.SAXException
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||||
|
lastText = new String(ch, start, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in new issue