From 9180617dd939139b493d4079ec917a80461ea912 Mon Sep 17 00:00:00 2001 From: low012 Date: Sun, 5 Apr 2009 13:36:44 +0000 Subject: [PATCH] *) Classes to handle import of lists (especially blacklists) from XML files, not used yet, but will be used soon. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5780 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/data/list/ListAccumulator.java | 142 ++++++++++++++ .../data/list/XMLBlacklistImporter.java | 176 ++++++++++++++++++ 2 files changed, 318 insertions(+) create mode 100644 source/de/anomic/data/list/ListAccumulator.java create mode 100644 source/de/anomic/data/list/XMLBlacklistImporter.java diff --git a/source/de/anomic/data/list/ListAccumulator.java b/source/de/anomic/data/list/ListAccumulator.java new file mode 100644 index 000000000..7e3d2ac2b --- /dev/null +++ b/source/de/anomic/data/list/ListAccumulator.java @@ -0,0 +1,142 @@ +// ListAccumulator.java +// ------------------------------------- +// part of YACY +// +// (C) 2009 by Marc Nause +// +// $LastChangedDate: $ +// $LastChangedRevision: $ +// $LastChangedBy: $ +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

// Package: de.anomic.data.list (source/de/anomic/data/list/ListAccumulator.java)

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Stores the content and the properties of several named lists (typically
 * blacklists).
 *
 * <p>Lists are kept in the order in which they were added. The list created by
 * the most recent successful {@link #addList(String)} call is the "current"
 * list; it can be filled through {@link #addEntryToCurrent(String)} and
 * {@link #addPropertyToCurrent(String, String)} without repeating its name.
 * This class performs no synchronization of its own.
 */
public class ListAccumulator {

    /** Maps a list name to the index of that list in {@link #entries} and {@link #properties}. */
    private final Map<String, Integer> names = new HashMap<String, Integer>();

    /** Entries of every list; position i belongs to the i-th list that was added. */
    private final List<List<String>> entries = new ArrayList<List<String>>();

    /** Properties of every list; position i belongs to the i-th list that was added. */
    private final List<Map<String, String>> properties = new ArrayList<Map<String, String>>();

    /** Entries of the list added most recently, or null while no list exists. */
    private List<String> currentEntries;

    /** Properties of the list added most recently, or null while no list exists. */
    private Map<String, String> currentProperties;

    /**
     * Adds a new, empty list unless a list by that name exists already. On
     * success the new list becomes the current list; if the name is already
     * taken nothing changes, in particular the current list stays the same.
     *
     * @param name The name of the list to be added.
     * @return True if the new list has been added, else false (if a list by
     *         that name exists already).
     */
    public boolean addList(final String name) {
        if (names.containsKey(name)) {
            return false;
        }

        final List<String> newEntries = new LinkedList<String>();
        final Map<String, String> newProperties = new HashMap<String, String>();

        // The new list is appended to both parallel lists, so its index is their current size.
        names.put(name, entries.size());
        entries.add(newEntries);
        properties.add(newProperties);

        currentEntries = newEntries;
        currentProperties = newProperties;
        return true;
    }

    /**
     * Adds a new entry to the list identified by the given name.
     *
     * @param list The name of the list the entry is to be added to.
     * @param entry The new entry.
     * @return True if the entry has been added, else false (if the list does
     *         not exist).
     */
    public boolean addEntry(final String list, final String entry) {
        final Integer index = names.get(list);
        if (index == null) {
            return false;
        }
        entries.get(index.intValue()).add(entry);
        return true;
    }

    /**
     * Adds a new entry to the current list, i.e. the list added most recently.
     *
     * @param entry The new entry.
     * @return True if the entry has been added, else false (if no list has
     *         been added yet).
     */
    public boolean addEntryToCurrent(final String entry) {
        if (currentEntries == null) {
            return false;
        }
        currentEntries.add(entry);
        return true;
    }

    /**
     * Sets a property of the list identified by the given name. An existing
     * value stored under the same property name is replaced.
     *
     * @param list The name of the list.
     * @param property The name of the property.
     * @param value The value of the property.
     * @return True if the property has been set, else false (if the list does
     *         not exist).
     */
    public boolean addProperty(final String list, final String property, final String value) {
        final Integer index = names.get(list);
        if (index == null) {
            return false;
        }
        properties.get(index.intValue()).put(property, value);
        return true;
    }

    /**
     * Sets a property of the current list, i.e. the list added most recently.
     * An existing value stored under the same property name is replaced.
     *
     * @param property The name of the property.
     * @param value The value of the property.
     * @return True if the property has been set, else false (if no list has
     *         been added yet).
     */
    public boolean addPropertyToCurrent(final String property, final String value) {
        if (currentProperties == null) {
            return false;
        }
        currentProperties.put(property, value);
        return true;
    }

    /**
     * Returns a {@link List} which contains the {@link List Lists} of entries
     * of all lists, in the order in which the lists were added. The returned
     * object is the internal storage, not a copy.
     *
     * @return list of entry lists.
     */
    public List<List<String>> getEntryLists() {
        return entries;
    }

    /**
     * Returns a {@link List} which contains the {@link Map Maps} of properties
     * of all lists, in the order in which the lists were added. The returned
     * object is the internal storage, not a copy.
     *
     * @return list of property maps.
     */
    public List<Map<String, String>> getPropertyMaps() {
        return properties;
    }
}
+ */ +public class XMLBlacklistImporter extends DefaultHandler { + + private ListAccumulator ba = null; + private String currentListName = null; + private String lastText = null; + + /** + * Takes the input data and turns it into a {@link ListAccumulator} which can + * be used for further processing. + * @param input the XML data + * @return the data from the XML + * @throws java.io.IOException if input can't be read + * @throws org.xml.sax.SAXException if XML can't be parsed + */ + public synchronized ListAccumulator parse(InputSource input) throws IOException, SAXException { + + XMLReader reader = new SAXParser(); + reader.setContentHandler(this); + reader.parse(input); + + return ba; + } + + /** + * Takes the input data and turns it into a {@link ListAccumulator} which can + * be used for further processing. + * @param input the XML data + * @return the data from the XML + * @throws java.io.IOException if input can't be read + * @throws org.xml.sax.SAXException if XML can't be parsed + */ + public synchronized ListAccumulator parse(Reader input) throws IOException, SAXException { + return this.parse(new InputSource(input)); + } + + /** + * Takes the input data and turns it into a {@link ListAccumulator} which can + * be used for further processing. + * @param input the XML data + * @return the data from the XML + * @throws java.io.IOException if input can't be read + * @throws org.xml.sax.SAXException if XML can't be parsed + */ + public synchronized ListAccumulator parse(String input) throws IOException, SAXException { + return this.parse(new InputSource(input)); + } + + /** + * Takes the input data and turns it into a {@link ListAccumulator} which can + * be used for further processing. + * @param input The XML data. + * @return The data from the XML. 
+ * @throws java.io.IOException if input can't be read + * @throws org.xml.sax.SAXException if XML can't be parsed + */ + public synchronized ListAccumulator parse(InputStream input) throws IOException, SAXException { + return this.parse(new InputSource(input)); + } + + /** + * At the start of the document a new {@link ListAccumulator} is created. + */ + @Override + public void startDocument() { + ba = new ListAccumulator(); + } + + /** + * If the tag is encountered a new list will be addedto the + * {@link ListAccumulator} and the properties of the list will be set + * if provided in the XML. + * @param uri The Namespace URI, or the empty string if the + * element has no Namespace URI or if Namespace + * processing is not being performed. + * @param localName The local name (without prefix), or the + * empty string if Namespace processing is not being + * performed. + * @param qName The qualified name (with prefix), or the + * empty string if qualified names are not available. + * @param attributes The attributes attached to the element. If + * there are no attributes, it shall be an empty + * Attributes object. + */ + @Override + public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) { + + if (qName.equalsIgnoreCase("list")) { + currentListName = attributes.getValue("name"); + ba.addList(currentListName); + + int attributesLength = 0; + + if ((attributesLength = attributes.getLength()) > 1) { + for (int i = 0; i < attributesLength; i++) { + if (!attributes.getQName(i).equals("name")) { + ba.addPropertyToCurrent(attributes.getQName(i), attributes.getValue(i)); + } + } + } + } + + } + + /** + * Adds a new item to the current list in the {@link ListAccumulator}. + * @param uri The Namespace URI, or the empty string if the + * element has no Namespace URI or if Namespace + * processing is not being performed. 
+ * @param localName The local name (without prefix), or the + * empty string if Namespace processing is not being + * performed. + * @param qName The qualified name (with prefix), or the + * empty string if qualified names are not available. + * @throws org.xml.sax.SAXException + */ + @Override + public void endElement(final String uri, final String localName, final String qName) throws SAXException { + if (qName.equalsIgnoreCase("item")) { + ba.addEntryToCurrent(lastText); + } + } + + /** + * Writes characters to a String which might be used by endElement() later. + * @param ch The characters. + * @param start The start position in the character array. + * @param lengthThe number of characters to use from the character array. + * @throws org.xml.sax.SAXException + */ + @Override + public void characters(char[] ch, int start, int length) throws SAXException { + lastText = new String(ch, start, length); + } + +}