From 724bbdf9b242fb3043f9617679d2cbf4133776f9 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 24 Apr 2008 21:31:07 +0000 Subject: [PATCH] refactoring of RSS reader git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4736 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- bin/searchprofiling | 15 ++ htroot/FeedReader_p.java | 21 ++- htroot/rct_p.java | 11 +- .../plasma/crawler/plasmaCrawlQueues.java | 11 +- .../anomic/plasma/parser/rss/rssParser.java | 22 ++- source/de/anomic/xml/RSSFeed.java | 142 +++++++++++++++ source/de/anomic/xml/RSSMessage.java | 117 +++++++++++++ source/de/anomic/xml/RSSReader.java | 165 ++++++++++++++++++ source/de/anomic/xml/rssReader.java | 150 ++-------------- source/de/anomic/yacy/yacyClient.java | 11 +- 10 files changed, 496 insertions(+), 169 deletions(-) create mode 100644 source/de/anomic/xml/RSSFeed.java create mode 100644 source/de/anomic/xml/RSSMessage.java create mode 100644 source/de/anomic/xml/RSSReader.java diff --git a/bin/searchprofiling b/bin/searchprofiling index cc08608b6..2f1ea673d 100755 --- a/bin/searchprofiling +++ b/bin/searchprofiling @@ -1,12 +1,27 @@ touch /tmp/yp$$.dump +echo "cleaning search buffer" curl -s --user $1:$2 http://localhost:8080/Ranking_p.html > /dev/null +echo "start search for $4" curl -s -o /tmp/yp$$.query "http://localhost:8080/yacysearch.rss?maximumRecords=10&resource=local&query=$4" & +curl -s --user $1:$2 http://localhost:8080/Status.html > /dev/null & +curl -s --user $1:$2 http://localhost:8080/Status.html > /dev/null & +curl -s --user $1:$2 http://localhost:8080/Status.html > /dev/null & +curl -s --user $1:$2 http://localhost:8080/Status.html > /dev/null & +curl -s --user $1:$2 http://localhost:8080/Status.html > /dev/null & +echo "search started" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=0&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 1 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 2 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 3 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 4 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 5 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 6 ready" curl -s --user $1:$2 "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump +echo "thread dump 7 ready" cat /tmp/yp$$.dump rm -f /tmp/yp$$.dump diff --git a/htroot/FeedReader_p.java b/htroot/FeedReader_p.java index bf8af786e..d8bc0a326 100644 --- a/htroot/FeedReader_p.java +++ b/htroot/FeedReader_p.java @@ -27,7 +27,9 @@ import de.anomic.http.httpHeader; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.servletProperties; -import de.anomic.xml.rssReader; +import de.anomic.xml.RSSFeed; +import de.anomic.xml.RSSMessage; +import de.anomic.xml.RSSReader; import de.anomic.yacy.yacyURL; // test url: @@ -50,26 +52,27 @@ public class FeedReader_p { // int maxitems=Integer.parseInt(post.get("max", "0")); // int offset=Integer.parseInt(post.get("offset", "0")); //offset to the first displayed item - rssReader parser = new rssReader(url.toString()); + RSSFeed feed = new RSSReader(url.toString()).getFeed(); - prop.putHTML("page_title", parser.getChannel().getTitle()); - if (parser.getChannel().getAuthor() == null) { + prop.putHTML("page_title", feed.getChannel().getTitle()); + if (feed.getChannel().getAuthor() == null) { prop.put("page_hasAuthor", "0"); } else { prop.put("page_hasAuthor", "1"); - prop.putHTML("page_hasAuthor_author", parser.getChannel().getAuthor()); + prop.putHTML("page_hasAuthor_author", feed.getChannel().getAuthor()); } - prop.putHTML("page_description", parser.getChannel().getDescription()); + prop.putHTML("page_description", feed.getChannel().getDescription()); - for (int i = 0; i < parser.items(); i++) { - rssReader.Item item = parser.getItem(i); + int i = 0; + for (RSSMessage item: feed) { prop.putHTML("page_items_" + i + "_author", item.getAuthor()); prop.putHTML("page_items_" + i + "_title", item.getTitle()); prop.put("page_items_" + i + "_link", item.getLink()); prop.put("page_items_" + i + "_description", item.getDescription()); prop.put("page_items_" + i + "_date", item.getPubDate()); + i++; } - prop.put("page_items", parser.items()); + prop.put("page_items", feed.size()); prop.put("page", "1"); } diff --git a/htroot/rct_p.java b/htroot/rct_p.java index 492002650..d79c2526a 100644 --- a/htroot/rct_p.java +++ b/htroot/rct_p.java @@ -35,7 +35,8 @@ import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverDate; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; -import de.anomic.xml.rssReader; +import de.anomic.xml.RSSFeed; +import de.anomic.xml.RSSMessage; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; @@ -52,11 +53,9 @@ public class rct_p { if (post.containsKey("retrieve")) { String peerhash = post.get("peer", null); yacySeed seed = (peerhash == null) ? null : yacyCore.seedDB.getConnected(peerhash); - rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10); - if (reader != null) { - rssReader.Item item; - for (int i = 0; i < reader.items(); i++) { - item = reader.getItem(i); + RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10); + if (feed != null) { + for (RSSMessage item: feed) { //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate()); // put url on remote crawl stack diff --git a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java index a330915ec..dc2d8c99b 100644 --- a/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java +++ b/source/de/anomic/plasma/crawler/plasmaCrawlQueues.java @@ -47,7 +47,8 @@ import de.anomic.plasma.plasmaParser; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverDate; import de.anomic.server.logging.serverLog; -import de.anomic.xml.rssReader; +import de.anomic.xml.RSSFeed; +import de.anomic.xml.RSSMessage; import de.anomic.yacy.yacyClient; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; @@ -295,14 +296,12 @@ public class plasmaCrawlQueues { if (seed == null) return false; // we know a peer which should provide remote crawl entries. load them now. - rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20); - if (reader == null) return true; + RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20); + if (feed == null) return true; // parse the rss - rssReader.Item item; yacyURL url, referrer; Date loaddate; - for (int i = 0; i < reader.items(); i++) { - item = reader.getItem(i); + for (RSSMessage item: feed) { //System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate()); // put url on remote crawl stack diff --git a/source/de/anomic/plasma/parser/rss/rssParser.java b/source/de/anomic/plasma/parser/rss/rssParser.java index 4d26f8a8e..80f8b19db 100644 --- a/source/de/anomic/plasma/parser/rss/rssParser.java +++ b/source/de/anomic/plasma/parser/rss/rssParser.java @@ -62,8 +62,9 @@ import de.anomic.plasma.parser.ParserException; import de.anomic.server.serverByteBuffer; import de.anomic.server.serverCharBuffer; import de.anomic.server.serverFileUtils; -import de.anomic.xml.rssReader; -import de.anomic.xml.rssReader.Item; +import de.anomic.xml.RSSFeed; +import de.anomic.xml.RSSReader; +import de.anomic.xml.RSSMessage; import de.anomic.yacy.yacyURL; public class rssParser extends AbstractParser implements Parser { @@ -100,31 +101,28 @@ public class rssParser extends AbstractParser implements Parser { serverByteBuffer text = new serverByteBuffer(); serverCharBuffer authors = new serverCharBuffer(); - rssReader reader = new rssReader(source); + RSSFeed feed = new RSSReader(source).getFeed(); // getting the rss feed title and description - String feedTitle = reader.getChannel().getTitle(); + String feedTitle = feed.getChannel().getTitle(); // getting feed creator - String feedCreator = reader.getChannel().getAuthor(); + String feedCreator = feed.getChannel().getAuthor(); if (feedCreator != null && feedCreator.length() > 0) authors.append(",").append(feedCreator); // getting the feed description - String feedDescription = reader.getChannel().getDescription(); + String feedDescription = feed.getChannel().getDescription(); - if (reader.getImage() != null) { - yacyURL imgURL = new yacyURL(reader.getImage(), null); + if (feed.getImage() != null) { + yacyURL imgURL = new yacyURL(feed.getImage(), null); images.put(imgURL.hash(), new htmlFilterImageEntry(imgURL, feedTitle, -1, -1)); } // loop through the feed items - for (int i = 0; i < reader.items(); i++) { + for (RSSMessage item: feed) { // check for interruption checkInterruption(); - // getting the next item - Item item = reader.getItem(i); - String itemTitle = item.getTitle(); yacyURL itemURL = new yacyURL(item.getLink(), null); String itemDescr = item.getDescription(); diff --git a/source/de/anomic/xml/RSSFeed.java b/source/de/anomic/xml/RSSFeed.java new file mode 100644 index 000000000..4b6fa90d2 --- /dev/null +++ b/source/de/anomic/xml/RSSFeed.java @@ -0,0 +1,142 @@ +// RSSFeed.java +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 24.04.2008 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.xml; + +import java.util.Iterator; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; + +public class RSSFeed implements Iterable { + + // class variables + private RSSMessage channel; + private String imageURL; + private ConcurrentLinkedQueue messageQueue; // a list of GUIDs, so the items can be retrieved by a specific order + private ConcurrentHashMap messages; // a guid:Item map + private int maxsize; + + public RSSFeed() { + messageQueue = new ConcurrentLinkedQueue(); + messages = new ConcurrentHashMap(); + channel = null; + maxsize = Integer.MAX_VALUE; + } + + public RSSFeed(int maxsize) { + this(); + this.maxsize = maxsize; + } + + public void setMaxsize(int maxsize) { + this.maxsize = maxsize; + while (messageQueue.size() > this.maxsize) pollMessage(); + } + + public void setChannel(RSSMessage channelItem) { + this.channel = channelItem; + } + + public RSSMessage getChannel() { + return channel; + } + + public void setImage(String imageURL) { + this.imageURL = imageURL; + } + + public String getImage() { + return this.imageURL; + } + + public void addMessage(RSSMessage item) { + String guid = item.getGuid(); + messageQueue.add(guid); + messages.put(guid, item); + while (messageQueue.size() > this.maxsize) pollMessage(); + } + + public RSSMessage getMessage(String guid) { + // retrieve item by guid + return messages.get(guid); + } + + public int size() { + return messages.size(); + } + + public Iterator iterator() { + return new messageIterator(); + } + + public RSSMessage pollMessage() { + // retrieve and delete item + if (messageQueue.size() == 0) return null; + String nextGUID = messageQueue.poll(); + if (nextGUID == null) return null; + return messages.remove(nextGUID); + } + + public class messageIterator implements Iterator{ + + Iterator GUIDiterator; + String lastGUID; + + public messageIterator() { + GUIDiterator = messageQueue.iterator(); + lastGUID = null; + } + + public boolean hasNext() { + return GUIDiterator.hasNext(); + } + + public RSSMessage next() { + lastGUID = GUIDiterator.next(); + if (lastGUID == null) return null; + return messages.get(lastGUID); + } + + public void remove() { + if (lastGUID == null) return; + GUIDiterator.remove(); + messages.remove(lastGUID); + } + } + + /** + * the following static channels object is used to organize a storage array for RSS feeds + */ + private static final ConcurrentHashMap channels = new ConcurrentHashMap(); + + public static RSSFeed channels(String channelName) { + RSSFeed feed = channels.get(channelName); + if (feed != null) return feed; + feed = new RSSFeed(); + channels.put(channelName, feed); + return feed; + } +} diff --git a/source/de/anomic/xml/RSSMessage.java b/source/de/anomic/xml/RSSMessage.java new file mode 100644 index 000000000..c86b58517 --- /dev/null +++ b/source/de/anomic/xml/RSSMessage.java @@ -0,0 +1,117 @@ +// RSSMessage.java +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 16.07.2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +package de.anomic.xml; + +import java.util.HashMap; +import java.util.HashSet; + +public class RSSMessage { + + // statics for item generation and automatic categorization + private static int guidcount = 0; + private static final String[] tagsDef = new String[] { + "author", // + "copyright", // + "category", // + "title", // + "link", // + "referrer", // + "language", // + "description", // + "creator", // + "pubDate", // + "guid", // + "docs" // + }; + + public static final HashSet tags = new HashSet(); + static { + for (int i = 0; i < tagsDef.length; i++) { + tags.add(tagsDef[i]); + } + } + + private HashMap map; + + public RSSMessage() { + this.map = new HashMap(); + this.map.put("guid", Long.toHexString(System.currentTimeMillis()) + ":" + guidcount++); + } + + public void setValue(String name, String value) { + map.put(name, value); + } + + public String getAuthor() { + return (String) map.get("author"); + } + + public String getCopyright() { + return (String) map.get("copyright"); + } + + public String getCategory() { + return (String) map.get("category"); + } + + public String getTitle() { + return (String) map.get("title"); + } + + public String getLink() { + return (String) map.get("link"); + } + + public String getReferrer() { + return (String) map.get("referrer"); + } + + public String getLanguage() { + return (String) map.get("language"); + } + + public String getDescription() { + return (String) map.get("description"); + } + + public String getCreator() { + return (String) map.get("creator"); + } + + public String getPubDate() { + return (String) map.get("pubDate"); + } + + public String getGuid() { + return (String) map.get("guid"); + } + + public String getDocs() { + return (String) map.get("docs"); + } +} diff --git a/source/de/anomic/xml/RSSReader.java b/source/de/anomic/xml/RSSReader.java new file mode 100644 index 000000000..806f8f062 --- /dev/null +++ b/source/de/anomic/xml/RSSReader.java @@ -0,0 +1,165 @@ +// rssReader.java +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 16.07.2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.xml; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; + +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import de.anomic.server.serverByteBuffer; +import de.anomic.server.logging.serverLog; + +public class RSSReader extends DefaultHandler { + + // class variables + private RSSMessage item; + private StringBuffer buffer; + private boolean parsingChannel, parsingImage, parsingItem; + private RSSFeed theChannel; + + public RSSReader() { + theChannel = new RSSFeed(); + buffer = new StringBuffer(); + item = null; + parsingChannel = false; + parsingImage = false; + parsingItem = false; + } + + public RSSReader(String path) { + this(); + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(path, this); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public RSSReader(InputStream stream) { + this(); + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(stream, this); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public static RSSReader parse(byte[] a) { + + // check integrity of array + if ((a == null) || (a.length == 0)) { + serverLog.logWarning("rssReader", "response=null"); + return null; + } + if (a.length < 100) { + serverLog.logWarning("rssReader", "response=" + new String(a)); + return null; + } + if (!serverByteBuffer.equals(a, " tags = new HashSet(); - static { - for (int i = 0; i < tagsDef.length; i++) { - tags.add(tagsDef[i]); - } - } +public class RSSReader extends DefaultHandler { // class variables - private Item channel, item; + private RSSMessage item; private StringBuffer buffer; private boolean parsingChannel, parsingImage, parsingItem; - private String imageURL; - private ArrayList itemsGUID; // a list of GUIDs, so the items can be retrieved by a specific order - private HashMap items; // a guid:Item map - + private RSSFeed theChannel; - public rssReader() { - itemsGUID = new ArrayList(); - items = new HashMap(); + public RSSReader() { + theChannel = new RSSFeed(); buffer = new StringBuffer(); item = null; - channel = null; parsingChannel = false; parsingImage = false; parsingItem = false; } - public rssReader(String path) { + public RSSReader(String path) { this(); try { SAXParserFactory factory = SAXParserFactory.newInstance(); @@ -100,7 +68,7 @@ public class rssReader extends DefaultHandler { } } - public rssReader(InputStream stream) { + public RSSReader(InputStream stream) { this(); try { SAXParserFactory factory = SAXParserFactory.newInstance(); @@ -111,7 +79,7 @@ public class rssReader extends DefaultHandler { } } - public static rssReader parse(byte[] a) { + public static RSSReader parse(byte[] a) { // check integrity of array if ((a == null) || (a.length == 0)) { @@ -136,9 +104,9 @@ public class rssReader extends DefaultHandler { ByteArrayInputStream bais = new ByteArrayInputStream(a); // parse stream - rssReader reader = null; + RSSReader reader = null; try { - reader = new rssReader(bais); + reader = new RSSReader(bais); } catch (Exception e) { serverLog.logWarning("rssReader", "parse exception: " + e); return null; @@ -149,10 +117,10 @@ public class rssReader extends DefaultHandler { public void startElement(String uri, String name, String tag, Attributes atts) throws SAXException { if ("channel".equals(tag)) { - channel = new Item(); + item = new RSSMessage(); parsingChannel = true; } else if ("item".equals(tag)) { - item = new Item(); + item = new RSSMessage(); parsingItem = true; } else if ("image".equals(tag)) { parsingImage = true; @@ -163,25 +131,24 @@ public class rssReader extends DefaultHandler { if (tag == null) return; if ("channel".equals(tag)) { parsingChannel = false; + theChannel.setChannel(item); } else if ("item".equals(tag)) { - String guid = item.getGuid(); - itemsGUID.add(guid); - items.put(guid, item); + theChannel.addMessage(item); parsingItem = false; } else if ("image".equals(tag)) { parsingImage = false; } else if ((parsingImage) && (parsingChannel)) { String value = buffer.toString().trim(); buffer.setLength(0); - if ("url".equals(tag)) imageURL = value; + if ("url".equals(tag)) theChannel.setImage(value); } else if (parsingItem) { String value = buffer.toString().trim(); buffer.setLength(0); - if (tags.contains(tag)) item.setValue(tag, value); + if (RSSMessage.tags.contains(tag)) item.setValue(tag, value); } else if (parsingChannel) { String value = buffer.toString().trim(); buffer.setLength(0); - if (tags.contains(tag)) channel.setValue(tag, value); + if (RSSMessage.tags.contains(tag)) item.setValue(tag, value); } } @@ -190,88 +157,9 @@ public class rssReader extends DefaultHandler { buffer.append(ch, start, length); } } - - public Item getChannel() { - return channel; - } - - public Item getItem(int i) { - // retrieve item by order number - return getItem((String) itemsGUID.get(i)); - } - - public Item getItem(String guid) { - // retrieve item by guid - return (Item) items.get(guid); - } - - public int items() { - return items.size(); - } - public String getImage() { - return this.imageURL; + public RSSFeed getFeed() { + return theChannel; } - - public static class Item { - - private HashMap map; - public Item() { - this.map = new HashMap(); - this.map.put("guid", Long.toHexString(System.currentTimeMillis()) + ":" + guidcount++); - } - - public void setValue(String name, String value) { - map.put(name, value); - } - - public String getAuthor() { - return (String) map.get("author"); - } - - public String getCopyright() { - return (String) map.get("copyright"); - } - - public String getCategory() { - return (String) map.get("category"); - } - - public String getTitle() { - return (String) map.get("title"); - } - - public String getLink() { - return (String) map.get("link"); - } - - public String getReferrer() { - return (String) map.get("referrer"); - } - - public String getLanguage() { - return (String) map.get("language"); - } - - public String getDescription() { - return (String) map.get("description"); - } - - public String getCreator() { - return (String) map.get("creator"); - } - - public String getPubDate() { - return (String) map.get("pubDate"); - } - - public String getGuid() { - return (String) map.get("guid"); - } - - public String getDocs() { - return (String) map.get("docs"); - } - } } \ No newline at end of file diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index c61862f46..417ac4c6f 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -84,7 +84,8 @@ import de.anomic.server.serverCore; import de.anomic.server.serverDomains; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; -import de.anomic.xml.rssReader; +import de.anomic.xml.RSSFeed; +import de.anomic.xml.RSSReader; public final class yacyClient { @@ -374,7 +375,7 @@ public final class yacyClient { } } - public static rssReader queryRemoteCrawlURLs(yacySeed target, int count) { + public static RSSFeed queryRemoteCrawlURLs(yacySeed target, int count) { // returns a list of if (target == null) { return null; } if (yacyCore.seedDB.mySeed() == null) return null; @@ -390,8 +391,8 @@ public final class yacyClient { /* a long time-out is needed */ final byte[] result = wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, 60000); - rssReader reader = rssReader.parse(result); - if (reader == null) { + RSSFeed feed = RSSReader.parse(result).getFeed(); + if (feed == null) { // case where the rss reader does not understand the content yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer"); System.out.println("***DEBUG*** rss input = " + new String(result)); @@ -400,7 +401,7 @@ public final class yacyClient { //e.printStackTrace(); return null; } - return reader; + return feed; } catch (IOException e) { yacyCore.log.logSevere("yacyClient.queryRemoteCrawlURLs error asking peer '" + target.getName() + "':" + e.toString()); return null;