refactoring of RSS reader

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4736 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent b9a2a2d287
commit 724bbdf9b2

@ -1,12 +1,27 @@
# Profiling helper: starts a local search on the peer at localhost:8080 and
# collects seven thread dumps from Threaddump_p.html while it is running.
#
# Arguments:
#   $1  user name for HTTP authentication (curl --user)
#   $2  password for HTTP authentication (curl --user)
#   $3  value passed as the 'sleep' parameter of Threaddump_p.html (dumps 2-7)
#   $4  search query; inserted into the URL as-is, so it must already be URL-safe
#
# The credentials are quoted so that whitespace or glob characters in the
# user name or password are not word-split by the shell.
#
# NOTE(review): the search result is written to /tmp/yp$$.query and is never
# removed by this script -- confirm whether it is meant to be kept.

# create the (empty) dump file; all thread dumps are appended to it
touch /tmp/yp$$.dump

echo "cleaning search buffer"
curl -s --user "$1:$2" http://localhost:8080/Ranking_p.html > /dev/null

echo "start search for $4"
# the search itself runs in the background so the dumps below are taken while it is active
curl -s -o /tmp/yp$$.query "http://localhost:8080/yacysearch.rss?maximumRecords=10&resource=local&query=$4" &
# five parallel background requests to the status page
# (presumably to create additional concurrent load -- TODO confirm)
curl -s --user "$1:$2" http://localhost:8080/Status.html > /dev/null &
curl -s --user "$1:$2" http://localhost:8080/Status.html > /dev/null &
curl -s --user "$1:$2" http://localhost:8080/Status.html > /dev/null &
curl -s --user "$1:$2" http://localhost:8080/Status.html > /dev/null &
curl -s --user "$1:$2" http://localhost:8080/Status.html > /dev/null &
echo "search started"

# first dump is requested with sleep=0, the following six with sleep=$3
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=0&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 1 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 2 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 3 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 4 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 5 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 6 ready"
curl -s --user "$1:$2" "http://localhost:8080/Threaddump_p.html?sleep=$3&createThreaddump=&plain=true" >> /tmp/yp$$.dump
echo "thread dump 7 ready"

# print the collected dumps and clean up the dump file
cat /tmp/yp$$.dump
rm -f /tmp/yp$$.dump

@ -27,7 +27,9 @@ import de.anomic.http.httpHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
import de.anomic.xml.rssReader;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.xml.RSSReader;
import de.anomic.yacy.yacyURL;
// test url:
@ -50,26 +52,27 @@ public class FeedReader_p {
// int maxitems=Integer.parseInt(post.get("max", "0"));
// int offset=Integer.parseInt(post.get("offset", "0")); //offset to the first displayed item
rssReader parser = new rssReader(url.toString());
RSSFeed feed = new RSSReader(url.toString()).getFeed();
prop.putHTML("page_title", parser.getChannel().getTitle());
if (parser.getChannel().getAuthor() == null) {
prop.putHTML("page_title", feed.getChannel().getTitle());
if (feed.getChannel().getAuthor() == null) {
prop.put("page_hasAuthor", "0");
} else {
prop.put("page_hasAuthor", "1");
prop.putHTML("page_hasAuthor_author", parser.getChannel().getAuthor());
prop.putHTML("page_hasAuthor_author", feed.getChannel().getAuthor());
}
prop.putHTML("page_description", parser.getChannel().getDescription());
prop.putHTML("page_description", feed.getChannel().getDescription());
for (int i = 0; i < parser.items(); i++) {
rssReader.Item item = parser.getItem(i);
int i = 0;
for (RSSMessage item: feed) {
prop.putHTML("page_items_" + i + "_author", item.getAuthor());
prop.putHTML("page_items_" + i + "_title", item.getTitle());
prop.put("page_items_" + i + "_link", item.getLink());
prop.put("page_items_" + i + "_description", item.getDescription());
prop.put("page_items_" + i + "_date", item.getPubDate());
i++;
}
prop.put("page_items", parser.items());
prop.put("page_items", feed.size());
prop.put("page", "1");
}

@ -35,7 +35,8 @@ import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.xml.rssReader;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -52,11 +53,9 @@ public class rct_p {
if (post.containsKey("retrieve")) {
String peerhash = post.get("peer", null);
yacySeed seed = (peerhash == null) ? null : yacyCore.seedDB.getConnected(peerhash);
rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10);
if (reader != null) {
rssReader.Item item;
for (int i = 0; i < reader.items(); i++) {
item = reader.getItem(i);
RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 10);
if (feed != null) {
for (RSSMessage item: feed) {
//System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());
// put url on remote crawl stack

@ -47,7 +47,8 @@ import de.anomic.plasma.plasmaParser;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDate;
import de.anomic.server.logging.serverLog;
import de.anomic.xml.rssReader;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyClient;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
@ -295,14 +296,12 @@ public class plasmaCrawlQueues {
if (seed == null) return false;
// we know a peer which should provide remote crawl entries. load them now.
rssReader reader = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20);
if (reader == null) return true;
RSSFeed feed = (seed == null) ? null : yacyClient.queryRemoteCrawlURLs(seed, 20);
if (feed == null) return true;
// parse the rss
rssReader.Item item;
yacyURL url, referrer;
Date loaddate;
for (int i = 0; i < reader.items(); i++) {
item = reader.getItem(i);
for (RSSMessage item: feed) {
//System.out.println("URL=" + item.getLink() + ", desc=" + item.getDescription() + ", pubDate=" + item.getPubDate());
// put url on remote crawl stack

@ -62,8 +62,9 @@ import de.anomic.plasma.parser.ParserException;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.xml.rssReader;
import de.anomic.xml.rssReader.Item;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSReader;
import de.anomic.xml.RSSMessage;
import de.anomic.yacy.yacyURL;
public class rssParser extends AbstractParser implements Parser {
@ -100,31 +101,28 @@ public class rssParser extends AbstractParser implements Parser {
serverByteBuffer text = new serverByteBuffer();
serverCharBuffer authors = new serverCharBuffer();
rssReader reader = new rssReader(source);
RSSFeed feed = new RSSReader(source).getFeed();
// getting the rss feed title and description
String feedTitle = reader.getChannel().getTitle();
String feedTitle = feed.getChannel().getTitle();
// getting feed creator
String feedCreator = reader.getChannel().getAuthor();
String feedCreator = feed.getChannel().getAuthor();
if (feedCreator != null && feedCreator.length() > 0) authors.append(",").append(feedCreator);
// getting the feed description
String feedDescription = reader.getChannel().getDescription();
String feedDescription = feed.getChannel().getDescription();
if (reader.getImage() != null) {
yacyURL imgURL = new yacyURL(reader.getImage(), null);
if (feed.getImage() != null) {
yacyURL imgURL = new yacyURL(feed.getImage(), null);
images.put(imgURL.hash(), new htmlFilterImageEntry(imgURL, feedTitle, -1, -1));
}
// loop through the feed items
for (int i = 0; i < reader.items(); i++) {
for (RSSMessage item: feed) {
// check for interruption
checkInterruption();
// getting the next item
Item item = reader.getItem(i);
String itemTitle = item.getTitle();
yacyURL itemURL = new yacyURL(item.getLink(), null);
String itemDescr = item.getDescription();

@ -0,0 +1,142 @@
// RSSFeed.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 24.04.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.xml;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
 * An ordered, optionally size-bounded collection of {@link RSSMessage}s that
 * belong to one feed, together with the feed's channel message and image URL.
 *
 * Messages are stored in a GUID-to-message map; a separate queue of GUIDs
 * records the insertion order, which is the order used by {@link #iterator()}
 * and {@link #pollMessage()}.
 */
public class RSSFeed implements Iterable<RSSMessage> {

    // class variables
    private RSSMessage channel;
    private String imageURL;
    private final ConcurrentLinkedQueue<String> messageQueue; // a list of GUIDs, so the items can be retrieved by a specific order
    private final ConcurrentHashMap<String, RSSMessage> messages; // a guid:Item map
    private int maxsize;

    /**
     * Creates an empty feed without a channel and with a size limit of
     * {@code Integer.MAX_VALUE}.
     */
    public RSSFeed() {
        messageQueue = new ConcurrentLinkedQueue<String>();
        messages = new ConcurrentHashMap<String, RSSMessage>();
        channel = null;
        maxsize = Integer.MAX_VALUE;
    }

    /**
     * Creates an empty feed that keeps at most {@code maxsize} messages.
     *
     * @param maxsize the maximum number of messages to keep
     */
    public RSSFeed(int maxsize) {
        this();
        this.maxsize = maxsize;
    }

    /**
     * Changes the size limit and immediately removes messages from the head
     * of the queue until the limit is met.
     *
     * @param maxsize the new maximum number of messages to keep
     */
    public void setMaxsize(int maxsize) {
        this.maxsize = maxsize;
        shrinkToMaxsize();
    }

    public void setChannel(RSSMessage channelItem) {
        this.channel = channelItem;
    }

    /**
     * @return the channel message, or {@code null} if none has been set
     */
    public RSSMessage getChannel() {
        return channel;
    }

    public void setImage(String imageURL) {
        this.imageURL = imageURL;
    }

    /**
     * @return the image URL, or {@code null} if none has been set
     */
    public String getImage() {
        return this.imageURL;
    }

    /**
     * Appends a message to the feed. If a message with the same GUID is
     * already stored, it is replaced and keeps its position in the order;
     * the GUID is not queued a second time, so {@link #size()} and iteration
     * stay consistent. Afterwards messages are removed from the head of the
     * queue until the size limit is met.
     *
     * @param item the message to add
     */
    public void addMessage(RSSMessage item) {
        String guid = item.getGuid();
        // store the message before publishing its GUID in the queue, so that an
        // iterator that sees the GUID can also find the message
        if (messages.put(guid, item) == null) {
            messageQueue.add(guid);
        }
        shrinkToMaxsize();
    }

    /**
     * Retrieves a message by its GUID.
     *
     * @param guid the GUID of the message
     * @return the message, or {@code null} if no message is stored under that GUID
     */
    public RSSMessage getMessage(String guid) {
        // retrieve item by guid
        return messages.get(guid);
    }

    /**
     * @return the number of stored messages
     */
    public int size() {
        return messages.size();
    }

    /**
     * @return an iterator over the messages in the order of the GUID queue
     */
    public Iterator<RSSMessage> iterator() {
        return new messageIterator();
    }

    /**
     * Retrieves and removes the message at the head of the queue.
     *
     * @return the removed message, or {@code null} if the queue is empty or
     *         no message is stored for the polled GUID
     */
    public RSSMessage pollMessage() {
        // retrieve and delete item; poll() already returns null on an empty queue
        String nextGUID = messageQueue.poll();
        if (nextGUID == null) return null;
        return messages.remove(nextGUID);
    }

    /**
     * Removes messages from the head of the queue while the queue is longer
     * than the size limit. The explicit emptiness check guarantees termination
     * even when the limit is negative.
     */
    private void shrinkToMaxsize() {
        while (!messageQueue.isEmpty() && messageQueue.size() > this.maxsize) {
            pollMessage();
        }
    }

    /**
     * Iterator that walks the GUID queue and resolves every GUID to its message.
     */
    public class messageIterator implements Iterator<RSSMessage> {

        Iterator<String> GUIDiterator;
        String lastGUID;

        public messageIterator() {
            GUIDiterator = messageQueue.iterator();
            lastGUID = null;
        }

        public boolean hasNext() {
            return GUIDiterator.hasNext();
        }

        /**
         * @return the message for the next GUID; {@code null} if no message
         *         is stored for that GUID
         */
        public RSSMessage next() {
            lastGUID = GUIDiterator.next();
            if (lastGUID == null) return null;
            return messages.get(lastGUID);
        }

        /**
         * Removes the GUID last returned by {@link #next()} from the queue and
         * its message from the map; does nothing if there is no such GUID.
         */
        public void remove() {
            if (lastGUID == null) return;
            GUIDiterator.remove();
            messages.remove(lastGUID);
        }
    }

    /**
     * the following static channels object is used to organize a storage array for RSS feeds
     */
    private static final ConcurrentHashMap<String, RSSFeed> channels = new ConcurrentHashMap<String, RSSFeed>();

    /**
     * Returns the shared feed registered under the given name, creating and
     * registering an empty one if none exists yet.
     *
     * @param channelName the name of the channel
     * @return the feed registered under {@code channelName}, never {@code null}
     */
    public static RSSFeed channels(String channelName) {
        RSSFeed feed = channels.get(channelName);
        if (feed != null) return feed;
        // putIfAbsent makes sure that concurrent callers all end up with the same instance
        RSSFeed created = new RSSFeed();
        RSSFeed existing = channels.putIfAbsent(channelName, created);
        return (existing == null) ? created : existing;
    }
}

@ -0,0 +1,117 @@
// RSSMessage.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 16.07.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.xml;
import java.util.HashMap;
import java.util.HashSet;
/**
 * A single RSS message (a feed item or a channel description), stored as a
 * map from tag name to tag value. Every new message gets a generated
 * {@code guid} value, which can be overwritten with {@link #setValue}.
 */
public class RSSMessage {

    // statics for item generation and automatic categorization
    private static int guidcount = 0;
    private static final String[] tagsDef = new String[] {
        "author",      //
        "copyright",   //
        "category",    //
        "title",       //
        "link",        //
        "referrer",    //
        "language",    //
        "description", //
        "creator",     //
        "pubDate",     //
        "guid",        //
        "docs"         //
    };

    /** the set of all tag names listed in {@code tagsDef} */
    public static final HashSet<String> tags = new HashSet<String>();
    static {
        for (String tag : tagsDef) {
            tags.add(tag);
        }
    }

    private final HashMap<String, String> map;

    /**
     * Creates a message whose only entry is a generated guid of the form
     * {@code <current time in hex>:<running counter>}.
     */
    public RSSMessage() {
        this.map = new HashMap<String, String>();
        this.map.put("guid", Long.toHexString(System.currentTimeMillis()) + ":" + nextGuidCount());
    }

    /**
     * Returns the current value of the shared guid counter and increments it.
     * Synchronized so that messages created concurrently never receive the
     * same counter value.
     */
    private static synchronized int nextGuidCount() {
        return guidcount++;
    }

    /**
     * Stores a value under the given tag name, replacing any previous value.
     *
     * @param name  the tag name
     * @param value the tag value
     */
    public void setValue(String name, String value) {
        map.put(name, value);
    }

    // the getters below return the value stored for the tag, or null if none was set

    public String getAuthor() {
        return map.get("author");
    }

    public String getCopyright() {
        return map.get("copyright");
    }

    public String getCategory() {
        return map.get("category");
    }

    public String getTitle() {
        return map.get("title");
    }

    public String getLink() {
        return map.get("link");
    }

    public String getReferrer() {
        return map.get("referrer");
    }

    public String getLanguage() {
        return map.get("language");
    }

    public String getDescription() {
        return map.get("description");
    }

    public String getCreator() {
        return map.get("creator");
    }

    public String getPubDate() {
        return map.get("pubDate");
    }

    public String getGuid() {
        return map.get("guid");
    }

    public String getDocs() {
        return map.get("docs");
    }
}

@ -0,0 +1,165 @@
// RSSReader.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 16.07.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.xml;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.logging.serverLog;
/**
 * SAX handler that reads an RSS document and collects its content in an
 * {@link RSSFeed}: the channel description, the channel image URL and all
 * items. The result is available through {@link #getFeed()}.
 *
 * NOTE(review): the SAX parser is created with the factory defaults, and
 * {@link #parse(byte[])} handles a response whose origin is not visible here;
 * consider restricting external entity resolution if that input is untrusted.
 */
public class RSSReader extends DefaultHandler {

    // class variables
    // channel and item are kept apart: items may be nested inside the channel
    // element, so a single shared field would lose the channel description as
    // soon as the first item starts
    private RSSMessage channel, item;
    private StringBuffer buffer;
    private boolean parsingChannel, parsingImage, parsingItem;
    private RSSFeed theChannel;

    /**
     * Creates a reader with an empty feed; nothing is parsed.
     */
    public RSSReader() {
        theChannel = new RSSFeed();
        buffer = new StringBuffer();
        channel = null;
        item = null;
        parsingChannel = false;
        parsingImage = false;
        parsingItem = false;
    }

    /**
     * Parses the document at the given location. Exceptions during parsing are
     * printed and not propagated; the feed then holds whatever was read up to
     * that point.
     *
     * @param path the location handed to the SAX parser
     */
    public RSSReader(String path) {
        this();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(path, this);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the document from the given stream. Exceptions during parsing are
     * printed and not propagated; the feed then holds whatever was read up to
     * that point.
     *
     * @param stream the stream to read the document from
     */
    public RSSReader(InputStream stream) {
        this();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(stream, this);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Checks that a byte array plausibly contains a complete RSS document and
     * parses it.
     *
     * @param a the raw document
     * @return a reader holding the parsed feed, or {@code null} if one of the
     *         plausibility checks fails
     */
    public static RSSReader parse(byte[] a) {

        // check integrity of array
        if ((a == null) || (a.length == 0)) {
            serverLog.logWarning("rssReader", "response=null");
            return null;
        }
        if (a.length < 100) {
            serverLog.logWarning("rssReader", "response=" + new String(a));
            return null;
        }
        // presumably tests whether the array starts with the xml declaration;
        // the exact semantics of serverByteBuffer.equals are not visible here
        if (!serverByteBuffer.equals(a, "<?xml".getBytes())) {
            serverLog.logWarning("rssReader", "response does not contain valid xml");
            return null;
        }
        // the last 10 bytes must contain "rss", otherwise the response is considered cut off
        String end = new String(a, a.length - 10, 10);
        if (end.indexOf("rss") < 0) {
            serverLog.logWarning("rssReader", "response incomplete");
            return null;
        }

        // make input stream
        ByteArrayInputStream bais = new ByteArrayInputStream(a);

        // parse stream
        // the constructor catches exceptions raised while parsing itself,
        // so this handler is only a safeguard
        RSSReader reader = null;
        try {
            reader = new RSSReader(bais);
        } catch (Exception e) {
            serverLog.logWarning("rssReader", "parse exception: " + e);
            return null;
        }
        try { bais.close(); } catch (IOException e) {}
        return reader;
    }

    /**
     * Starts a new channel or item message, or marks the start of an image element.
     */
    @Override
    public void startElement(String uri, String name, String tag, Attributes atts) throws SAXException {
        if ("channel".equals(tag)) {
            channel = new RSSMessage();
            parsingChannel = true;
        } else if ("item".equals(tag)) {
            item = new RSSMessage();
            parsingItem = true;
        } else if ("image".equals(tag)) {
            parsingImage = true;
        }
    }

    /**
     * Finishes a channel, item or image element, or assigns the buffered text
     * of any other element to the message that is currently being read.
     */
    @Override
    public void endElement(String uri, String name, String tag) {
        if (tag == null) return;
        if ("channel".equals(tag)) {
            parsingChannel = false;
            theChannel.setChannel(channel);
        } else if ("item".equals(tag)) {
            theChannel.addMessage(item);
            parsingItem = false;
        } else if ("image".equals(tag)) {
            parsingImage = false;
        } else if ((parsingImage) && (parsingChannel)) {
            // inside the channel image: only the url is of interest
            String value = buffer.toString().trim();
            buffer.setLength(0);
            if ("url".equals(tag)) theChannel.setImage(value);
        } else if (parsingItem) {
            // checked before parsingChannel because an item may be nested inside the channel
            String value = buffer.toString().trim();
            buffer.setLength(0);
            if (RSSMessage.tags.contains(tag)) item.setValue(tag, value);
        } else if (parsingChannel) {
            String value = buffer.toString().trim();
            buffer.setLength(0);
            if (RSSMessage.tags.contains(tag)) channel.setValue(tag, value);
        }
    }

    /**
     * Collects character data while inside a channel or an item.
     */
    @Override
    public void characters(char ch[], int start, int length) {
        if (parsingItem || parsingChannel) {
            buffer.append(ch, start, length);
        }
    }

    /**
     * @return the feed that was filled while parsing
     */
    public RSSFeed getFeed() {
        return theChannel;
    }
}

@ -29,9 +29,6 @@ package de.anomic.xml;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
@ -43,53 +40,24 @@ import org.xml.sax.helpers.DefaultHandler;
import de.anomic.server.serverByteBuffer;
import de.anomic.server.logging.serverLog;
public class rssReader extends DefaultHandler {
// statics for item generation and automatic categorization
static int guidcount = 0;
private static final String[] tagsDef = new String[]{
"author", //
"copyright", //
"category", //
"title", //
"link", //
"referrer", //
"language", //
"description", //
"creator", //
"pubDate", //
"guid", //
"docs" //
};
private static final HashSet<String> tags = new HashSet<String>();
static {
for (int i = 0; i < tagsDef.length; i++) {
tags.add(tagsDef[i]);
}
}
public class RSSReader extends DefaultHandler {
// class variables
private Item channel, item;
private RSSMessage item;
private StringBuffer buffer;
private boolean parsingChannel, parsingImage, parsingItem;
private String imageURL;
private ArrayList<String> itemsGUID; // a list of GUIDs, so the items can be retrieved by a specific order
private HashMap<String, Item> items; // a guid:Item map
private RSSFeed theChannel;
public rssReader() {
itemsGUID = new ArrayList<String>();
items = new HashMap<String, Item>();
public RSSReader() {
theChannel = new RSSFeed();
buffer = new StringBuffer();
item = null;
channel = null;
parsingChannel = false;
parsingImage = false;
parsingItem = false;
}
public rssReader(String path) {
public RSSReader(String path) {
this();
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
@ -100,7 +68,7 @@ public class rssReader extends DefaultHandler {
}
}
public rssReader(InputStream stream) {
public RSSReader(InputStream stream) {
this();
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
@ -111,7 +79,7 @@ public class rssReader extends DefaultHandler {
}
}
public static rssReader parse(byte[] a) {
public static RSSReader parse(byte[] a) {
// check integrity of array
if ((a == null) || (a.length == 0)) {
@ -136,9 +104,9 @@ public class rssReader extends DefaultHandler {
ByteArrayInputStream bais = new ByteArrayInputStream(a);
// parse stream
rssReader reader = null;
RSSReader reader = null;
try {
reader = new rssReader(bais);
reader = new RSSReader(bais);
} catch (Exception e) {
serverLog.logWarning("rssReader", "parse exception: " + e);
return null;
@ -149,10 +117,10 @@ public class rssReader extends DefaultHandler {
public void startElement(String uri, String name, String tag, Attributes atts) throws SAXException {
if ("channel".equals(tag)) {
channel = new Item();
item = new RSSMessage();
parsingChannel = true;
} else if ("item".equals(tag)) {
item = new Item();
item = new RSSMessage();
parsingItem = true;
} else if ("image".equals(tag)) {
parsingImage = true;
@ -163,25 +131,24 @@ public class rssReader extends DefaultHandler {
if (tag == null) return;
if ("channel".equals(tag)) {
parsingChannel = false;
theChannel.setChannel(item);
} else if ("item".equals(tag)) {
String guid = item.getGuid();
itemsGUID.add(guid);
items.put(guid, item);
theChannel.addMessage(item);
parsingItem = false;
} else if ("image".equals(tag)) {
parsingImage = false;
} else if ((parsingImage) && (parsingChannel)) {
String value = buffer.toString().trim();
buffer.setLength(0);
if ("url".equals(tag)) imageURL = value;
if ("url".equals(tag)) theChannel.setImage(value);
} else if (parsingItem) {
String value = buffer.toString().trim();
buffer.setLength(0);
if (tags.contains(tag)) item.setValue(tag, value);
if (RSSMessage.tags.contains(tag)) item.setValue(tag, value);
} else if (parsingChannel) {
String value = buffer.toString().trim();
buffer.setLength(0);
if (tags.contains(tag)) channel.setValue(tag, value);
if (RSSMessage.tags.contains(tag)) item.setValue(tag, value);
}
}
@ -190,88 +157,9 @@ public class rssReader extends DefaultHandler {
buffer.append(ch, start, length);
}
}
public Item getChannel() {
return channel;
}
public Item getItem(int i) {
// retrieve item by order number
return getItem((String) itemsGUID.get(i));
}
public Item getItem(String guid) {
// retrieve item by guid
return (Item) items.get(guid);
}
public int items() {
return items.size();
}
public String getImage() {
return this.imageURL;
public RSSFeed getFeed() {
return theChannel;
}
public static class Item {
private HashMap<String, String> map;
public Item() {
this.map = new HashMap<String, String>();
this.map.put("guid", Long.toHexString(System.currentTimeMillis()) + ":" + guidcount++);
}
public void setValue(String name, String value) {
map.put(name, value);
}
public String getAuthor() {
return (String) map.get("author");
}
public String getCopyright() {
return (String) map.get("copyright");
}
public String getCategory() {
return (String) map.get("category");
}
public String getTitle() {
return (String) map.get("title");
}
public String getLink() {
return (String) map.get("link");
}
public String getReferrer() {
return (String) map.get("referrer");
}
public String getLanguage() {
return (String) map.get("language");
}
public String getDescription() {
return (String) map.get("description");
}
public String getCreator() {
return (String) map.get("creator");
}
public String getPubDate() {
return (String) map.get("pubDate");
}
public String getGuid() {
return (String) map.get("guid");
}
public String getDocs() {
return (String) map.get("docs");
}
}
}

@ -84,7 +84,8 @@ import de.anomic.server.serverCore;
import de.anomic.server.serverDomains;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
import de.anomic.xml.rssReader;
import de.anomic.xml.RSSFeed;
import de.anomic.xml.RSSReader;
public final class yacyClient {
@ -374,7 +375,7 @@ public final class yacyClient {
}
}
public static rssReader queryRemoteCrawlURLs(yacySeed target, int count) {
public static RSSFeed queryRemoteCrawlURLs(yacySeed target, int count) {
// returns a list of
if (target == null) { return null; }
if (yacyCore.seedDB.mySeed() == null) return null;
@ -390,8 +391,8 @@ public final class yacyClient {
/* a long time-out is needed */
final byte[] result = wput("http://" + target.getClusterAddress() + "/yacy/urls.xml", target.getHexHash() + ".yacyh", post, 60000);
rssReader reader = rssReader.parse(result);
if (reader == null) {
RSSFeed feed = RSSReader.parse(result).getFeed();
if (feed == null) {
// case where the rss reader does not understand the content
yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer");
System.out.println("***DEBUG*** rss input = " + new String(result));
@ -400,7 +401,7 @@ public final class yacyClient {
//e.printStackTrace();
return null;
}
return reader;
return feed;
} catch (IOException e) {
yacyCore.log.logSevere("yacyClient.queryRemoteCrawlURLs error asking peer '" + target.getName() + "':" + e.toString());
return null;

Loading…
Cancel
Save