From 30a8a2f76ba650b12aeec8a003d8da9a01ed7f67 Mon Sep 17 00:00:00 2001 From: low012 Date: Sun, 28 Aug 2011 13:32:42 +0000 Subject: [PATCH] *) replacing one ugly hack with an extended ugly hack ;-) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7908 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/net/yacy/cora/document/RSSReader.java | 38 ++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/source/net/yacy/cora/document/RSSReader.java b/source/net/yacy/cora/document/RSSReader.java index 3e443001b..c3a06be83 100644 --- a/source/net/yacy/cora/document/RSSReader.java +++ b/source/net/yacy/cora/document/RSSReader.java @@ -96,11 +96,8 @@ public class RSSReader extends DefaultHandler { if (!equals(a, UTF8.getBytes(" 0) type = Type.rss; - if (end.indexOf("feed") > 0) type = Type.atom; - if (end.indexOf("rdf") > 0) type = Type.rdf; + + final Type type = findOutType(a); if (type == Type.none) { throw new IOException("response incomplete"); } @@ -118,7 +115,36 @@ public class RSSReader extends DefaultHandler { try { bais.close(); } catch (final IOException e) {} return reader; } - + + /** + * Tries to find out the type of feed by stepping through its data + * starting in the end and looking at the last XML tag. Just grabbing + * the last few characters of the data does not work since some + * people add quite long comments at the end of their feeds. + * @param a contains the feed + * @return type of feed + */ + private static Type findOutType(final byte[] a) { + String end; + int i = 1; + + do { + /* In first iteration grab the last 80 characters, after that + * move towards the start of the data and take some more (90) + * to have an overlap in order to not miss anything if the tag + * is on the border of two 80 character blocks. + */ + end = UTF8.String(a, a.length - (i * 80), (80 + ((i > 1)? 
10 : 0))); + i++; + } while(!end.contains("</")); + + Type type = Type.none; + if (end.indexOf("rss") > 0) type = Type.rss; + if (end.indexOf("feed") > 0) type = Type.atom; + if (end.indexOf("rdf") > 0) type = Type.rdf; + return type; + } + private final static boolean equals(final byte[] buffer, final byte[] pattern) { // compares two byte arrays: true, if pattern appears completely at offset position if (buffer.length < pattern.length) return false;