From 4f6d56330d106d5a2e22c8b766e5f27ba655893e Mon Sep 17 00:00:00 2001 From: borg-0300 Date: Tue, 21 Aug 2007 22:07:23 +0000 Subject: [PATCH] =?UTF-8?q?Bugfix=20f=C3=BCr=20abgeschnittene=20=C3=9Cbers?= =?UTF-8?q?chriften=20-=20http://forum.yacy-websuche.de/viewtopic.php=3Ff?= =?UTF-8?q?=3D6&t=3D273?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4055 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../htmlFilter/htmlFilterContentScraper.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index 89fc6ad74..847c2a0ca 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -275,15 +275,19 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen for (int i = 0; i < s.length(); i++) if (s.charAt(i) < ' ') s = s.substring(0, i) + " " + s.substring(i + 1); */ - - // remove double-spaces + int p; - while ((p = s.indexOf(" ")) >= 0) s = s.substring(0, p) + s.substring(p + 1); + + // CR/LF entfernen, dabei koennen doppelte Leerzeichen enstehen die aber weiter unten entfernt werden - thq + while ((p = s.indexOf("\n")) >= 0) s = s.substring(0, p) + ((p + 1 == s.length()) ? "" : " " + s.substring(p + 1)); + + // remove double-spaces + while ((p = s.indexOf(" ")) >= 0) s = s.substring(0, p) + s.substring(p + 1); // we don't accept headlines that are too short s = s.trim(); if (s.length() < 4) s = ""; - + // return result return s; } @@ -360,7 +364,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen * @return the {@link URL} to the favicon that belongs to the document */ public URL getFavicon() { - return this.favicon; + return this.favicon; } public String getDescription() {