From e0da0a84b05e0d80f4559a6af454f273b8e53a12 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 22 Mar 2010 09:12:52 +0000 Subject: [PATCH] performance fix in http parser git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6760 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../document/parser/html/ContentScraper.java | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 534ffc0b5..72724b95b 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -280,16 +280,21 @@ public class ContentScraper extends AbstractScraper implements Scraper { } private static String cleanLine(String s) { - // may contain too many funny symbols - for (int i = 0; i < s.length(); i++) - if (s.charAt(i) < ' ') s = s.substring(0, i) + " " + s.substring(i + 1); - - // remove double-spaces - int p; - while ((p = s.indexOf(" ")) >= 0) s = s.substring(0, p) + s.substring(p + 1); - + StringBuilder sb = new StringBuilder(s.length()); + char c, l = ' '; + for (int i = 0; i < s.length(); i++) { + c = s.charAt(i); + if (c < ' ') c = ' '; + if (c == ' ') { + if (l != ' ') sb.append(c); + } else { + sb.append(c); + } + l = c; + } + // return result - return s.trim(); + return sb.toString().trim(); } public String getTitle() {