From 47fd226bdbbb8cdd3dac04678406b6aa755b9d14 Mon Sep 17 00:00:00 2001 From: lotus Date: Tue, 19 May 2009 16:41:27 +0000 Subject: [PATCH] proper parsing of sentences does not affect tokens/words git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5964 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/parser/Condenser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/de/anomic/plasma/parser/Condenser.java b/source/de/anomic/plasma/parser/Condenser.java index fa16aff8c..425c5f400 100644 --- a/source/de/anomic/plasma/parser/Condenser.java +++ b/source/de/anomic/plasma/parser/Condenser.java @@ -696,7 +696,7 @@ public final class Condenser { if (c < ' ') c = ' '; if ((lc == ' ') && (c == ' ')) continue; // ignore double spaces s.append(c); - if (htmlFilterContentScraper.punctuation(c)) break; + if (htmlFilterContentScraper.punctuation(lc) && invisible(c)) break; lc = c; }