From f63fff90085fc5ff144deb54891c69d5ef5bf146 Mon Sep 17 00:00:00 2001 From: reger Date: Mon, 16 Mar 2015 02:03:40 +0100 Subject: [PATCH] fix snippet containing number with comma as decimal point http://mantis.tokeek.de/view.php?id=344 to keep it as one word (by altering the split regex) - added snippet test case with number - regex for word split to match multiple split chars --- source/net/yacy/search/snippet/TextSnippet.java | 6 ++++-- test/net/yacy/search/snippet/TextSnippetTest.java | 12 ++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/source/net/yacy/search/snippet/TextSnippet.java b/source/net/yacy/search/snippet/TextSnippet.java index d864d7c21..b10fbd44b 100644 --- a/source/net/yacy/search/snippet/TextSnippet.java +++ b/source/net/yacy/search/snippet/TextSnippet.java @@ -75,9 +75,11 @@ public class TextSnippet implements Comparable, Comparator\\A[\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+\\Z + * updated to \\A([\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+)([\\p{N}]+[.,][\\p{N}])+\\Z + * to detect words with none alphanumeric chars (1) allow comma/dot surrounded by number (2) */ private static final Pattern p3 = - Pattern.compile("\\A[\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+\\Z"); + Pattern.compile("\\A([\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+)([\\p{N}]+[.,][\\p{N}])+\\Z"); /** * [^\\p{L}\\p{N}] */ @@ -390,7 +392,7 @@ public class TextSnippet implements Comparable, Comparator")); // display text not to include unwanted html assertTrue ("Query word not marked", sniptxt.contains("test")); // query word to be marked + + // test text with some numbers (english/german format) + rawtestline = "Test Version 1.83 calculates pi to 3,14 always"; + ts = new TextSnippet( + url.hash(), + rawtestline, + false, // isMarked, + TextSnippet.ResultClass.SOURCE_METADATA, ""); + sniptxt = ts.descriptionline(qg); + System.out.println("testDescriptionline: (with numbers) snippet="+sniptxt); + assertTrue ("number (.) 
broken up",sniptxt.contains("1.83")); + assertTrue ("number (,) broken up",sniptxt.contains("3,14")); } }