fix snippet containing number with comma as decimal point http://mantis.tokeek.de/view.php?id=344

to keep it as one word (by altering the split regex)
- added snippet test case with number
- regex for word split to match multiple split chars
pull/1/head
reger 10 years ago
parent b241264632
commit f63fff9008

@ -75,9 +75,11 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
Pattern.compile(".+[^\\p{L}\\p{N}]\\Z"); Pattern.compile(".+[^\\p{L}\\p{N}]\\Z");
/** /**
* <code>\\A[\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+\\Z</code> * <code>\\A[\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+\\Z</code>
* updated to <code>\\A([\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+)([\\p{N}]+[.,][\\p{N}])+\\Z</code>
to detect words with non-alphanumeric chars (1) and allow comma/dot surrounded by numbers (2)
*/ */
private static final Pattern p3 = private static final Pattern p3 =
Pattern.compile("\\A[\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+\\Z"); Pattern.compile("\\A([\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+)([\\p{N}]+[.,][\\p{N}])+\\Z");
/** /**
* <code>[^\\p{L}\\p{N}]</code> * <code>[^\\p{L}\\p{N}]</code>
*/ */
@ -390,7 +392,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return this.resultStatus; return this.resultStatus;
} }
private static final Pattern SPLIT_PATTERN = Pattern.compile(" |-"); private static final Pattern SPLIT_PATTERN = Pattern.compile("[ |-]+");
/** /**
* Marks all words in current line which have the same * Marks all words in current line which have the same

@ -152,5 +152,17 @@ public class TextSnippetTest {
System.out.println("testDescriptionline: snippet=" + sniptxt); System.out.println("testDescriptionline: snippet=" + sniptxt);
assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("<pre>")); // display text not to include unwanted html assertFalse ("HTML code not allowed in snippet text",sniptxt.contains("<pre>")); // display text not to include unwanted html
assertTrue ("Query word not marked", sniptxt.contains("<b>test</b>")); // query word to be marked assertTrue ("Query word not marked", sniptxt.contains("<b>test</b>")); // query word to be marked
// test text with some numbers (english/german format)
rawtestline = "Test Version 1.83 calculates pi to 3,14 always";
ts = new TextSnippet(
url.hash(),
rawtestline,
false, // isMarked,
TextSnippet.ResultClass.SOURCE_METADATA, "");
sniptxt = ts.descriptionline(qg);
System.out.println("testDescriptionline: (with numbers) snippet="+sniptxt);
assertTrue ("number (.) broken up",sniptxt.contains("1.83"));
assertTrue ("number (,) broken up",sniptxt.contains("3,14"));
} }
} }

Loading…
Cancel
Save