From ec24a0c85a1ec7be117ca869caa7bce7c33e3fed Mon Sep 17 00:00:00 2001 From: reger Date: Thu, 24 Mar 2016 19:26:38 +0100 Subject: [PATCH] add test case for optimized toTokens() --- .../cora/document/id/MultiProtocolURL.java | 12 +++-------- .../document/id/MultiProtocolURLTest.java | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 081296228..64570dd4c 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -980,8 +980,8 @@ public class MultiProtocolURL implements Serializable, Comparable= '0' && c <='9') || (c >= 'a' && c <='z') || (c >= 'A' && c <='Z')) sb.append(c); else sb.append(' '); } - String t = sb.toString(); - - // remove all double-spaces - int p; - while ((p = t.indexOf(" ",0)) >= 0) t = t.substring(0, p) + t.substring(p + 1); - // split the string into tokens and add all camel-case splitting - final String[] u = CommonPattern.SPACES.split(t); + final String[] u = CommonPattern.SPACES.split(sb); final Set token = new LinkedHashSet(); for (final String r: u) token.add(r); for (final String r: u) token.addAll(parseCamelCase(r)); diff --git a/test/java/net/yacy/cora/document/id/MultiProtocolURLTest.java b/test/java/net/yacy/cora/document/id/MultiProtocolURLTest.java index b3c3abf14..9ada0285c 100644 --- a/test/java/net/yacy/cora/document/id/MultiProtocolURLTest.java +++ b/test/java/net/yacy/cora/document/id/MultiProtocolURLTest.java @@ -235,6 +235,27 @@ public class MultiProtocolURLTest { assertEquals(testurls.get(s),result); } } + + /** + * Test of toTokens method, of class MultiProtocolURL. + */ + @Test + public void testToTokens() { + // test string pairs which should generate equal results + String[][] testString = new String[][]{ + {"abc", "abc "}, + {" cde", "cde"}, + {" efg", "efg "}, + {"hij hij", " hij "}, + {"klm mno", "klm@mno"}, + {"abc/cde?fff", "abc\\cde-fff "} }; + String result1, result2; + for (String[] s : testString) { + result1 = MultiProtocolURL.toTokens(s[0]); + result2 = MultiProtocolURL.toTokens(s[1]); + assertEquals("input: "+s[0]+"="+s[1],result1, result2); + } + } }