add test case for optimized toTokens()

pull/51/head
reger 9 years ago
parent 2f8bb04e11
commit ec24a0c85a

@ -980,8 +980,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
/**
* create word tokens for parser. Find CamelCases and separate these words
* resulting words are not ordered by appearance, but all
* @return
* resulting words are not ordered by appearance, but all in sequence
* @return string with unique tokens
*/
public static String toTokens(final String s) {
// remove all non-character & non-number
@ -992,14 +992,8 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if ((c >= '0' && c <='9') || (c >= 'a' && c <='z') || (c >= 'A' && c <='Z')) sb.append(c); else sb.append(' ');
}
String t = sb.toString();
// remove all double-spaces
int p;
while ((p = t.indexOf(" ",0)) >= 0) t = t.substring(0, p) + t.substring(p + 1);
// split the string into tokens and add all camel-case splitting
final String[] u = CommonPattern.SPACES.split(t);
final String[] u = CommonPattern.SPACES.split(sb);
final Set<String> token = new LinkedHashSet<String>();
for (final String r: u) token.add(r);
for (final String r: u) token.addAll(parseCamelCase(r));

@ -235,6 +235,27 @@ public class MultiProtocolURLTest {
assertEquals(testurls.get(s),result);
}
}
/**
 * Checks that MultiProtocolURL.toTokens() yields the same token string for
 * inputs that differ only in leading, trailing or repeated blanks, or in the
 * non-alphanumeric character used as separator between the words.
 */
@Test
public void testToTokens() {
    // every row holds two inputs which are expected to produce identical tokens
    final String[][] equivalentInputs = {
        {"abc", "abc "},
        {" cde", "cde"},
        {" efg", "efg "},
        {"hij hij", " hij "},
        {"klm mno", "klm@mno"},
        {"abc/cde?fff", "abc\\cde-fff "}
    };

    for (final String[] pair : equivalentInputs) {
        final String left = pair[0];
        final String right = pair[1];

        // tokenize both variants before comparing them
        final String tokensLeft = MultiProtocolURL.toTokens(left);
        final String tokensRight = MultiProtocolURL.toTokens(right);

        // the message names the offending pair in case of a mismatch
        assertEquals("input: " + left + "=" + right, tokensLeft, tokensRight);
    }
}
}

Loading…
Cancel
Save