You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
package net.yacy.document;
|
|
|
|
|
|
|
|
import org.junit.Test;
|
|
|
|
import static org.junit.Assert.*;
|
|
|
|
|
|
|
|
public class WordTokenizerTest {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Test of nextElement method, of class WordTokenizer.
|
|
|
|
*/
|
|
|
|
@Test
|
|
|
|
public void testNextElement() {
|
|
|
|
// test sentences containing 10x the word "word"
|
|
|
|
String[] testTxtArr = new String[]{
|
|
|
|
" word word..... (word) [word] . 'word word' \"word word\" word ? word! ",
|
|
|
|
"word-word word . word.word@word.word ....word... word,word "
|
|
|
|
};
|
|
|
|
|
|
|
|
for (String testTxt : testTxtArr) {
|
|
|
|
SentenceReader sr = new SentenceReader(testTxt);
|
|
|
|
WordTokenizer wt = new WordTokenizer(sr, null);
|
|
|
|
int cnt = 0;
|
|
|
|
while (wt.hasMoreElements()) {
|
|
|
|
StringBuilder sb = wt.nextElement();
|
|
|
|
if (sb.length() > 1) { // skip punktuation
|
|
|
|
assertEquals("word", sb.toString());
|
|
|
|
cnt++;
|
|
|
|
} else {
|
|
|
|
assertTrue("punktuation", SentenceReader.punctuation(sb.charAt(0)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
wt.close();
|
|
|
|
assertEquals(10, cnt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|