Remove unneeded counter in Tokenizer (completing last changes)

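Includes a small change to how the word posInText is counted.
We still remember/store the posInText of a word's first occurrence. Previously, each new word got a handle (posInText) from a counter that excluded words already found.
Now every word is counted, and the true position in the text is assigned as the handle (posInText).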
pull/93/head
reger 9 years ago
parent ce536fe90b
commit 96467c5467

@ -74,9 +74,6 @@ public class Tokenizer {
String k;
Tagging.Metatag tag;
int wordlen;
int wordHandle;
int wordHandleCount = 0;
//final int sentenceHandleCount = 0;
int allwordcounter = 0;
int allsentencecounter = 0;
int wordInSentenceCounter = 1;
@ -167,12 +164,10 @@ public class Tokenizer {
Word wsp = this.words.get(word);
if (wsp != null) {
// word already exists
wordHandle = wsp.posInText;
wsp.inc();
} else {
// word does not yet exist, create new word entry
wordHandle = ++wordHandleCount; // let start pos with 1
wsp = new Word(wordHandle, wordInSentenceCounter, allsentencecounter + 100); // nomal sentence start at 100 !
wsp = new Word(allwordcounter, wordInSentenceCounter, allsentencecounter + 100); // nomal sentence start at 100 !
wsp.flags = this.RESULT_FLAGS.clone();
this.words.put(word.toLowerCase(), wsp);
}

@ -66,7 +66,7 @@ public class Word {
// object carries statistics for words and sentences
public int count; // number of occurrences
public int posInText; // unique handle, is initialized with word position (excluding double occurring words)
public int posInText; // unique handle, is initialized with first word position in text
public int posInPhrase; // position of word in phrase
public int numOfPhrase; // number of phrase. 'normal' phrases begin with number 100
public Bitfield flags; // the flag bits for each word

Loading…
Cancel
Save