Remove unneeded counter in Tokenizer (completing last changes)

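This includes a small change to how a word's posInText is counted. We store the position of the word's first occurrence in the text. Previously, a newly found word got a handle (posInText) from a counter that excluded words already found, i.e. it counted distinct words only. Now every word is counted, and the true position in the text is assigned as the handle (posInText).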
9 years ago · 96467c5467
parent ce536fe90b
commit 96467c5467
2 changed files with 2 additions and 7 deletions
--- a/source/net/yacy/document/Tokenizer.java
+++ b/source/net/yacy/document/Tokenizer.java
@ -74,9 +74,6 @@ public class Tokenizer {
        String k;
        Tagging.Metatag tag;
        int wordlen;
-        int wordHandle;
-        int wordHandleCount = 0;
-        //final int sentenceHandleCount = 0;
        int allwordcounter = 0;
        int allsentencecounter = 0;
        int wordInSentenceCounter = 1;
@ -167,12 +164,10 @@ public class Tokenizer {
                Word wsp = this.words.get(word);
                if (wsp != null) {
                    // word already exists
-                    wordHandle = wsp.posInText;
                    wsp.inc();
                } else {
                    // word does not yet exist, create new word entry
-                    wordHandle = ++wordHandleCount; // let start pos with 1
-                    wsp = new Word(wordHandle, wordInSentenceCounter, allsentencecounter + 100); // nomal sentence start at 100 !
+                    wsp = new Word(allwordcounter, wordInSentenceCounter, allsentencecounter + 100); // nomal sentence start at 100 !
                    wsp.flags = this.RESULT_FLAGS.clone();
                    this.words.put(word.toLowerCase(), wsp);
                }
--- a/source/net/yacy/kelondro/data/word/Word.java
+++ b/source/net/yacy/kelondro/data/word/Word.java
@ -66,7 +66,7 @@ public class Word {

    // object carries statistics for words and sentences
    public  int      count;       // number of occurrences
-    public  int      posInText;   // unique handle, is initialized with word position (excluding double occurring words)
+    public  int      posInText;   // unique handle, is initialized with first word position in text
    public  int      posInPhrase; // position of word in phrase
    public  int      numOfPhrase; // number of phrase. 'normal' phrases begin with number 100
    public  Bitfield flags;       // the flag bits for each word