// indexRWIRowEntry.java
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 20.05.2006 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
// 
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package de.anomic.index;

import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroMicroDate;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.yacy.yacySeedDB;

/**
 * Row-backed implementation of {@link indexRWIEntry}: the attributes of one URL
 * reference inside an RWI collection, held in a single {@link kelondroRow.Entry}
 * that is laid out according to {@link #urlEntryRow}.
 *
 * <p>Two instances are considered equal when their URL hashes are equal; this
 * matches {@link #hashCode()}, which is computed from the URL hash only.</p>
 */
public final class indexRWIRowEntry implements indexRWIEntry, Cloneable {

    // this object stores attributes to URL references inside RWI collections

    /** Row layout of an entry; the column order must match the col_* indexes below. */
    public static final kelondroRow urlEntryRow = new kelondroRow(new kelondroColumn[]{
            new kelondroColumn("h", kelondroColumn.celltype_string,    kelondroColumn.encoder_bytes, yacySeedDB.commonHashLength, "urlhash"),
            new kelondroColumn("a", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  2, "lastModified"),
            new kelondroColumn("s", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  2, "freshUntil"),
            new kelondroColumn("u", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "wordsInTitle"),
            new kelondroColumn("w", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  2, "wordsInText"),
            new kelondroColumn("p", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  2, "phrasesInText"),
            new kelondroColumn("d", kelondroColumn.celltype_binary,    kelondroColumn.encoder_bytes, 1, "doctype"),
            new kelondroColumn("l", kelondroColumn.celltype_string,    kelondroColumn.encoder_bytes, 2, "language"),
            new kelondroColumn("x", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "llocal"),
            new kelondroColumn("y", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "lother"),
            new kelondroColumn("m", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "urlLength"),
            new kelondroColumn("n", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "urlComps"),
            new kelondroColumn("g", kelondroColumn.celltype_binary,    kelondroColumn.encoder_bytes, 1, "typeofword"),
            new kelondroColumn("z", kelondroColumn.celltype_bitfield,  kelondroColumn.encoder_bytes, 4, "flags"),
            new kelondroColumn("c", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "hitcount"),
            new kelondroColumn("t", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  2, "posintext"),
            new kelondroColumn("r", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "posinphrase"),
            new kelondroColumn("o", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "posofphrase"),
            new kelondroColumn("i", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "worddistance"),
            new kelondroColumn("k", kelondroColumn.celltype_cardinal,  kelondroColumn.encoder_b256,  1, "reserve")
    },
    kelondroBase64Order.enhancedCoder,
    0);
    // column nicknames not used above: b,e,j,q

    // static properties (index, nickname, width in bytes, meaning)
    private static final int col_urlhash       =  0; // h 12 the url hash b64-encoded
    private static final int col_lastModified  =  1; // a  2 last-modified time of the document where word appears
    private static final int col_freshUntil    =  2; // s  2 TTL for the word, so it can be removed easily if the TTL is short
    private static final int col_wordsInTitle  =  3; // u  1 words in description/length (longer are better?)
    private static final int col_wordsInText   =  4; // w  2 total number of words in document
    private static final int col_phrasesInText =  5; // p  2 total number of phrases in document
    private static final int col_doctype       =  6; // d  1 type of document
    private static final int col_language      =  7; // l  2 (guessed) language of document
    private static final int col_llocal        =  8; // x  1 outlinks to same domain
    private static final int col_lother        =  9; // y  1 outlinks to other domain
    private static final int col_urlLength     = 10; // m  1 byte-length of complete URL
    private static final int col_urlComps      = 11; // n  1 number of path components

    // dynamic properties
    private static final int col_typeofword    = 12; // g  1 grammatical classification
    private static final int col_flags         = 13; // z  4 appearance flags, stored as the raw bytes of a kelondroBitfield
    private static final int col_hitcount      = 14; // c  1 number of occurrences of this word in text
    private static final int col_posintext     = 15; // t  2 first appearance of word in text
    private static final int col_posinphrase   = 16; // r  1 position of word in its phrase
    private static final int col_posofphrase   = 17; // o  1 number of the phrase where word appears
    private static final int col_reserve1      = 18; // i  1 reserve1; the column is named "worddistance" in urlEntryRow but is only ever written as 0 here
    private static final int col_reserve2      = 19; // k  1 reserve2; column "reserve", only ever written as 0 here

    /** The backing row; all getters read from it. */
    private final kelondroRow.Entry entry;

    /**
     * Creates a new entry from individual attribute values.
     *
     * <p>A {@code language} that is {@code null} or whose length differs from the
     * width of the language column is replaced by {@code "uk"}. The type-of-word
     * column and both reserve columns are written as 0.</p>
     */
    public indexRWIRowEntry(final String  urlHash,
            final int      urlLength,     // byte-length of complete URL
            final int      urlComps,      // number of path components
            final int      titleLength,   // length of description/length (longer are better?)
            final int      hitcount,      // how often appears this word in the text
            final int      wordcount,     // total number of words
            final int      phrasecount,   // total number of phrases
            final int      posintext,     // position of word in all words
            final int      posinphrase,   // position of word in its phrase
            final int      posofphrase,   // number of the phrase where word appears
            final long     lastmodified,  // last-modified time of the document where word appears
            final long     updatetime,    // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short
            String   language,      // (guessed) language of document
            final char     doctype,       // type of document
            final int      outlinksSame,  // outlinks to same domain
            final int      outlinksOther, // outlinks to other domain
            final kelondroBitfield flags  // attributes to the url and to the word according the url
    ) {

        // check against the same constant that defines the width of the urlhash column
        assert (urlHash.length() == yacySeedDB.commonHashLength) : "urlhash = " + urlHash;
        if ((language == null) || (language.length() != urlEntryRow.width(col_language))) language = "uk";
        this.entry = urlEntryRow.newEntry();
        final int mddlm = kelondroMicroDate.microDateDays(lastmodified);
        final int mddct = kelondroMicroDate.microDateDays(updatetime);
        this.entry.setCol(col_urlhash, urlHash, null);
        this.entry.setCol(col_lastModified, mddlm);
        // TTL computation: the update day plus the distance between last modification and update, never below 0
        this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2));
        this.entry.setCol(col_wordsInTitle, titleLength / 6); // word count estimation; TODO: change value handover to number of words
        this.entry.setCol(col_wordsInText, wordcount);
        this.entry.setCol(col_phrasesInText, phrasecount);
        this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); // only the low 8 bits of the char are kept
        this.entry.setCol(col_language, language, null);
        this.entry.setCol(col_llocal, outlinksSame);
        this.entry.setCol(col_lother, outlinksOther);
        this.entry.setCol(col_urlLength, urlLength);
        this.entry.setCol(col_urlComps, urlComps);
        this.entry.setCol(col_typeofword, new byte[]{(byte) 0}); // TODO: grammatical classification
        this.entry.setCol(col_flags, flags.bytes());
        this.entry.setCol(col_hitcount, hitcount);
        this.entry.setCol(col_posintext, posintext);
        this.entry.setCol(col_posinphrase, posinphrase);
        this.entry.setCol(col_posofphrase, posofphrase);
        this.entry.setCol(col_reserve1, 0);
        this.entry.setCol(col_reserve2, 0);
    }

    /**
     * Creates an entry from a URL hash and the remaining row content.
     *
     * @param urlHash the leading urlhash cell
     * @param code    the external form of the row minus the leading urlHash entry
     */
    public indexRWIRowEntry(final String urlHash, final String code) {
        // NOTE(review): getBytes() without an argument uses the platform default charset;
        // presumably both strings are plain ASCII so the result is the same everywhere - confirm
        this.entry = urlEntryRow.newEntry((urlHash + code).getBytes());
    }

    /**
     * Creates an entry by handing the given external string form to
     * {@code urlEntryRow.newEntry(external, true)}.
     */
    public indexRWIRowEntry(final String external) {
        this.entry = urlEntryRow.newEntry(external, true);
    }

    /** Creates an entry from the bytes of one complete row. */
    public indexRWIRowEntry(final byte[] row) {
        this.entry = urlEntryRow.newEntry(row);
    }

    /**
     * Creates an entry from a row located inside a larger byte array.
     *
     * @param row    the array containing the row
     * @param offset start position of the row inside {@code row}
     * @param clone  passed through to {@code kelondroRow.newEntry}; presumably
     *               selects whether the bytes are copied - confirm in kelondroRow
     */
    public indexRWIRowEntry(final byte[] row, final int offset, final boolean clone) {
        this.entry = urlEntryRow.newEntry(row, offset, clone);
    }

    /**
     * Wraps an existing row entry. The entry is not copied, so this object
     * shares it with the caller.
     */
    public indexRWIRowEntry(final kelondroRow.Entry rentry) {
        // FIXME: see if cloning is necessary
        this.entry = rentry;
    }

    /**
     * Returns a new entry built from a fresh copy of this entry's row bytes.
     */
    @Override
    public indexRWIRowEntry clone() {
        final byte[] b = new byte[urlEntryRow.objectsize];
        System.arraycopy(entry.bytes(), 0, b, 0, urlEntryRow.objectsize);
        return new indexRWIRowEntry(b);
    }

    /** Returns the property-form string produced by the backing row entry. */
    public String toPropertyForm() {
        return entry.toPropertyForm(true, true, false);
    }

    /** Returns the backing row entry itself (not a copy). */
    public Entry toKelondroEntry() {
        return this.entry;
    }

    /** Returns the URL hash stored in the first column. */
    public String urlHash() {
        return this.entry.getColString(col_urlhash, null);
    }

    /** Returns the stored last-modified value without conversion. */
    public int virtualAge() {
        return (int) this.entry.getColLong(col_lastModified);  // this is the time in MicroDateDays format
    }

    /**
     * Returns the last-modified value converted back with
     * {@code kelondroMicroDate.reverseMicroDateDays}.
     */
    public long lastModified() {
        return kelondroMicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified));
    }

    /**
     * Returns the fresh-until (TTL) value converted back with
     * {@code kelondroMicroDate.reverseMicroDateDays}.
     */
    public long freshUntil() {
        return kelondroMicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil));
    }

    /** Returns the number of occurrences of the word in the text. */
    public int hitcount() {
        return (int) this.entry.getColLong(col_hitcount);
    }

    /** Returns the position of the first appearance of the word in the text. */
    public int posintext() {
        return (int) this.entry.getColLong(col_posintext);
    }

    /** Returns the position of the word in its phrase. */
    public int posinphrase() {
        return (int) this.entry.getColLong(col_posinphrase);
    }

    /** Returns the number of the phrase where the word appears. */
    public int posofphrase() {
        return (int) this.entry.getColLong(col_posofphrase);
    }

    /** Returns the total number of words in the document. */
    public int wordsintext() {
        return (int) this.entry.getColLong(col_wordsInText);
    }

    /** Returns the total number of phrases in the document. */
    public int phrasesintext() {
        return (int) this.entry.getColLong(col_phrasesInText);
    }

    /** Returns the content of the language column. */
    public String getLanguage() {
        return this.entry.getColString(col_language, null);
    }

    /**
     * Returns the document type character.
     *
     * <p>The value is stored as a single byte; it is read back as an unsigned
     * value so that characters above 0x7F are not sign-extended into 0xFFxx.</p>
     */
    public char getType() {
        return (char) (this.entry.getColByte(col_doctype) & 0xFF);
    }

    /** Returns the stored title word estimate (the constructor stores titleLength / 6). */
    public int wordsintitle() {
        return (int) this.entry.getColLong(col_wordsInTitle);
    }

    /** Returns the number of outlinks to the same domain. */
    public int llocal() {
        return (int) this.entry.getColLong(col_llocal);
    }

    /** Returns the number of outlinks to other domains. */
    public int lother() {
        return (int) this.entry.getColLong(col_lother);
    }

    /** Returns the stored length of the complete URL. */
    public int urllength() {
        return (int) this.entry.getColLong(col_urlLength);
    }

    /** Returns the stored number of URL path components. */
    public int urlcomps() {
        return (int) this.entry.getColLong(col_urlComps);
    }

    /** Returns a new bitfield built from the bytes of the flags column. */
    public kelondroBitfield flags() {
        return new kelondroBitfield(this.entry.getColBytes(col_flags));
    }

    /**
     * Returns hitcount / (wordsintext + wordsintitle + 1); the added 1 keeps the
     * denominator non-zero when both word counts are 0.
     */
    public double termFrequency() {
        return (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1)));
    }

    /** Same as {@link #toPropertyForm()}. */
    @Override
    public String toString() {
        return toPropertyForm();
    }

    /**
     * @return true if {@code other} is null or this entry has a strictly later
     *         last-modified time than {@code other}
     */
    public boolean isNewer(final indexRWIEntry other) {
        return (other == null) || (this.lastModified() > other.lastModified());
    }

    /**
     * @return true if {@code other} is not null and this entry has a strictly
     *         earlier last-modified time than {@code other}
     */
    public boolean isOlder(final indexRWIEntry other) {
        return (other != null) && (this.lastModified() < other.lastModified());
    }

    /** The hash code is derived from the URL hash only. */
    @Override
    public int hashCode() {
        return this.urlHash().hashCode();
    }

    /**
     * Two entries are equal when they carry the same URL hash, which keeps
     * equality consistent with {@link #hashCode()}.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) return true;
        if (!(obj instanceof indexRWIRowEntry)) return false; // also covers obj == null
        return this.urlHash().equals(((indexRWIRowEntry) obj).urlHash());
    }
}