diff --git a/source/net/yacy/kelondro/data/image/ImageReference.java b/source/net/yacy/kelondro/data/image/ImageReference.java deleted file mode 100644 index 30a09fbb4..000000000 --- a/source/net/yacy/kelondro/data/image/ImageReference.java +++ /dev/null @@ -1,64 +0,0 @@ -// ImageReference.java -// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 21.01.2010 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.data.image; - -import net.yacy.kelondro.order.Bitfield; -import net.yacy.kelondro.rwi.Reference; - -public interface ImageReference extends Reference { - - public int virtualAge(); - - public int hitcount(); - - public int posinphrase(); - - public int posofphrase(); - - public int wordsintext(); - - public int phrasesintext(); - - public String getLanguage(); - - public char getType(); - - public int wordsintitle(); - - public int llocal(); - - public int lother(); - - public int urllength(); - - public int urlcomps(); - - public Bitfield flags(); - - public double termFrequency(); - -} diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceFactory.java b/source/net/yacy/kelondro/data/image/ImageReferenceFactory.java deleted file mode 100644 index 692912900..000000000 --- a/source/net/yacy/kelondro/data/image/ImageReferenceFactory.java +++ /dev/null @@ -1,55 +0,0 @@ -// ImageReferenceFactory.java -// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 21.01.2010 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.data.image; - -import java.io.Serializable; - -import net.yacy.kelondro.index.Row; -import net.yacy.kelondro.index.Row.Entry; -import net.yacy.kelondro.rwi.ReferenceFactory; - -public class ImageReferenceFactory implements ReferenceFactory, Serializable { - - private static final long serialVersionUID=-2209473508756878863L; - - @Override - public ImageReference produceSlow(Entry e) { - return null; //new ImageReferenceRow(e); - } - - @Override - public ImageReference produceFast(ImageReference r) { - if (r instanceof ImageReferenceVars) return r; - return new ImageReferenceVars(r); - } - - @Override - public Row getRow() { - return ImageReferenceRow.urlEntryRow; - } - -} diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java deleted file mode 100644 index 12c21b72f..000000000 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ /dev/null @@ -1,302 +0,0 @@ -// ImageReferenceRow.java -// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 21.01.2010 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.data.image; - -import java.util.ArrayList; -import java.util.Collection; - -import net.yacy.cora.document.ASCII; -import net.yacy.cora.document.UTF8; -import net.yacy.kelondro.data.word.Word; -import net.yacy.kelondro.index.Column; -import net.yacy.kelondro.index.Row; -import net.yacy.kelondro.index.Row.Entry; -import net.yacy.kelondro.order.Base64Order; -import net.yacy.kelondro.order.Bitfield; -import net.yacy.kelondro.order.MicroDate; -import net.yacy.kelondro.rwi.AbstractReference; -import net.yacy.kelondro.rwi.Reference; -import net.yacy.kelondro.util.ByteArray; - -/** - * this object stores attributes to URL references inside RWI collections - * - */ -public final class ImageReferenceRow extends AbstractReference implements /*ImageReference,*/ Cloneable { - - /** - * object for termination of concurrent blocking queue processing - */ - public static final ImageReferenceRow poison = new ImageReferenceRow((Row.Entry) null); - - - public static final Row urlEntryRow = new Row(new Column[]{ - new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"), - new Column("f", Column.celltype_cardinal, Column.encoder_b256, 4, "created"), - new Column("m", Column.celltype_cardinal, Column.encoder_b256, 4, "modified"), - new Column("s", Column.celltype_cardinal, Column.encoder_bytes, 4, "size-bytes"), - new Column("d", Column.celltype_binary, Column.encoder_bytes, 1, "doctype"), - new Column("q", Column.celltype_binary, Column.encoder_bytes, 1, "quality"), - new Column("w", Column.celltype_cardinal, Column.encoder_b256, 2, "width"), // pixels - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "height"), // pixels - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "iso"), // iso number - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "verschlusszeit"), // the x in 1/x - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "blende"), - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 4, "distance"), - new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "author-id"), // author, creator, operator, camera-number - new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "group-id"), // may be also a crawl start identifier - new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "subgroupgroup-id"), // may be also a pages-in-crawl identifier - new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "counter-in-subgroup"), // may be also a counter of images on a page - new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "location-lon-x"), - new Column("a", Column.celltype_cardinal, Column.encoder_b256, 4, "location-lat-y"), - new Column("l", Column.celltype_cardinal, Column.encoder_b256, 4, "location-alt-h"), - new Column("t", Column.celltype_string, Column.encoder_bytes, 4, "typeOfImage"), // a 4-stage taxonomy - new Column("z", Column.celltype_bitfield, Column.encoder_bytes, 4, "flags"), - new Column("r", Column.celltype_binary, Column.encoder_bytes, 3, "RGBAverage"), - new Column("k", Column.celltype_cardinal, Column.encoder_b256, 1, "reserve") - }, - Base64Order.enhancedCoder - ); - // available chars: b,e,j,q - - // static properties - private static final int col_urlhash = 0; // h 12 the url hash b64-encoded - private static final int col_lastModified = 1; // a 2 last-modified time of the document where word appears - private static final int col_freshUntil = 2; // s 2 TTL for the word, so it can be removed easily if the TTL is short - private static final int col_doctype = 6; // d 1 type of document - private static final int col_urlLength = 10; // m 1 byte-length of complete URL - private static final int col_urlComps = 11; // n 1 number of path components - - // dynamic properties - //private static final int col_rgbaverage = 12; // g 6 an average of the RGB values - //private static final int col_typeofimage = 12; // g 4 classification - private static final int col_flags = 13; // z 4 b64-encoded appearance flags (24 bit, see definition below) - private static final int col_hitcount = 14; // c 1 number of occurrences of this word in text - private static final int col_posintext = 15; // t 2 first appearance of word in text - private static final int col_posinphrase = 16; // r 1 position of word in its phrase - private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears - private static final int col_reserve1 = 18; // i 1 reserve1 - private static final int col_reserve2 = 19; // k 1 reserve2 - - // ideas for the classification bytes - // 0 : content-type (person-portrait, persons-group, landscape, buildings, technical, artistical) - // 1 : content-situation (a categorization of the type, like: person/standing, building/factory, artistical/cubistic) - // 2 : content-category (a classification that is taken from the text environment by text analysis) - // 3 : - - private final Row.Entry entry; - - public ImageReferenceRow(final byte[] urlHash, - final int urlLength, // byte-length of complete URL - final int urlComps, // number of path components - final int titleLength, // length of description/length (longer are better?) - final int hitcount, // how often appears this word in the text - final int wordcount, // total number of words - final int phrasecount, // total number of phrases - final int posintext, // position of word in all words - final int posinphrase, // position of word in its phrase - final int posofphrase, // number of the phrase where word appears - final long lastmodified, // last-modified time of the document where word appears - final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - final String language, // (guessed) language of document - final char doctype, // type of document - final int outlinksSame, // outlinks to same domain - final int outlinksOther, // outlinks to other domain - final Bitfield flags // attributes to the url and to the word according the url - ) { - - assert (urlHash.length == 12) : "urlhash = " + urlHash; - this.entry = urlEntryRow.newEntry(); - final int mddlm = MicroDate.microDateDays(lastmodified); - final int mddct = MicroDate.microDateDays(updatetime); - this.entry.setCol(col_urlhash, urlHash); - this.entry.setCol(col_lastModified, mddlm); - this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation - this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); - this.entry.setCol(col_urlLength, urlLength); - this.entry.setCol(col_urlComps, urlComps); - this.entry.setCol(col_flags, flags.bytes()); - this.entry.setCol(col_hitcount, hitcount); - this.entry.setCol(col_posintext, posintext); - this.entry.setCol(col_posinphrase, posinphrase); - this.entry.setCol(col_posofphrase, posofphrase); - this.entry.setCol(col_reserve1, 0); - this.entry.setCol(col_reserve2, 0); - } - - public ImageReferenceRow(final byte[] urlHash, - final int urlLength, // byte-length of complete URL - final int urlComps, // number of path components - final int titleLength, // length of description/length (longer are better?) - final int wordcount, // total number of words - final int phrasecount, // total number of phrases - final long lastmodified, // last-modified time of the document where word appears - final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - final String language, // (guessed) language of document - final char doctype, // type of document - final int outlinksSame, // outlinks to same domain - final int outlinksOther // outlinks to other domain - ) { - assert (urlHash.length == 12) : "urlhash = " + ASCII.String(urlHash); - this.entry = urlEntryRow.newEntry(); - final int mddlm = MicroDate.microDateDays(lastmodified); - final int mddct = MicroDate.microDateDays(updatetime); - this.entry.setCol(col_urlhash, urlHash); - this.entry.setCol(col_lastModified, mddlm); - this.entry.setCol(col_freshUntil, Math.max(0, mddlm + (mddct - mddlm) * 2)); // TTL computation - this.entry.setCol(col_doctype, new byte[]{(byte) doctype}); - this.entry.setCol(col_urlLength, urlLength); - this.entry.setCol(col_urlComps, urlComps); - this.entry.setCol(col_reserve1, 0); - this.entry.setCol(col_reserve2, 0); - } - - public ImageReferenceRow(final String urlHash, final String code) { - // the code is the external form of the row minus the leading urlHash entry - this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code))); - } - - public ImageReferenceRow(final String external) { - this.entry = urlEntryRow.newEntry(external, true); - } - - public ImageReferenceRow(final byte[] row) { - this.entry = urlEntryRow.newEntry(row); - } - - public ImageReferenceRow(final byte[] row, final int offset, final boolean clone) { - this.entry = urlEntryRow.newEntry(row, offset, clone); - } - - public ImageReferenceRow(final Row.Entry rentry) { - // FIXME: see if cloning is necessary - this.entry = rentry; - } - - @Override - public ImageReferenceRow clone() { - final byte[] b = new byte[urlEntryRow.objectsize]; - System.arraycopy(this.entry.bytes(), 0, b, 0, urlEntryRow.objectsize); - return new ImageReferenceRow(b); - } - - @Override - public String toPropertyForm() { - return this.entry.toPropertyForm('=', true, true, false, false); - } - - @Override - public Entry toKelondroEntry() { - return this.entry; - } - - @Override - public byte[] urlhash() { - return this.entry.getColBytes(col_urlhash, true); - } - - public int virtualAge() { - return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format - } - - @Override - public long lastModified() { - return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified)); - } - - public long freshUntil() { - return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil)); - } - - public int hitcount() { - return (int) this.entry.getColLong(col_hitcount); - } - - @Override - public Collection positions() { - return new ArrayList(0); - } - - public int position(int p) { - assert p == 0 : "p = " + p; - return (int) this.entry.getColLong(col_posintext); - } - - public int posinphrase() { - return (int) this.entry.getColLong(col_posinphrase); - } - - public int posofphrase() { - return (int) this.entry.getColLong(col_posofphrase); - } - - - public char getType() { - return (char) this.entry.getColByte(col_doctype); - } - - public int urllength() { - return (int) this.entry.getColLong(col_urlLength); - } - - public int urlcomps() { - return (int) this.entry.getColLong(col_urlComps); - } - - public Bitfield flags() { - return new Bitfield(this.entry.getColBytes(col_flags, true)); - } - - @Override - public String toString() { - return toPropertyForm(); - } - - @Override - public boolean isOlder(final Reference other) { - if (other == null) return false; - if (this.lastModified() < other.lastModified()) return true; - return false; - } - - private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful - - @Override - public int hashCode() { - if (this.hashCache == Integer.MIN_VALUE) { - this.hashCache = ByteArray.hashCode(this.urlhash()); - } - return this.hashCache; - } - - @Override - public void join(Reference oe) { - throw new UnsupportedOperationException(""); - - } - -} diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java b/source/net/yacy/kelondro/data/image/ImageReferenceVars.java deleted file mode 100644 index 816f1d314..000000000 --- a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java +++ /dev/null @@ -1,398 +0,0 @@ -// ImageReferenceVars.java -// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 21.01.2010 on http://yacy.net -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate$ -// $LastChangedRevision$ -// $LastChangedBy$ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package net.yacy.kelondro.data.image; - -import java.io.Serializable; -import java.util.Collection; -import java.util.Queue; -import java.util.concurrent.LinkedBlockingQueue; - -import net.yacy.kelondro.index.Row.Entry; -import net.yacy.kelondro.order.Bitfield; -import net.yacy.kelondro.order.MicroDate; -import net.yacy.kelondro.rwi.AbstractReference; -import net.yacy.kelondro.rwi.Reference; -import net.yacy.kelondro.util.ByteArray; - - -public class ImageReferenceVars extends AbstractReference implements ImageReference, Reference, Cloneable, Serializable { - - private static final long serialVersionUID=3669156620967277347L; - - - /** - * object for termination of concurrent blocking queue processing - */ - public static final ImageReferenceVars poison = new ImageReferenceVars(); - - - public Bitfield flags; - public long lastModified; - public byte[] urlHash; - public String language; - public char type; - public int hitcount, llocal, lother, phrasesintext, - posinphrase, posofphrase, - urlcomps, urllength, virtualAge, - wordsintext, wordsintitle; - private final Queue positions; - public double termFrequency; - - public ImageReferenceVars( - final byte[] urlHash, - final int urlLength, // byte-length of complete URL - final int urlComps, // number of path components - final int titleLength, // length of description/length (longer are better?) - final int hitcount, // how often appears this word in the text - final int wordcount, // total number of words - final int phrasecount, // total number of phrases - final Queue ps, // positions of words that are joined into the reference - final int posinphrase, // position of word in its phrase - final int posofphrase, // number of the phrase where word appears - final long lastmodified, // last-modified time of the document where word appears - final long updatetime, // update time; this is needed to compute a TTL for the word, so it can be removed easily if the TTL is short - String language, // (guessed) language of document - final char doctype, // type of document - final int outlinksSame, // outlinks to same domain - final int outlinksOther, // outlinks to other domain - final Bitfield flags, // attributes to the url and to the word according the url - final double termfrequency - ) { - if ((language == null) || (language.length() != 2)) language = "uk"; - final int mddlm = MicroDate.microDateDays(lastmodified); - //final int mddct = MicroDate.microDateDays(updatetime); - this.flags = flags; - //this.freshUntil = Math.max(0, mddlm + (mddct - mddlm) * 2); - this.lastModified = lastmodified; - this.language = language; - this.urlHash = urlHash; - this.type = doctype; - this.hitcount = hitcount; - this.llocal = outlinksSame; - this.lother = outlinksOther; - this.phrasesintext = phrasecount; - this.positions = new LinkedBlockingQueue(); - for (Integer i: ps) this.positions.add(i); - this.posinphrase = posinphrase; - this.posofphrase = posofphrase; - this.urlcomps = urlComps; - this.urllength = urlLength; - this.virtualAge = mddlm; - this.wordsintext = wordcount; - this.wordsintitle = titleLength; - this.termFrequency = termfrequency; - } - - public ImageReferenceVars(final ImageReference e) { - this.flags = e.flags(); - //this.freshUntil = e.freshUntil(); - this.lastModified = e.lastModified(); - this.language = e.getLanguage(); - this.urlHash = e.urlhash(); - this.type = e.getType(); - this.hitcount = e.hitcount(); - this.llocal = e.llocal(); - this.lother = e.lother(); - this.phrasesintext = e.phrasesintext(); - this.positions = new LinkedBlockingQueue(); - for (Integer i: e.positions()) this.positions.add(i); - this.posinphrase = e.posinphrase(); - this.posofphrase = e.posofphrase(); - this.urlcomps = e.urlcomps(); - this.urllength = e.urllength(); - this.virtualAge = e.virtualAge(); - this.wordsintext = e.wordsintext(); - this.wordsintitle = e.wordsintitle(); - this.termFrequency = e.termFrequency(); - } - - /** - * initializer for special poison object - */ - public ImageReferenceVars() { - this.flags = null; - this.lastModified = 0; - this.language = null; - this.urlHash = null; - this.type = ' '; - this.hitcount = 0; - this.llocal = 0; - this.lother = 0; - this.phrasesintext = 0; - this.positions = null; - this.posinphrase = 0; - this.posofphrase = 0; - this.urlcomps = 0; - this.urllength = 0; - this.virtualAge = 0; - this.wordsintext = 0; - this.wordsintitle = 0; - this.termFrequency = 0.0; - } - - @Override - public ImageReferenceVars clone() { - final ImageReferenceVars c = new ImageReferenceVars( - this.urlHash, - this.urllength, - this.urlcomps, - this.wordsintitle, - this.hitcount, - this.wordsintext, - this.phrasesintext, - this.positions, - this.posinphrase, - this.posofphrase, - this.lastModified, - System.currentTimeMillis(), - this.language, - this.type, - this.llocal, - this.lother, - this.flags, - this.termFrequency); - return c; - } - - public void join(final ImageReferenceVars v) { - // combine the distance - this.positions.addAll(v.positions); - this.posinphrase = (this.posofphrase == v.posofphrase) ? Math.min(this.posinphrase, v.posinphrase) : 0; - this.posofphrase = Math.min(this.posofphrase, v.posofphrase); - - // combine term frequency - this.wordsintext = this.wordsintext + v.wordsintext; - this.termFrequency = this.termFrequency + v.termFrequency; - } - - @Override - public Bitfield flags() { - return this.flags; - } -/* - public long freshUntil() { - return freshUntil; - } -*/ - @Override - public String getLanguage() { - return this.language; - } - - @Override - public char getType() { - return this.type; - } - - @Override - public int hitcount() { - return this.hitcount; - } - - @Override - public boolean isOlder(final Reference other) { - assert false; // should not be used - return false; - } - - @Override - public long lastModified() { - return this.lastModified; - } - - @Override - public int llocal() { - return this.llocal; - } - - @Override - public int lother() { - return this.lother; - } - - @Override - public int phrasesintext() { - return this.phrasesintext; - } - - @Override - public int posinphrase() { - return this.posinphrase; - } - - @Override - public Collection positions() { - return this.positions; - } - - @Override - public int posofphrase() { - return this.posofphrase; - } - - public ImageReferenceRow toRowEntry() { - return new ImageReferenceRow( - this.urlHash, - this.urllength, // byte-length of complete URL - this.urlcomps, // number of path components - this.wordsintitle, // length of description/length (longer are better?) - this.hitcount, // how often appears this word in the text - this.wordsintext, // total number of words - this.phrasesintext, // total number of phrases - this.positions.iterator().next(), // position of word in all words - this.posinphrase, // position of word in its phrase - this.posofphrase, // number of the phrase where word appears - this.lastModified, // last-modified time of the document where word appears - System.currentTimeMillis(), // update time; - this.language, // (guessed) language of document - this.type, // type of document - this.llocal, // outlinks to same domain - this.lother, // outlinks to other domain - this.flags // attributes to the url and to the word according the url - ); - } - - @Override - public Entry toKelondroEntry() { - return toRowEntry().toKelondroEntry(); - } - - @Override - public String toPropertyForm() { - return toRowEntry().toPropertyForm(); - } - - @Override - public byte[] urlhash() { - return this.urlHash; - } - - @Override - public int urlcomps() { - return this.urlcomps; - } - - @Override - public int urllength() { - return this.urllength; - } - - @Override - public int virtualAge() { - return this.virtualAge; - } - - @Override - public int wordsintext() { - return this.wordsintext; - } - - @Override - public int wordsintitle() { - return this.wordsintitle; - } - - @Override - public double termFrequency() { - if (this.termFrequency == 0.0) this.termFrequency = (((double) this.hitcount()) / ((double) (this.wordsintext() + this.wordsintitle() + 1))); - return this.termFrequency; - } - - public final void min(final ImageReferenceVars other) { - if (other == null) return; - int v; - long w; - double d; - if (this.hitcount > (v = other.hitcount)) this.hitcount = v; - if (this.llocal > (v = other.llocal)) this.llocal = v; - if (this.lother > (v = other.lother)) this.lother = v; - if (this.virtualAge > (v = other.virtualAge)) this.virtualAge = v; - if (this.wordsintext > (v = other.wordsintext)) this.wordsintext = v; - if (this.phrasesintext > (v = other.phrasesintext)) this.phrasesintext = v; - if (other.positions != null) a(this.positions, min(this.positions, other.positions)); - if (this.posinphrase > (v = other.posinphrase)) this.posinphrase = v; - if (this.posofphrase > (v = other.posofphrase)) this.posofphrase = v; - if (this.lastModified > (w = other.lastModified)) this.lastModified = w; - //if (this.freshUntil > (w = other.freshUntil)) this.freshUntil = w; - if (this.urllength > (v = other.urllength)) this.urllength = v; - if (this.urlcomps > (v = other.urlcomps)) this.urlcomps = v; - if (this.wordsintitle > (v = other.wordsintitle)) this.wordsintitle = v; - if (this.termFrequency > (d = other.termFrequency)) this.termFrequency = d; - } - - public final void max(final ImageReferenceVars other) { - if (other == null) return; - int v; - long w; - double d; - if (this.hitcount < (v = other.hitcount)) this.hitcount = v; - if (this.llocal < (v = other.llocal)) this.llocal = v; - if (this.lother < (v = other.lother)) this.lother = v; - if (this.virtualAge < (v = other.virtualAge)) this.virtualAge = v; - if (this.wordsintext < (v = other.wordsintext)) this.wordsintext = v; - if (this.phrasesintext < (v = other.phrasesintext)) this.phrasesintext = v; - if (other.positions != null) a(this.positions, max(this.positions, other.positions)); - if (this.posinphrase < (v = other.posinphrase)) this.posinphrase = v; - if (this.posofphrase < (v = other.posofphrase)) this.posofphrase = v; - if (this.lastModified < (w = other.lastModified)) this.lastModified = w; - //if (this.freshUntil < (w = other.freshUntil)) this.freshUntil = w; - if (this.urllength < (v = other.urllength)) this.urllength = v; - if (this.urlcomps < (v = other.urlcomps)) this.urlcomps = v; - if (this.wordsintitle < (v = other.wordsintitle)) this.wordsintitle = v; - if (this.termFrequency < (d = other.termFrequency)) this.termFrequency = d; - } - - @Override - public void join(final Reference r) { - // joins two entries into one entry - - // combine the distance - ImageReference oe = (ImageReference) r; - for (Integer i: r.positions()) this.positions.add(i); - this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0; - this.posofphrase = Math.min(this.posofphrase, oe.posofphrase()); - - // combine term frequency - this.termFrequency = this.termFrequency + oe.termFrequency(); - this.wordsintext = this.wordsintext + oe.wordsintext(); - } - - private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful - - @Override - public int hashCode() { - if (this.hashCache == Integer.MIN_VALUE) { - this.hashCache = ByteArray.hashCode(this.urlHash); - } - return this.hashCache; - } - - public void addPosition(int position) { - this.positions.add(position); - } - -}