You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java

222 lines
8.3 KiB

// CitationReferenceRow.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 03.04.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-03-20 16:44:59 +0100 (Fr, 20 Mrz 2009) $
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.data.citation;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.MicroDate;
import net.yacy.kelondro.rwi.Reference;
import net.yacy.kelondro.util.ByteArray;
/**
 * Stores the citation attributes of one URL reference in a single
 * {@link Row.Entry} that is laid out according to {@link #citationRow}.
 *
 * <p>Identity ({@link #equals(Object)} / {@link #hashCode()}) is defined by the
 * url hash column only. The position-related operations of {@link Reference}
 * ({@link #distance()}, {@link #join(Reference)}, {@link #positions()}, ...)
 * are not supported and always throw {@link UnsupportedOperationException}.
 */
public final class CitationReferenceRow implements Reference /*, Cloneable*/ {

    /**
     * Row layout of a citation entry. The order of the columns must match the
     * {@code col_*} index constants below.
     *
     * NOTE(review): "lastModified" and "lastAccessed" both use the nickname "a".
     * If Row/Column resolve columns by nickname (for example when reading or
     * writing the property form) the two may collide. Confirm against Row before
     * renaming, because a rename would change the external form.
     */
    public static final Row citationRow = new Row(new Column[]{
        new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"),
        new Column("a", Column.celltype_cardinal, Column.encoder_b256, 2, "lastModified"),
        new Column("a", Column.celltype_cardinal, Column.encoder_b256, 2, "lastAccessed"),
        new Column("t", Column.celltype_cardinal, Column.encoder_b256, 2, "posintext"),
        new Column("x", Column.celltype_cardinal, Column.encoder_b256, 1, "llocal"),
        new Column("y", Column.celltype_cardinal, Column.encoder_b256, 1, "lother"),
        new Column("m", Column.celltype_cardinal, Column.encoder_b256, 1, "urlLength"),
        new Column("n", Column.celltype_cardinal, Column.encoder_b256, 1, "urlComps"),
        new Column("g", Column.celltype_binary, Column.encoder_bytes, 1, "typeofurl"),
        new Column("k", Column.celltype_cardinal, Column.encoder_b256, 1, "reserve")
    },
    Base64Order.enhancedCoder
    );
    // nicknames in use by this row: a, g, h, k, m, n, t, x, y

    // column indexes into citationRow (nickname, width in bytes, meaning)
    private static final int col_urlhash      = 0; // h 12 the url hash b64-encoded
    private static final int col_lastModified = 1; // a 2  last-modified time of the document where url appears
    private static final int col_lastAccessed = 2; // a 2  current time when the url was seen
    private static final int col_posintext    = 3; // t 2  appearance of url in text; simply counts up the urls
    private static final int col_llocal       = 4; // x 1  outlinks to same domain
    private static final int col_lother       = 5; // y 1  outlinks to other domain
    private static final int col_urlLength    = 6; // m 1  byte-length of complete URL
    private static final int col_urlComps     = 7; // n 1  number of path components
    private static final int col_typeofurl    = 8; // g 1  type of url
    private static final int col_reserve      = 9; // k 1  reserve

    /** The backing row; every accessor of this class reads from it. */
    private final Row.Entry entry;

    /**
     * Creates a citation row from individual attribute values.
     *
     * <p>Both time values are converted with {@link MicroDate#microDateDays}
     * before they are stored (presumably they are milliseconds since the epoch;
     * verify against MicroDate). The reserve column is initialised with 0.
     *
     * @param urlHash      the url hash; an assertion checks that it is 12 characters long
     * @param lastmodified last-modified time of the document where the url appears
     * @param updatetime   update time; stored in the lastAccessed column
     * @param posintext    occurrence of the url; counts the urls
     * @param llocal       value for the llocal column (outlinks to same domain)
     * @param lother       value for the lother column (outlinks to other domain)
     * @param urlLength    byte-length of the complete URL
     * @param urlComps     number of path components
     * @param typeofurl    type of the url; stored as a single raw byte
     */
    public CitationReferenceRow(
            final String urlHash,
            final long lastmodified, // last-modified time of the document where url appears
            final long updatetime,   // update time
            final int posintext,     // occurrence of url; counts the url
            final int llocal,
            final int lother,
            final int urlLength,     // byte-length of complete URL
            final int urlComps,      // number of path components
            final byte typeofurl     // type of the url
    ) {
        assert (urlHash.length() == 12) : "urlhash = " + urlHash;
        this.entry = citationRow.newEntry();
        final int mddlm = MicroDate.microDateDays(lastmodified);
        final int mddct = MicroDate.microDateDays(updatetime);
        this.entry.setCol(col_urlhash, urlHash, null);
        this.entry.setCol(col_lastModified, mddlm);
        this.entry.setCol(col_lastAccessed, mddct);
        this.entry.setCol(col_posintext, posintext);
        this.entry.setCol(col_llocal, llocal);
        this.entry.setCol(col_lother, lother);
        this.entry.setCol(col_urlLength, urlLength);
        this.entry.setCol(col_urlComps, urlComps);
        this.entry.setCol(col_typeofurl, new byte[]{typeofurl});
        this.entry.setCol(col_reserve, 0);
    }

    /**
     * Creates a citation row from a url hash and the remaining row content.
     *
     * @param urlHash the url hash that forms the leading column of the row
     * @param code    the external form of the row minus the leading urlHash entry
     */
    public CitationReferenceRow(final String urlHash, final String code) {
        // Use an explicit charset: a bare getBytes() depends on the platform
        // default encoding, which would make the row content machine-dependent.
        this.entry = citationRow.newEntry((urlHash + code).getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * Creates a citation row by handing the given external string form to
     * {@code citationRow.newEntry(external, true)}.
     *
     * @param external the external string form of a complete row
     */
    public CitationReferenceRow(final String external) {
        this.entry = citationRow.newEntry(external, true);
    }

    /**
     * Creates a citation row from the binary form of a row.
     *
     * @param row the row bytes, passed unchanged to {@code citationRow.newEntry}
     */
    public CitationReferenceRow(final byte[] row) {
        this.entry = citationRow.newEntry(row);
    }

    /**
     * Creates a citation row from a region of a byte array.
     *
     * @param row    the array that contains the row bytes
     * @param offset position in {@code row} that is passed to {@code newEntry}
     * @param clone  passed through to {@code newEntry}; presumably selects whether
     *               the bytes are copied or shared (verify against Row)
     */
    public CitationReferenceRow(final byte[] row, final int offset, final boolean clone) {
        this.entry = citationRow.newEntry(row, offset, clone);
    }

    /**
     * Wraps an existing row entry. The entry is NOT copied, so this object and
     * the caller share the same {@link Row.Entry} instance.
     *
     * @param rentry the entry to wrap
     */
    public CitationReferenceRow(final Row.Entry rentry) {
        // FIXME: see if cloning is necessary
        this.entry = rentry;
    }

    /**
     * Returns a new instance built from a fresh copy of the first
     * {@code citationRow.objectsize} bytes of this entry's byte array.
     */
    @Override
    public CitationReferenceRow clone() {
        final byte[] b = new byte[citationRow.objectsize];
        System.arraycopy(this.entry.bytes(), 0, b, 0, citationRow.objectsize);
        return new CitationReferenceRow(b);
    }

    /** @return the property form of the backing entry, as produced by {@code Row.Entry.toPropertyForm(true, true, false)} */
    public String toPropertyForm() {
        return this.entry.toPropertyForm(true, true, false);
    }

    /** @return the backing row entry itself (not a copy) */
    public Entry toKelondroEntry() {
        return this.entry;
    }

    /** @return the content of the url hash column as bytes */
    public byte[] metadataHash() {
        return this.entry.getColBytes(col_urlhash, true);
    }

    /** @return the stored lastModified column value, i.e. the time in MicroDateDays format */
    public int virtualAge() {
        return (int) this.entry.getColLong(col_lastModified); // this is the time in MicroDateDays format
    }

    /** @return the lastModified column value converted back with {@link MicroDate#reverseMicroDateDays} */
    public long lastModified() {
        return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified));
    }

    /** @return the value of the posintext column */
    public int posintext() {
        return (int) this.entry.getColLong(col_posintext);
    }

    /** @return the value of the llocal column (outlinks to same domain) */
    public int llocal() {
        return (int) this.entry.getColLong(col_llocal);
    }

    /** @return the value of the lother column (outlinks to other domain) */
    public int lother() {
        return (int) this.entry.getColLong(col_lother);
    }

    /** @return the value of the urlLength column */
    public int urllength() {
        return (int) this.entry.getColLong(col_urlLength);
    }

    /** @return the value of the urlComps column */
    public int urlcomps() {
        return (int) this.entry.getColLong(col_urlComps);
    }

    /** @return {@code 1 / (llocal + lother + 1)}; the added 1 keeps the divisor non-zero when both counters are 0 */
    public double citationFrequency() {
        return 1.0 / ((double) (llocal() + lother() + 1));
    }

    /** @return the same string as {@link #toPropertyForm()} */
    @Override
    public String toString() {
        return toPropertyForm();
    }

    /**
     * Compares the last-modified times of two references.
     *
     * @param other the reference to compare with; may be {@code null}
     * @return {@code true} if {@code other} is not {@code null} and this
     *         reference's {@link #lastModified()} is strictly smaller than the other's
     */
    public boolean isOlder(final Reference other) {
        return other != null && this.lastModified() < other.lastModified();
    }

    /** @return a hash code computed from the url hash bytes only */
    @Override
    public int hashCode() {
        return ByteArray.hashCode(this.metadataHash());
    }

    /**
     * Two citation rows are equal when their url hashes are equal according to
     * {@code Base64Order.enhancedCoder}; all other columns are ignored.
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) return true;
        // instanceof is false for null, so no separate null check is needed
        if (!(obj instanceof CitationReferenceRow)) return false;
        final CitationReferenceRow other = (CitationReferenceRow) obj;
        return Base64Order.enhancedCoder.equal(this.metadataHash(), other.metadataHash());
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public int distance() {
        throw new UnsupportedOperationException();
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public void join(Reference oe) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public int maxposition() {
        throw new UnsupportedOperationException();
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public int minposition() {
        throw new UnsupportedOperationException();
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public int position(int p) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by citation rows; always throws {@link UnsupportedOperationException}. */
    public int positions() {
        throw new UnsupportedOperationException();
    }
}