Performance hack: use a hash cache for all hashes that are computed from a
byte array. If such a hash is used in a HashMap (which is very often the
case), this hack eliminates many re-computations of the same hash.
pull/1/head
Michael Peter Christen 13 years ago
parent f8a0cf6d7c
commit 10da7335ea

@ -133,9 +133,14 @@ public class CitationReference implements Reference, Serializable {
return false;
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the URL hash bytes as computed by
 * {@code ByteArray.hashCode(byte[])}. The value is computed on first use and
 * then served from {@code hashCache}, which saves repeated computation when
 * this object is hashed many times.
 *
 * If the computed hash happens to equal {@code Integer.MIN_VALUE} it is
 * simply recomputed on every call.
 *
 * @return the (cached) hash code of {@code urlhash()}
 */
@Override
public int hashCode() {
    // assumes urlhash() never changes after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlhash());
    }
    return this.hashCache;
}
@Override

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -52,8 +52,8 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
* object for termination of concurrent blocking queue processing
*/
public static final ImageReferenceRow poison = new ImageReferenceRow((Row.Entry) null);
public static final Row urlEntryRow = new Row(new Column[]{
new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"),
new Column("f", Column.celltype_cardinal, Column.encoder_b256, 4, "created"),
@ -65,7 +65,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "height"), // pixels
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "iso"), // iso number
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "verschlusszeit"), // the x in 1/x
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "blende"),
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "blende"),
new Column("i", Column.celltype_cardinal, Column.encoder_b256, 4, "distance"),
new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "author-id"), // author, creator, operator, camera-number
new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "group-id"), // may be also a crawl start identifier
@ -82,7 +82,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
Base64Order.enhancedCoder
);
// available chars: b,e,j,q
// static properties
private static final int col_urlhash = 0; // h 12 the url hash b64-encoded
private static final int col_lastModified = 1; // a 2 last-modified time of the document where word appears
@ -101,15 +101,15 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears
private static final int col_reserve1 = 18; // i 1 reserve1
private static final int col_reserve2 = 19; // k 1 reserve2
// ideas for the classification bytes
// 0 : content-type (person-portrait, persons-group, landscape, buildings, technical, artistical)
// 1 : content-situation (a categorization of the type, like: person/standing, building/factory, artistical/cubistic)
// 2 : content-category (a classification that is taken from the text environment by text analysis)
// 3 :
// 3 :
private final Row.Entry entry;
public ImageReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components
@ -147,7 +147,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
this.entry.setCol(col_reserve1, 0);
this.entry.setCol(col_reserve2, 0);
}
public ImageReferenceRow(final byte[] urlHash,
final int urlLength, // byte-length of complete URL
final int urlComps, // number of path components
@ -174,44 +174,47 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
this.entry.setCol(col_reserve1, 0);
this.entry.setCol(col_reserve2, 0);
}
public ImageReferenceRow(final String urlHash, final String code) {
// the code is the external form of the row minus the leading urlHash entry
this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code)));
}
public ImageReferenceRow(final String external) {
this.entry = urlEntryRow.newEntry(external, true);
}
public ImageReferenceRow(final byte[] row) {
this.entry = urlEntryRow.newEntry(row);
}
public ImageReferenceRow(final byte[] row, final int offset, final boolean clone) {
this.entry = urlEntryRow.newEntry(row, offset, clone);
}
public ImageReferenceRow(final Row.Entry rentry) {
// FIXME: see if cloning is necessary
this.entry = rentry;
}
/**
 * Creates a copy of this row that is backed by its own byte array.
 * The first {@code urlEntryRow.objectsize} bytes of the entry are copied
 * exactly once into a fresh array, which is then handed to the
 * {@code byte[]} constructor.
 *
 * @return a new {@code ImageReferenceRow} built from the copied bytes
 */
@Override
public ImageReferenceRow clone() {
    final byte[] b = new byte[urlEntryRow.objectsize];
    System.arraycopy(this.entry.bytes(), 0, b, 0, urlEntryRow.objectsize);
    return new ImageReferenceRow(b);
}
/**
 * Renders this row in property form by delegating to the underlying
 * entry's {@code toPropertyForm} with {@code '='} and a fixed set of flags.
 *
 * @return the property-form string produced by the entry
 */
@Override
public String toPropertyForm() {
    return this.entry.toPropertyForm('=', true, true, false, false);
}
@Override
public Entry toKelondroEntry() {
return this.entry;
}
@Override
public byte[] urlhash() {
return this.entry.getColBytes(col_urlhash, true);
}
@ -220,10 +223,11 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format
}
@Override
public long lastModified() {
return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified));
}
public long freshUntil() {
return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil));
}
@ -232,6 +236,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
return (int) this.entry.getColLong(col_hitcount);
}
@Override
public Collection<Integer> positions() {
return new ArrayList<Integer>(0);
}
@ -253,38 +258,45 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag
public char getType() {
return (char) this.entry.getColByte(col_doctype);
}
public int urllength() {
return (int) this.entry.getColLong(col_urlLength);
}
public int urlcomps() {
return (int) this.entry.getColLong(col_urlComps);
}
public Bitfield flags() {
return new Bitfield(this.entry.getColBytes(col_flags, true));
}
@Override
public String toString() {
return toPropertyForm();
}
/**
 * Tells whether this reference was last modified before the given one.
 *
 * @param other the reference to compare against; may be {@code null}
 * @return {@code true} only if {@code other} is not null and this
 *         reference's {@code lastModified()} is strictly smaller
 */
@Override
public boolean isOlder(final Reference other) {
    return other != null && this.lastModified() < other.lastModified();
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the URL hash bytes as computed by
 * {@code ByteArray.hashCode(byte[])}, computing it on first use and
 * returning the value stored in {@code hashCache} afterwards.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is not cached and is
 * recomputed on every call.
 *
 * @return the (cached) hash code of {@code urlhash()}
 */
@Override
public int hashCode() {
    // assumes the url hash column of the entry is not modified later - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlhash());
    }
    return this.hashCache;
}
@Override
public void join(Reference oe) {
throw new UnsupportedOperationException("");
}
}

@ -381,9 +381,14 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
this.wordsintext = this.wordsintext + oe.wordsintext();
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the {@code urlHash} bytes as computed by
 * {@code ByteArray.hashCode(byte[])}. The value is computed on the first
 * call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of {@code urlHash}
 */
@Override
public int hashCode() {
    // assumes urlHash is not reassigned or mutated after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlHash);
    }
    return this.hashCache;
}
public void addPosition(int position) {

@ -160,12 +160,14 @@ public class DigestURI extends MultiProtocolURI implements Serializable {
this.hash = null;
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the bytes delivered by {@code hash()} as computed
 * by {@code ByteArray.hashCode(byte[])}. The value is computed on the first
 * call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * NOTE(review): the {@code hash} field is reset to {@code null} elsewhere in
 * this class; if {@code hash()} can yield a different value afterwards,
 * {@code hashCache} must be reset at the same place - confirm.
 *
 * @return the (cached) hash code of {@code hash()}
 * @see java.lang.Object#hashCode()
 */
@Override
public int hashCode() {
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(hash());
    }
    return this.hashCache;
}
public static final int flagTypeID(final String hash) {

@ -215,6 +215,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
this.out = new LinkedBlockingQueue<WordReferenceRow>();
for (int i = 0; i < concurrency; i++) {
this.worker[i] = new Thread() {
@Override
public void run() {
String s;
try {
@ -290,22 +291,27 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return new WordReferenceRow(b);
}
@Override
public String toPropertyForm() {
return this.entry.toPropertyForm('=', true, true, false, false);
}
@Override
public Entry toKelondroEntry() {
return this.entry;
}
@Override
public byte[] urlhash() {
return this.entry.getColBytes(col_urlhash, true);
}
/**
 * Returns the content of the last-modified column, narrowed to an int.
 *
 * @return the stored last-modified value
 */
@Override
public int virtualAge() {
    // this is the time in MicroDateDays format
    final long microDateDays = this.entry.getColLong(col_lastModified);
    return (int) microDateDays;
}
@Override
public long lastModified() {
return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified));
}
@ -314,10 +320,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil));
}
@Override
public int hitcount() {
return (0xff & this.entry.getColByte(col_hitcount));
}
@Override
public Collection<Integer> positions() {
return new ArrayList<Integer>(0);
}
@ -327,54 +335,67 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return (int) this.entry.getColLong(col_posintext);
}
@Override
public int posinphrase() {
return (0xff & this.entry.getColByte(col_posinphrase));
}
@Override
public int posofphrase() {
return (0xff & this.entry.getColByte(col_posofphrase));
}
@Override
public int wordsintext() {
return (int) this.entry.getColLong(col_wordsInText);
}
@Override
public int phrasesintext() {
return (int) this.entry.getColLong(col_phrasesInText);
}
@Override
public byte[] getLanguage() {
return this.entry.getColBytes(col_language, true);
}
@Override
public char getType() {
return (char) this.entry.getColByte(col_doctype);
}
@Override
public int wordsintitle() {
return (0xff & this.entry.getColByte(col_wordsInTitle));
}
@Override
public int llocal() {
return (0xff & this.entry.getColByte(col_llocal));
}
@Override
public int lother() {
return (0xff & this.entry.getColByte(col_lother));
}
@Override
public int urllength() {
return (0xff & this.entry.getColByte(col_urlLength));
}
@Override
public int urlcomps() {
return (0xff & this.entry.getColByte(col_urlComps));
}
@Override
public Bitfield flags() {
return new Bitfield(this.entry.getColBytes(col_flags, false));
}
/**
 * Computes the ratio of the hit count to the number of words in text and
 * title; the divisor is increased by one so that it cannot be zero when
 * both counts are zero.
 *
 * @return {@code hitcount() / (wordsintext() + wordsintitle() + 1)} as a double
 */
@Override
public double termFrequency() {
    final double hits = hitcount();
    final double words = wordsintext() + wordsintitle() + 1;
    return hits / words;
}
@ -393,11 +414,17 @@ public final class WordReferenceRow extends AbstractReference implements WordRef
return Base64Order.enhancedCoder.equal(urlhash(), other.urlhash());
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the URL hash bytes as computed by
 * {@code ByteArray.hashCode(byte[])}, computing it on first use and
 * returning the value stored in {@code hashCache} afterwards.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is not cached and is
 * recomputed on every call.
 *
 * @return the (cached) hash code of {@code urlhash()}
 */
@Override
public int hashCode() {
    // assumes the url hash column of the entry is not modified later - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(urlhash());
    }
    return this.hashCache;
}
@Override
public void join(final Reference oe) {
throw new UnsupportedOperationException("");

@ -31,7 +31,6 @@ import java.util.Comparator;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8;
@ -394,9 +393,14 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc
return Base64Order.enhancedCoder.equal(this.urlHash, other.urlHash);
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the {@code urlHash} bytes as computed by
 * {@code ByteArray.hashCode(byte[])}. The value is computed on the first
 * call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of {@code urlHash}
 */
@Override
public int hashCode() {
    // assumes urlHash is not reassigned or mutated after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlHash);
    }
    return this.hashCache;
}
@Override

@ -7,7 +7,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -42,28 +42,28 @@ import net.yacy.cora.order.ByteOrder;
*/
public class ByteArray {
// the wrapped bytes; the array is referenced, not copied
private final byte[] buffer;
// NOTE(review): the hashCache-based branch of hashCode() does not read this
// field; it looks like a leftover of the former caching scheme - confirm and remove
private final int hash;

/**
 * Wraps the given byte array. The array is stored by reference, so later
 * changes to {@code bb} are visible through this object.
 *
 * @param bb the bytes to wrap
 */
public ByteArray(final byte[] bb) {
    this.buffer = bb;
    this.hash = 0;
}
/**
 * @return the number of bytes in the wrapped array
 */
public int length() {
    return this.buffer.length;
}
public byte[] asBytes() {
return this.buffer;
}
/**
 * Reads a single byte of the wrapped array.
 *
 * @param pos index into the wrapped array
 * @return the byte stored at {@code pos}
 * @throws ArrayIndexOutOfBoundsException if {@code pos} is outside the array
 */
public byte readByte(final int pos) {
    return this.buffer[pos];
}
public static boolean startsWith(final byte[] buffer, final byte[] pattern) {
// compares two byte arrays: true, if pattern appears completely at offset position
if (buffer == null && pattern == null) return true;
@ -72,22 +72,25 @@ public class ByteArray {
for (int i = 0; i < pattern.length; i++) if (buffer[i] != pattern[i]) return false;
return true;
}
/**
 * Compares the wrapped bytes of this object with those of {@code b} using
 * the given order. With assertions enabled, both buffers are required to
 * have the same length.
 *
 * @param b     the other byte array wrapper
 * @param order the ordering that performs the comparison
 * @return the result of {@code order.compare} on both buffers
 */
public int compareTo(final ByteArray b, final ByteOrder order) {
    final byte[] mine = this.buffer;
    final byte[] theirs = b.buffer;
    assert mine.length == theirs.length;
    return order.compare(mine, theirs);
}
/**
 * Compares a section of this buffer with a section of {@code b}'s buffer by
 * delegating to {@code order.compare} with both buffers, both offsets and
 * {@code blength}. {@code alength} is only used by the assertion that both
 * lengths are equal.
 *
 * @param aoffset start offset within this buffer
 * @param alength length of the section of this buffer
 * @param b       the other byte array wrapper
 * @param boffset start offset within {@code b}'s buffer
 * @param blength length of the section of {@code b}'s buffer
 * @param order   the ordering that performs the comparison
 * @return the result of {@code order.compare}
 */
public int compareTo(final int aoffset, final int alength, final ByteArray b, final int boffset, final int blength, final ByteOrder order) {
    assert alength == blength;
    final byte[] mine = this.buffer;
    final byte[] theirs = b.buffer;
    return order.compare(mine, aoffset, theirs, boffset, blength);
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the wrapped bytes as computed by the static
 * {@code hashCode(byte[])}. The value is computed on the first call and
 * read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of the wrapped bytes
 */
@Override
public int hashCode() {
    // assumes the content of buffer is not modified after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.buffer);
    }
    return this.hashCache;
}
/**
@ -100,18 +103,18 @@ public class ByteArray {
for (byte c: b) h = 31 * h + (c & 0xFF);
return h;
}
/**
 * Compares this object with another {@code ByteArray} by content.
 *
 * Any object that is not a {@code ByteArray} (including {@code null}) is
 * unequal instead of causing a {@code ClassCastException}. Two wrappers are
 * equal when their buffers have the same length and the same bytes, or when
 * both buffers are {@code null}; the latter keeps {@code x.equals(x)} true
 * for a wrapper without a buffer.
 *
 * @param other the object to compare with
 * @return {@code true} if {@code other} is a {@code ByteArray} with equal content
 */
@Override
public boolean equals(Object other) {
    if (this == other) return true;
    if (!(other instanceof ByteArray)) return false; // also covers other == null
    final ByteArray b = (ByteArray) other;
    // handles null buffers and compares length and content
    return java.util.Arrays.equals(this.buffer, b.buffer);
}
public static long parseDecimal(final byte[] s) throws NumberFormatException {
if (s == null) throw new NumberFormatException("null");
@ -121,7 +124,7 @@ public class ByteArray {
long limit;
long multmin;
long digit;
if (max <= 0) throw new NumberFormatException(UTF8.String(s));
if (s[0] == '-') {
negative = true;

@ -100,9 +100,14 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_";
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the bytes delivered by {@code href.hash()} as
 * computed by {@code ByteArray.hashCode(byte[])}. The value is computed on
 * the first call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of {@code href.hash()}
 */
@Override
public int hashCode() {
    // assumes href and its hash do not change after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.href.hash());
    }
    return this.hashCache;
}
@Override

@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -48,7 +48,7 @@ import net.yacy.search.index.Segment;
public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEntry> {
// payload objects
private final URIMetadataRow urlentry;
private String alternative_urlstring;
@ -56,10 +56,10 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
private final TextSnippet textSnippet;
private final List<MediaSnippet> mediaSnippets;
private final Segment indexSegment;
// statistic objects
public long dbRetrievalTime, snippetComputationTime, ranking;
public ResultEntry(final URIMetadataRow urlentry,
final Segment indexSegment,
SeedDB peers,
@ -103,9 +103,13 @@ public class ResultEntry implements Comparable<ResultEntry>, Comparator<ResultEn
if ((p = this.alternative_urlname.indexOf('?')) > 0) this.alternative_urlname = this.alternative_urlname.substring(0, p);
}
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the bytes delivered by {@code urlentry.hash()} as
 * computed by {@code ByteArray.hashCode(byte[])}. The value is computed on
 * the first call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of {@code urlentry.hash()}
 */
@Override
public int hashCode() {
    // assumes urlentry.hash() always yields the same bytes - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlentry.hash());
    }
    return this.hashCache;
}
@Override
public boolean equals(final Object obj) {

@ -400,9 +400,14 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return o1.compareTo(o2);
}
// cached result of hashCode(); Integer.MIN_VALUE marks the cache as "not computed yet"
private int hashCache = Integer.MIN_VALUE;

/**
 * Returns the hash code of the {@code urlhash} bytes as computed by
 * {@code ByteArray.hashCode(byte[])}. The value is computed on the first
 * call and read from {@code hashCache} on later calls.
 *
 * A computed hash equal to {@code Integer.MIN_VALUE} is recomputed on
 * every call.
 *
 * @return the (cached) hash code of {@code urlhash}
 */
@Override
public int hashCode() {
    // assumes urlhash is not reassigned or mutated after the first call - TODO confirm
    if (this.hashCache == Integer.MIN_VALUE) {
        this.hashCache = ByteArray.hashCode(this.urlhash);
    }
    return this.hashCache;
}
@Override

Loading…
Cancel
Save