From 10da7335ea92d9817917ba927005b2d0dca38b07 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 30 May 2012 16:59:13 +0200 Subject: [PATCH] performance hack: use a hash cache for all hashes that are computed by a byte array. If this hash is used in a HashMap (which is very often the case) then this hack eliminates a lot of re-computations of the same hash. --- .../data/citation/CitationReference.java | 7 +- .../data/image/ImageReferenceRow.java | 66 +++++++++++-------- .../data/image/ImageReferenceVars.java | 7 +- .../yacy/kelondro/data/meta/DigestURI.java | 10 +-- .../kelondro/data/word/WordReferenceRow.java | 29 +++++++- .../kelondro/data/word/WordReferenceVars.java | 8 ++- source/net/yacy/kelondro/util/ByteArray.java | 43 ++++++------ .../net/yacy/search/snippet/MediaSnippet.java | 7 +- .../net/yacy/search/snippet/ResultEntry.java | 14 ++-- .../net/yacy/search/snippet/TextSnippet.java | 7 +- 10 files changed, 135 insertions(+), 63 deletions(-) diff --git a/source/net/yacy/kelondro/data/citation/CitationReference.java b/source/net/yacy/kelondro/data/citation/CitationReference.java index 38e3612e1..1272b7193 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReference.java +++ b/source/net/yacy/kelondro/data/citation/CitationReference.java @@ -133,9 +133,14 @@ public class CitationReference implements Reference, Serializable { return false; } + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(this.urlhash()); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.urlhash()); + } + return this.hashCache; } @Override diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java index 4ba4039be..12c21b72f 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java @@ -9,7 +9,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -52,8 +52,8 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag * object for termination of concurrent blocking queue processing */ public static final ImageReferenceRow poison = new ImageReferenceRow((Row.Entry) null); - - + + public static final Row urlEntryRow = new Row(new Column[]{ new Column("h", Column.celltype_string, Column.encoder_bytes, Word.commonHashLength, "urlhash"), new Column("f", Column.celltype_cardinal, Column.encoder_b256, 4, "created"), @@ -65,7 +65,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "height"), // pixels new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "iso"), // iso number new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "verschlusszeit"), // the x in 1/x - new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "blende"), + new Column("i", Column.celltype_cardinal, Column.encoder_b256, 2, "blende"), new Column("i", Column.celltype_cardinal, Column.encoder_b256, 4, "distance"), new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "author-id"), // author, creator, operator, camera-number new Column("o", Column.celltype_cardinal, Column.encoder_b256, 4, "group-id"), // may be also a crawl start identifier @@ -82,7 +82,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag Base64Order.enhancedCoder ); // available chars: b,e,j,q - + // static properties private static final int col_urlhash = 0; // h 12 the url hash b64-encoded private static final int col_lastModified = 1; // a 2 last-modified time of the document where word appears @@ -101,15 +101,15 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag private static final int col_posofphrase = 17; // o 1 number of the phrase where word appears private static final int col_reserve1 = 18; // i 1 reserve1 private static final int col_reserve2 = 19; // k 1 reserve2 - + // ideas for the classification bytes // 0 : content-type (person-portrait, persons-group, landscape, buildings, technical, artistical) // 1 : content-situation (a categorization of the type, like: person/standing, building/factory, artistical/cubistic) // 2 : content-category (a classification that is taken from the text environment by text analysis) - // 3 : + // 3 : private final Row.Entry entry; - + public ImageReferenceRow(final byte[] urlHash, final int urlLength, // byte-length of complete URL final int urlComps, // number of path components @@ -147,7 +147,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag this.entry.setCol(col_reserve1, 0); this.entry.setCol(col_reserve2, 0); } - + public ImageReferenceRow(final byte[] urlHash, final int urlLength, // byte-length of complete URL final int urlComps, // number of path components @@ -174,44 +174,47 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag this.entry.setCol(col_reserve1, 0); this.entry.setCol(col_reserve2, 0); } - + public ImageReferenceRow(final String urlHash, final String code) { // the code is the external form of the row minus the leading urlHash entry this.entry = urlEntryRow.newEntry(UTF8.getBytes((urlHash + code))); } - + public ImageReferenceRow(final String external) { this.entry = urlEntryRow.newEntry(external, true); } - + public ImageReferenceRow(final byte[] row) { this.entry = urlEntryRow.newEntry(row); } - + public ImageReferenceRow(final byte[] row, final int offset, final boolean clone) { this.entry = urlEntryRow.newEntry(row, offset, clone); } - + public ImageReferenceRow(final Row.Entry rentry) { // FIXME: see if cloning is necessary this.entry = rentry; } - + @Override public ImageReferenceRow clone() { final byte[] b = new byte[urlEntryRow.objectsize]; - System.arraycopy(entry.bytes(), 0, b, 0, urlEntryRow.objectsize); + System.arraycopy(this.entry.bytes(), 0, b, 0, urlEntryRow.objectsize); return new ImageReferenceRow(b); } + @Override public String toPropertyForm() { - return entry.toPropertyForm('=', true, true, false, false); + return this.entry.toPropertyForm('=', true, true, false, false); } - + + @Override public Entry toKelondroEntry() { return this.entry; } + @Override public byte[] urlhash() { return this.entry.getColBytes(col_urlhash, true); } @@ -220,10 +223,11 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format } + @Override public long lastModified() { return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified)); } - + public long freshUntil() { return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil)); } @@ -232,6 +236,7 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag return (int) this.entry.getColLong(col_hitcount); } + @Override public Collection positions() { return new ArrayList(0); } @@ -253,38 +258,45 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag public char getType() { return (char) this.entry.getColByte(col_doctype); } - + public int urllength() { return (int) this.entry.getColLong(col_urlLength); } - + public int urlcomps() { return (int) this.entry.getColLong(col_urlComps); } - + public Bitfield flags() { return new Bitfield(this.entry.getColBytes(col_flags, true)); } - + @Override public String toString() { return toPropertyForm(); } + @Override public boolean isOlder(final Reference other) { if (other == null) return false; if (this.lastModified() < other.lastModified()) return true; return false; } - + + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(this.urlhash()); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.urlhash()); + } + return this.hashCache; } + @Override public void join(Reference oe) { throw new UnsupportedOperationException(""); - + } - + } diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java b/source/net/yacy/kelondro/data/image/ImageReferenceVars.java index e4415f155..816f1d314 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceVars.java @@ -381,9 +381,14 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere this.wordsintext = this.wordsintext + oe.wordsintext(); } + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(this.urlHash); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.urlHash); + } + return this.hashCache; } public void addPosition(int position) { diff --git a/source/net/yacy/kelondro/data/meta/DigestURI.java b/source/net/yacy/kelondro/data/meta/DigestURI.java index 5b2b95aab..a04e065a7 100644 --- a/source/net/yacy/kelondro/data/meta/DigestURI.java +++ b/source/net/yacy/kelondro/data/meta/DigestURI.java @@ -160,12 +160,14 @@ public class DigestURI extends MultiProtocolURI implements Serializable { this.hash = null; } - /* (non-Javadoc) - * @see java.lang.Object#hashCode() - */ + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(hash()); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(hash()); + } + return this.hashCache; } public static final int flagTypeID(final String hash) { diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index b2a92fd76..472421b2d 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -215,6 +215,7 @@ public final class WordReferenceRow extends AbstractReference implements WordRef this.out = new LinkedBlockingQueue(); for (int i = 0; i < concurrency; i++) { this.worker[i] = new Thread() { + @Override public void run() { String s; try { @@ -290,22 +291,27 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return new WordReferenceRow(b); } + @Override public String toPropertyForm() { return this.entry.toPropertyForm('=', true, true, false, false); } + @Override public Entry toKelondroEntry() { return this.entry; } + @Override public byte[] urlhash() { return this.entry.getColBytes(col_urlhash, true); } + @Override public int virtualAge() { return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format } + @Override public long lastModified() { return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_lastModified)); } @@ -314,10 +320,12 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return MicroDate.reverseMicroDateDays((int) this.entry.getColLong(col_freshUntil)); } + @Override public int hitcount() { return (0xff & this.entry.getColByte(col_hitcount)); } + @Override public Collection positions() { return new ArrayList(0); } @@ -327,54 +335,67 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return (int) this.entry.getColLong(col_posintext); } + @Override public int posinphrase() { return (0xff & this.entry.getColByte(col_posinphrase)); } + @Override public int posofphrase() { return (0xff & this.entry.getColByte(col_posofphrase)); } + @Override public int wordsintext() { return (int) this.entry.getColLong(col_wordsInText); } + @Override public int phrasesintext() { return (int) this.entry.getColLong(col_phrasesInText); } + @Override public byte[] getLanguage() { return this.entry.getColBytes(col_language, true); } + @Override public char getType() { return (char) this.entry.getColByte(col_doctype); } + @Override public int wordsintitle() { return (0xff & this.entry.getColByte(col_wordsInTitle)); } + @Override public int llocal() { return (0xff & this.entry.getColByte(col_llocal)); } + @Override public int lother() { return (0xff & this.entry.getColByte(col_lother)); } + @Override public int urllength() { return (0xff & this.entry.getColByte(col_urlLength)); } + @Override public int urlcomps() { return (0xff & this.entry.getColByte(col_urlComps)); } + @Override public Bitfield flags() { return new Bitfield(this.entry.getColBytes(col_flags, false)); } + @Override public double termFrequency() { return (((double) hitcount()) / ((double) (wordsintext() + wordsintitle() + 1))); } @@ -393,11 +414,17 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return Base64Order.enhancedCoder.equal(urlhash(), other.urlhash()); } + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(urlhash()); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(urlhash()); + } + return this.hashCache; } + @Override public void join(final Reference oe) { throw new UnsupportedOperationException(""); diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index c7cdede29..ec198cd8b 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -31,7 +31,6 @@ import java.util.Comparator; import java.util.Queue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Semaphore; import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; @@ -394,9 +393,14 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return Base64Order.enhancedCoder.equal(this.urlHash, other.urlHash); } + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - return ByteArray.hashCode(this.urlHash); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.urlHash); + } + return this.hashCache; } @Override diff --git a/source/net/yacy/kelondro/util/ByteArray.java b/source/net/yacy/kelondro/util/ByteArray.java index 0ac0b633a..6dc21e9d2 100644 --- a/source/net/yacy/kelondro/util/ByteArray.java +++ b/source/net/yacy/kelondro/util/ByteArray.java @@ -7,7 +7,7 @@ // $LastChangedBy$ // // LICENSE -// +// // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or @@ -42,28 +42,28 @@ import net.yacy.cora.order.ByteOrder; */ public class ByteArray { - + private final byte[] buffer; - private int hash; + private final int hash; + - public ByteArray(final byte[] bb) { this.buffer = bb; this.hash = 0; } public int length() { - return buffer.length; + return this.buffer.length; } - + public byte[] asBytes() { return this.buffer; } - + public byte readByte(final int pos) { - return buffer[pos]; + return this.buffer[pos]; } - + public static boolean startsWith(final byte[] buffer, final byte[] pattern) { // compares two byte arrays: true, if pattern appears completely at offset position if (buffer == null && pattern == null) return true; @@ -72,22 +72,25 @@ public class ByteArray { for (int i = 0; i < pattern.length; i++) if (buffer[i] != pattern[i]) return false; return true; } - + public int compareTo(final ByteArray b, final ByteOrder order) { assert this.buffer.length == b.buffer.length; return order.compare(this.buffer, b.buffer); } - + public int compareTo(final int aoffset, final int alength, final ByteArray b, final int boffset, final int blength, final ByteOrder order) { assert alength == blength; return order.compare(this.buffer, aoffset, b.buffer, boffset, blength); } - + + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful + @Override public int hashCode() { - if (this.hash != 0) return this.hash; - this.hash = hashCode(this.buffer); - return this.hash; + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.buffer); + } + return this.hashCache; } /** @@ -100,18 +103,18 @@ public class ByteArray { for (byte c: b) h = 31 * h + (c & 0xFF); return h; } - + @Override public boolean equals(Object other) { ByteArray b = (ByteArray) other; - if (buffer == null && b == null) return true; - if (buffer == null || b == null) return false; + if (this.buffer == null && b == null) return true; + if (this.buffer == null || b == null) return false; if (this.buffer.length != b.buffer.length) return false; int l = this.buffer.length; while (--l >= 0) if (this.buffer[l] != b.buffer[l]) return false; return true; } - + public static long parseDecimal(final byte[] s) throws NumberFormatException { if (s == null) throw new NumberFormatException("null"); @@ -121,7 +124,7 @@ public class ByteArray { long limit; long multmin; long digit; - + if (max <= 0) throw new NumberFormatException(UTF8.String(s)); if (s[0] == '-') { negative = true; diff --git a/source/net/yacy/search/snippet/MediaSnippet.java b/source/net/yacy/search/snippet/MediaSnippet.java index 21c6eb602..7f6707601 100644 --- a/source/net/yacy/search/snippet/MediaSnippet.java +++ b/source/net/yacy/search/snippet/MediaSnippet.java @@ -100,9 +100,14 @@ public class MediaSnippet implements Comparable, Comparator, Comparator { - + // payload objects private final URIMetadataRow urlentry; private String alternative_urlstring; @@ -56,10 +56,10 @@ public class ResultEntry implements Comparable, Comparator mediaSnippets; private final Segment indexSegment; - + // statistic objects public long dbRetrievalTime, snippetComputationTime, ranking; - + public ResultEntry(final URIMetadataRow urlentry, final Segment indexSegment, SeedDB peers, @@ -103,9 +103,13 @@ public class ResultEntry implements Comparable, Comparator 0) this.alternative_urlname = this.alternative_urlname.substring(0, p); } } + private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful @Override public int hashCode() { - return ByteArray.hashCode(this.urlentry.hash()); + if (this.hashCache == Integer.MIN_VALUE) { + this.hashCache = ByteArray.hashCode(this.urlentry.hash()); + } + return this.hashCache; } @Override public boolean equals(final Object obj) { diff --git a/source/net/yacy/search/snippet/TextSnippet.java b/source/net/yacy/search/snippet/TextSnippet.java index c0d0c5fa9..60133bed2 100644 --- a/source/net/yacy/search/snippet/TextSnippet.java +++ b/source/net/yacy/search/snippet/TextSnippet.java @@ -400,9 +400,14 @@ public class TextSnippet implements Comparable, Comparator