diff --git a/source/net/yacy/kelondro/blob/HeapReader.java b/source/net/yacy/kelondro/blob/HeapReader.java index 7dbe44dc8..eda2e659e 100644 --- a/source/net/yacy/kelondro/blob/HeapReader.java +++ b/source/net/yacy/kelondro/blob/HeapReader.java @@ -42,6 +42,7 @@ import net.yacy.kelondro.io.Writer; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.CloneableIterator; +import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.RotateIterator; import net.yacy.kelondro.util.FileUtils; @@ -147,17 +148,17 @@ public class HeapReader { private boolean initIndexReadDump() { // look for an index dump and read it if it exist // if this is successful, return true; otherwise false - String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile); + String fingerprint = fingerprintFileHash(this.heapFile); if (fingerprint == null) { Log.logSevere("HeapReader", "cannot generate a fingerprint for " + this.heapFile + ": null"); return false; } - File fif = HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint); + File fif = fingerprintIndexFile(this.heapFile, fingerprint); if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz"); - File fgf = HeapWriter.fingerprintGapFile(this.heapFile, fingerprint); + File fgf = fingerprintGapFile(this.heapFile, fingerprint); if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz"); if (!fif.exists() || !fgf.exists()) { - HeapWriter.deleteAllFingerprints(this.heapFile); + deleteAllFingerprints(this.heapFile, fif.getName(), fgf.getName()); return false; } @@ -194,6 +195,41 @@ public class HeapReader { return !this.index.isEmpty(); } + protected static File fingerprintIndexFile(File f, String fingerprint) { + assert f != null; + return new File(f.getParentFile(), f.getName() + "." 
+ fingerprint + ".idx"); + } + + protected static File fingerprintGapFile(File f, String fingerprint) { + assert f != null; + return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".gap"); + } + + protected static String fingerprintFileHash(File f) { + assert f != null; + assert f.exists() : "file = " + f.toString(); + String fp = Digest.fastFingerprintB64(f, false); + assert fp != null : "file = " + f.toString(); + if (fp == null) return null; + return fp.substring(0, 12); + } + + public static void deleteAllFingerprints(File f, String exception1, String exception2) { + File d = f.getParentFile(); + String n = f.getName(); + String[] l = d.list(); + for (int i = 0; i < l.length; i++) { + if (!l[i].startsWith(n)) continue; + if (exception1 != null && l[i].equals(exception1)) continue; + if (exception2 != null && l[i].equals(exception2)) continue; + if (l[i].endsWith(".idx") || + l[i].endsWith(".gap") || + l[i].endsWith(".idx.gz") || + l[i].endsWith(".gap.gz") + ) FileUtils.deletedelete(new File(d, l[i])); + } + } + private void initIndexReadFromHeap() throws IOException { // this initializes the this.index object by reading positions from the heap file Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. 
Please wait."); @@ -513,16 +549,16 @@ public class HeapReader { // to speed up the next start try { long start = System.currentTimeMillis(); - String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile); + String fingerprint = fingerprintFileHash(this.heapFile); if (fingerprint == null) { Log.logSevere("kelondroBLOBHeap", "cannot write a dump for " + heapFile.getName()+ ": fingerprint is null"); } else { - free.dump(HeapWriter.fingerprintGapFile(this.heapFile, fingerprint)); + free.dump(fingerprintGapFile(this.heapFile, fingerprint)); } free.clear(); free = null; if (fingerprint != null) { - index.dump(HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint)); + index.dump(fingerprintIndexFile(this.heapFile, fingerprint)); Log.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds."); } index.close(); diff --git a/source/net/yacy/kelondro/blob/HeapWriter.java b/source/net/yacy/kelondro/blob/HeapWriter.java index 9d1ad632c..ef6aa756a 100644 --- a/source/net/yacy/kelondro/blob/HeapWriter.java +++ b/source/net/yacy/kelondro/blob/HeapWriter.java @@ -34,7 +34,6 @@ import net.yacy.kelondro.index.HandleMap; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.ByteOrder; -import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.util.FileUtils; @@ -110,34 +109,6 @@ public final class HeapWriter { //os.flush(); // necessary? may cause bad IO performance :-( } - protected static File fingerprintIndexFile(File f, String fingerprint) { - assert f != null; - return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".idx"); - } - - protected static File fingerprintGapFile(File f, String fingerprint) { - assert f != null; - return new File(f.getParentFile(), f.getName() + "." 
+ fingerprint + ".gap"); - } - - protected static String fingerprintFileHash(File f) { - assert f != null; - assert f.exists() : "file = " + f.toString(); - String fp = Digest.fastFingerprintB64(f, false); - assert fp != null : "file = " + f.toString(); - if (fp == null) return null; - return fp.substring(0, 12); - } - - public static void deleteAllFingerprints(File f) { - File d = f.getParentFile(); - String n = f.getName(); - String[] l = d.list(); - for (int i = 0; i < l.length; i++) { - if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap") || l[i].endsWith(".idx.gz") || l[i].endsWith(".gap.gz"))) FileUtils.deletedelete(new File(d, l[i])); - } - } - /** * close the BLOB table * @throws @@ -160,12 +131,12 @@ public final class HeapWriter { // now we can create a dump of the index and the gap information // to speed up the next start long start = System.currentTimeMillis(); - String fingerprint = HeapWriter.fingerprintFileHash(this.heapFileREADY); + String fingerprint = HeapReader.fingerprintFileHash(this.heapFileREADY); if (fingerprint == null) { Log.logSevere("kelondroBLOBHeapWriter", "cannot write a dump for " + heapFileREADY.getName()+ ": fingerprint is null"); } else { - new Gap().dump(fingerprintGapFile(this.heapFileREADY, fingerprint)); - index.dump(fingerprintIndexFile(this.heapFileREADY, fingerprint)); + new Gap().dump(HeapReader.fingerprintGapFile(this.heapFileREADY, fingerprint)); + index.dump(HeapReader.fingerprintIndexFile(this.heapFileREADY, fingerprint)); Log.logInfo("kelondroBLOBHeapWriter", "wrote a dump for the " + this.index.size() + " index entries of " + heapFileREADY.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds."); } index.close(); diff --git a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java index 52a8a6814..31b97c9d9 100644 --- a/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java +++ 
b/source/net/yacy/kelondro/data/citation/CitationReferenceRow.java @@ -26,6 +26,8 @@ package net.yacy.kelondro.data.citation; +import java.util.Collection; + import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; @@ -214,7 +216,7 @@ public final class CitationReferenceRow implements Reference /*, Cloneable*/ { throw new UnsupportedOperationException(); } - public int positions() { + public Collection<Integer> positions() { throw new UnsupportedOperationException(); } diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java index c3922da5e..a8b3e4e17 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceRow.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceRow.java @@ -26,6 +26,9 @@ package net.yacy.kelondro.data.image; +import java.util.ArrayList; +import java.util.Collection; + import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; @@ -226,8 +229,8 @@ public final class ImageReferenceRow extends AbstractReference implements /*Imag return (int) this.entry.getColLong(col_hitcount); } - public int positions() { - return 1; + public Collection<Integer> positions() { + return new ArrayList<Integer>(0); } public int position(int p) { diff --git a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java b/source/net/yacy/kelondro/data/image/ImageReferenceVars.java index 0e4e46357..90a6c7758 100644 --- a/source/net/yacy/kelondro/data/image/ImageReferenceVars.java +++ b/source/net/yacy/kelondro/data/image/ImageReferenceVars.java @@ -26,7 +26,8 @@ package net.yacy.kelondro.data.image; -import java.util.ArrayList; +import java.util.Collection; +import java.util.concurrent.ConcurrentLinkedQueue; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.order.Bitfield; @@ -53,7 +54,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere
posinphrase, posofphrase, urlcomps, urllength, virtualAge, wordsintext, wordsintitle; - private final ArrayList<Integer> positions; + private final ConcurrentLinkedQueue<Integer> positions; public double termFrequency; public ImageReferenceVars( @@ -64,7 +65,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere final int hitcount, // how often appears this word in the text final int wordcount, // total number of words final int phrasecount, // total number of phrases - final ArrayList<Integer> ps, // positions of words that are joined into the reference + final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the reference final int posinphrase, // position of word in its phrase final int posofphrase, // number of the phrase where word appears final long lastmodified, // last-modified time of the document where word appears @@ -89,8 +90,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere this.llocal = outlinksSame; this.lother = outlinksOther; this.phrasesintext = phrasecount; - this.positions = new ArrayList<Integer>(ps.size()); - for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i)); + this.positions = new ConcurrentLinkedQueue<Integer>(); + for (Integer i: ps) this.positions.add(i); this.posinphrase = posinphrase; this.posofphrase = posofphrase; this.urlcomps = urlComps; @@ -112,8 +113,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere this.llocal = e.llocal(); this.lother = e.lother(); this.phrasesintext = e.phrasesintext(); - this.positions = new ArrayList<Integer>(e.positions()); - for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i)); + this.positions = new ConcurrentLinkedQueue<Integer>(); + for (Integer i: e.positions()) this.positions.add(i); this.posinphrase = e.posinphrase(); this.posofphrase = e.posofphrase(); this.urlcomps = e.urlcomps(); @@ -227,12 +228,8 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere return posinphrase; } -
public int positions() { - return this.positions.size(); - } - - public int position(int p) { - return this.positions.get(p); + public Collection<Integer> positions() { + return this.positions; } public int posofphrase() { @@ -248,7 +245,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere hitcount, // how often appears this word in the text wordsintext, // total number of words phrasesintext, // total number of phrases - positions.get(0), // position of word in all words + positions.iterator().next(), // position of word in all words posinphrase, // position of word in its phrase posofphrase, // number of the phrase where word appears lastModified, // last-modified time of the document where word appears @@ -347,7 +344,7 @@ public class ImageReferenceVars extends AbstractReference implements ImageRefere // combine the distance ImageReference oe = (ImageReference) r; - for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i)); + for (Integer i: r.positions()) this.positions.add(i); this.posinphrase = (this.posofphrase == oe.posofphrase()) ?
Math.min(this.posinphrase, oe.posinphrase()) : 0; this.posofphrase = Math.min(this.posofphrase, oe.posofphrase()); diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java index ae2c3cda0..f26f5dd8d 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceRow.java @@ -26,6 +26,8 @@ package net.yacy.kelondro.data.navigation; +import java.util.Collection; + import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; @@ -174,7 +176,7 @@ public final class NavigationReferenceRow extends AbstractReference implements N throw new UnsupportedOperationException(); } - public int positions() { + public Collection<Integer> positions() { throw new UnsupportedOperationException(); } diff --git a/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java b/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java index fd6f92b78..794c9607f 100644 --- a/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java +++ b/source/net/yacy/kelondro/data/navigation/NavigationReferenceVars.java @@ -26,6 +26,8 @@ package net.yacy.kelondro.data.navigation; +import java.util.Collection; + import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.rwi.AbstractReference; import net.yacy.kelondro.rwi.Reference; @@ -146,7 +148,7 @@ public class NavigationReferenceVars extends AbstractReference implements Navig throw new UnsupportedOperationException(); } - public int positions() { + public Collection<Integer> positions() { throw new UnsupportedOperationException(); } diff --git a/source/net/yacy/kelondro/data/word/WordReferenceRow.java b/source/net/yacy/kelondro/data/word/WordReferenceRow.java index 873632857..17cd59aea 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceRow.java +++
b/source/net/yacy/kelondro/data/word/WordReferenceRow.java @@ -26,6 +26,9 @@ package net.yacy.kelondro.data.word; +import java.util.ArrayList; +import java.util.Collection; + import net.yacy.kelondro.index.Column; import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row.Entry; @@ -257,8 +260,8 @@ public final class WordReferenceRow extends AbstractReference implements WordRef return (int) this.entry.getColLong(col_hitcount); } - public int positions() { - return 1; + public Collection<Integer> positions() { + return new ArrayList<Integer>(0); } public int position(final int p) { diff --git a/source/net/yacy/kelondro/data/word/WordReferenceVars.java b/source/net/yacy/kelondro/data/word/WordReferenceVars.java index ffab3d7c8..a7a7f55f1 100644 --- a/source/net/yacy/kelondro/data/word/WordReferenceVars.java +++ b/source/net/yacy/kelondro/data/word/WordReferenceVars.java @@ -26,11 +26,10 @@ package net.yacy.kelondro.data.word; -import java.util.ArrayList; -import java.util.Collections; +import java.util.Collection; import java.util.Comparator; -import java.util.List; import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.Semaphore; @@ -62,7 +61,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc posinphrase, posofphrase, urlcomps, urllength, virtualAge, wordsintext, wordsintitle; - private final List<Integer> positions; + private final ConcurrentLinkedQueue<Integer> positions; public double termFrequency; public WordReferenceVars( @@ -73,7 +72,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc final int hitcount, // how often appears this word in the text final int wordcount, // total number of words final int phrasecount, // total number of phrases - final List<Integer> ps, // positions of words that are joined into the reference + final ConcurrentLinkedQueue<Integer> ps, // positions of words that are joined into the
reference final int posinphrase, // position of word in its phrase final int posofphrase, // number of the phrase where word appears final long lastmodified, // last-modified time of the document where word appears @@ -98,8 +97,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.llocal = outlinksSame; this.lother = outlinksOther; this.phrasesintext = phrasecount; - this.positions = Collections.synchronizedList(new ArrayList<Integer>(ps.size())); - for (int i = 0; i < ps.size(); i++) this.positions.add(ps.get(i)); + this.positions = new ConcurrentLinkedQueue<Integer>(); + for (Integer i: ps) this.positions.add(i); this.posinphrase = posinphrase; this.posofphrase = posofphrase; this.urlcomps = urlComps; @@ -121,8 +120,8 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc this.llocal = e.llocal(); this.lother = e.lother(); this.phrasesintext = e.phrasesintext(); - this.positions = new ArrayList<Integer>(e.positions()); - for (int i = 0; i < e.positions(); i++) this.positions.add(e.position(i)); + this.positions = new ConcurrentLinkedQueue<Integer>(); + for (Integer i: e.positions()) this.positions.add(i); this.posinphrase = e.posinphrase(); this.posofphrase = e.posofphrase(); this.urlcomps = e.urlcomps(); @@ -237,14 +236,10 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc return posinphrase; } - public int positions() { - return this.positions.size(); + public Collection<Integer> positions() { + return this.positions; } - - public int position(final int p) { - return this.positions.get(p); - } - + public int posofphrase() { return posofphrase; } @@ -258,7 +253,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc hitcount, // how often appears this word in the text wordsintext, // total number of words phrasesintext, // total number of phrases - positions.get(0), // position of word in all words + positions.size() == 0 ?
1 : positions.iterator().next(), // position of word in all words posinphrase, // position of word in its phrase posofphrase, // number of the phrase where word appears lastModified, // last-modified time of the document where word appears @@ -357,7 +352,7 @@ public class WordReferenceVars extends AbstractReference implements WordReferenc // combine the distance WordReference oe = (WordReference) r; - for (int i = 0; i < r.positions(); i++) this.positions.add(r.position(i)); + for (Integer i: r.positions()) this.positions.add(i); this.posinphrase = (this.posofphrase == oe.posofphrase()) ? Math.min(this.posinphrase, oe.posinphrase()) : 0; this.posofphrase = Math.min(this.posofphrase, oe.posofphrase()); diff --git a/source/net/yacy/kelondro/rwi/AbstractReference.java b/source/net/yacy/kelondro/rwi/AbstractReference.java index cffb7fbfd..0429e0821 100644 --- a/source/net/yacy/kelondro/rwi/AbstractReference.java +++ b/source/net/yacy/kelondro/rwi/AbstractReference.java @@ -26,26 +26,27 @@ package net.yacy.kelondro.rwi; -import java.util.List; +import java.util.Collection; +import java.util.Iterator; public abstract class AbstractReference implements Reference { - protected static void a(List<Integer> a, int i) { + protected static void a(Collection<Integer> a, int i) { assert a != null; if (i < 0) return; // signal for 'do nothing' - synchronized (a) { - a.clear(); - a.add(i); - } + a.clear(); + a.add(i); } - protected static int max(List<Integer> a, List<Integer> b) { + + protected static int max(Collection<Integer> a, Collection<Integer> b) { assert a != null; if (a.size() == 0) return max(b); if (b.size() == 0) return max(a); return Math.max(max(a), max(b)); } - protected static int min(List<Integer> a, List<Integer> b) { + + protected static int min(Collection<Integer> a, Collection<Integer> b) { assert a != null; if (a.size() == 0) return min(b); if (b.size() == 0) return min(a); @@ -56,46 +57,56 @@ public abstract class AbstractReference implements Reference { return Math.min(ma, mb); } - private static int max(List<Integer> a) { + private static int
max(Collection<Integer> a) { assert a != null; if (a.size() == 0) return -1; - if (a.size() == 1) return a.get(0); - if (a.size() == 2) return Math.max(a.get(0), a.get(1)); - int r = a.get(0); - for (int i = 1; i < a.size(); i++) if (a.get(i) > r) r = a.get(i); + Iterator<Integer> i = a.iterator(); + if (a.size() == 1) return i.next(); + if (a.size() == 2) return Math.max(i.next(), i.next()); + int r = i.next(); + int s; + while (i.hasNext()) { + s = i.next(); + if (s > r) r = s; + } return r; } - private static int min(List<Integer> a) { + + private static int min(Collection<Integer> a) { assert a != null; if (a.size() == 0) return -1; - if (a.size() == 1) return a.get(0); - if (a.size() == 2) return Math.min(a.get(0), a.get(1)); - int r = a.get(0); - for (int i = 1; i < a.size(); i++) if (a.get(i) < r) r = a.get(i); + Iterator<Integer> i = a.iterator(); + if (a.size() == 1) return i.next(); + if (a.size() == 2) return Math.min(i.next(), i.next()); + int r = i.next(); + int s; + while (i.hasNext()) { + s = i.next(); + if (s < r) r = s; + } return r; } public int maxposition() { - assert positions() > 0; - if (positions() == 1) return position(0); - int p = position(0); - for (int i = positions() - 1; i > 0; i--) if (position(i) > p) p = position(i); - return p; + assert positions().size() > 0; + return max(positions()); } public int minposition() { - assert positions() > 0; - if (positions() == 1) return position(0); - int p = position(0); - for (int i = positions() - 1; i > 0; i--) if (position(i) < p) p = position(i); - return p; + assert positions().size() > 0; + return min(positions()); } public int distance() { + if (positions().size() < 2) return 0; int d = 0; - for (int i = 0; i < this.positions() - 1; i++) { - d += Math.abs(this.position(i) - this.position(i + 1)); + Iterator<Integer> i = positions().iterator(); + int s0 = i.next(), s1; + while (i.hasNext()) { + s1 = i.next(); + d += Math.abs(s0 - s1); + s0 = s1; } - return d; + return d / (positions().size() - 1); } } diff --git a/source/net/yacy/kelondro/rwi/Reference.java b/source/net/yacy/kelondro/rwi/Reference.java index
2edb445ca..bf41f4c92 100644 --- a/source/net/yacy/kelondro/rwi/Reference.java +++ b/source/net/yacy/kelondro/rwi/Reference.java @@ -26,6 +26,8 @@ package net.yacy.kelondro.rwi; +import java.util.Collection; + import net.yacy.kelondro.index.Row.Entry; public interface Reference { @@ -48,14 +50,12 @@ public interface Reference { public void join(final Reference oe); - public int positions(); + public Collection<Integer> positions(); public int maxposition(); public int minposition(); - public int position(int p); - public int distance(); }