From 971247b78fe8c34e12c2c75fa1862d5d47c5cb95 Mon Sep 17 00:00:00 2001 From: hermens Date: Fri, 13 Jan 2006 23:59:04 +0000 Subject: [PATCH] - rotate merged indexes after merging see: http://www.yacy-forum.de/viewtopic.php?t=1717 - fix -rwihashlist to correctly shutdown git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1336 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/plasma/plasmaWordIndex.java | 37 +++++++++++++++++-- .../plasmaWordIndexAssortmentCluster.java | 8 +++- .../anomic/plasma/plasmaWordIndexCache.java | 12 +++++- source/yacy.java | 10 ++++- 4 files changed, 59 insertions(+), 8 deletions(-) diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index e53b99cc0..de7eafab2 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -216,7 +216,8 @@ public final class plasmaWordIndex { public Iterator wordHashes(String startHash, boolean up, boolean rot) { //return ramCache.wordHashes(startHash, up); - return new correctedWordIterator(up, rot, startHash); // use correction until bug is found + if (rot) return new rotatingWordIterator(up, startHash); + else return new correctedWordIterator(up, rot, startHash); // use correction until bug is found } private final class correctedWordIterator implements Iterator { @@ -224,7 +225,7 @@ public final class plasmaWordIndex { String nextWord; public correctedWordIterator(boolean up, boolean rotating, String firstWord) { - iter = ramCache.wordHashes(firstWord, up); + iter = ramCache.wordHashes(firstWord, up, rotating); nextWord = (iter.hasNext()) ? (String) iter.next() : null; boolean corrected = true; int cc = 0; // to avoid rotation loops @@ -262,10 +263,40 @@ public final class plasmaWordIndex { } public void remove() { - throw new java.lang.UnsupportedOperationException("kelondroTree: remove in kelondro Tables not yet supported"); + throw new java.lang.UnsupportedOperationException("correctedWordIterator does not support remove"); } } // correctedWordIterator + private class rotatingWordIterator implements Iterator { + Iterator i; + boolean up; + + public rotatingWordIterator(boolean up, String startWordHash) { + this.up = up; + i = new correctedWordIterator(up, false, startWordHash); + } + + public void finalize() { + i = null; + } + + public boolean hasNext() { + if (i.hasNext()) return true; + else { + i = new correctedWordIterator(up, false, (up)?"------------":"zzzzzzzzzzzz"); + return i.hasNext(); + } + } + + public Object next() { + return i.next(); + } + + public void remove() { + throw new java.lang.UnsupportedOperationException("rotatingWordIterator does not support remove"); + } + } // class rotatingWordIterator + public Iterator fileIterator(String startHash, boolean up, boolean deleteEmpty) { return new iterateFiles(startHash, up, deleteEmpty); } diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 58743268b..49fd798bf 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -215,8 +215,14 @@ public final class plasmaWordIndexAssortmentCluster { } public Iterator hashConjunction(String startWordHash, boolean up) { + // Old convention implies rot = true + return hashConjunction(startWordHash, up, true); + } + + public Iterator hashConjunction(String startWordHash, boolean up, boolean rot) { HashSet iterators = new HashSet(); - for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, true)); + //if (rot) System.out.println("WARNING: kelondroMergeIterator does not work correctly when individual iterators rotate on their own!"); + for (int i = 0; i < clusterCount; i++) iterators.add(assortments[i].hashes(startWordHash, up, rot)); return kelondroMergeIterator.cascade(iterators, kelondroNaturalOrder.naturalOrder, up); } diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index e441ab7e6..7666b27b0 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -259,15 +259,23 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } public Iterator wordHashes(String startWordHash, boolean up) { + // Old convention implies rot = true + //return new rotatingWordHashes(startWordHash, up); + return wordHashes(startWordHash, up, true); + } + + public Iterator wordHashes(String startWordHash, boolean up, boolean rot) { // here we merge 3 databases into one view: // - the RAM Cache // - the assortmentCluster File Cache // - the backend if (!(up)) throw new RuntimeException("plasmaWordIndexCache.wordHashes can only count up"); + //if (rot) System.out.println("WARNING: wordHashes does not work correctly when individual Assotments rotate on their own!"); + //return new rotatingWordHashes(startWordHash, up); return new kelondroMergeIterator( new kelondroMergeIterator( cache.tailMap(startWordHash).keySet().iterator(), - assortmentCluster.hashConjunction(startWordHash, true), + assortmentCluster.hashConjunction(startWordHash, true, rot), kelondroNaturalOrder.naturalOrder, true), backend.wordHashes(startWordHash, true), @@ -552,4 +560,4 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface { } } -} \ No newline at end of file +} diff --git a/source/yacy.java b/source/yacy.java index c5d251307..9614381a9 100644 --- a/source/yacy.java +++ b/source/yacy.java @@ -1265,6 +1265,7 @@ public final class yacy { } private static void RWIHashList(String homePath, String targetName, String resource) { + plasmaWordIndex WordIndex = null; serverLog log = new serverLog("HASHLIST"); File homeDBroot = new File(new File(homePath), "DATA/PLASMADB"); String wordChunkStartHash = "------------"; @@ -1276,11 +1277,11 @@ public final class yacy { BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(file)); Iterator WordHashIterator = null; if (resource.equals("all")) { - plasmaWordIndex WordIndex = new plasmaWordIndex(homeDBroot, 8*1024*1024, log); + WordIndex = new plasmaWordIndex(homeDBroot, 8*1024*1024, log); WordHashIterator = WordIndex.wordHashes(wordChunkStartHash, true, false); } else if (resource.equals("assortments")) { plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, log); - WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true); + WordHashIterator = assortmentCluster.hashConjunction(wordChunkStartHash, true, false); } else if (resource.startsWith("assortment")) { int a = Integer.parseInt(resource.substring(10)); plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, null); @@ -1299,9 +1300,14 @@ public final class yacy { log.logInfo("Found " + counter + " Hashs until now. Last found Hash: " + wordHash); } } + bos.close(); } catch (IOException e) { e.printStackTrace(); } + if (WordIndex != null) { + WordIndex.close(60); + WordIndex = null; + } }