From 0abf33ed032c075392d6fd2579034e143b253975 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 12 Nov 2007 01:14:51 +0000 Subject: [PATCH] - tried to remove deadlock - enhanced searchtime in kelondroRowSets - enhanced uniq() - reverse enumeration causes less time in case of mass removal of doubles git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4207 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/index/indexCollectionRI.java | 20 ++--- source/de/anomic/index/indexRAMRI.java | 2 +- .../kelondro/kelondroRowCollection.java | 86 ++++++++++++++++--- source/de/anomic/kelondro/kelondroRowSet.java | 62 ++++++++++--- source/de/anomic/plasma/plasmaWordIndex.java | 26 +++--- 5 files changed, 146 insertions(+), 50 deletions(-) diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java index 543bae7a3..120fb5b1b 100644 --- a/source/de/anomic/index/indexCollectionRI.java +++ b/source/de/anomic/index/indexCollectionRI.java @@ -71,7 +71,7 @@ public class indexCollectionRI implements indexRI { return collectionIndex.size(); } - public synchronized int indexSize(String wordHash) { + public int indexSize(String wordHash) { try { return collectionIndex.indexSize(wordHash.getBytes()); } catch (IOException e) { @@ -122,7 +122,7 @@ public class indexCollectionRI implements indexRI { } - public synchronized boolean hasContainer(String wordHash) { + public boolean hasContainer(String wordHash) { try { return collectionIndex.has(wordHash.getBytes()); } catch (IOException e) { @@ -130,7 +130,7 @@ public class indexCollectionRI implements indexRI { } } - public synchronized indexContainer getContainer(String wordHash, Set urlselection) { + public indexContainer getContainer(String wordHash, Set urlselection) { try { kelondroRowSet collection = collectionIndex.get(wordHash.getBytes()); if (collection != null) collection.select(urlselection); @@ -141,7 +141,7 @@ public class indexCollectionRI implements indexRI { } } - public synchronized indexContainer deleteContainer(String wordHash) { + public indexContainer deleteContainer(String wordHash) { try { kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes()); if (collection == null) return null; @@ -151,13 +151,13 @@ public class indexCollectionRI implements indexRI { } } - public synchronized boolean removeEntry(String wordHash, String urlHash) { + public boolean removeEntry(String wordHash, String urlHash) { HashSet hs = new HashSet(); hs.add(urlHash.getBytes()); return removeEntries(wordHash, hs) == 1; } - public synchronized int removeEntries(String wordHash, Set urlHashes) { + public int removeEntries(String wordHash, Set urlHashes) { try { return collectionIndex.remove(wordHash.getBytes(), urlHashes); } catch (kelondroOutOfLimitsException e) { @@ -169,7 +169,7 @@ public class indexCollectionRI implements indexRI { } } - public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) { + public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) { try { collectionIndex.merge(newEntries); } catch (kelondroOutOfLimitsException e) { @@ -179,10 +179,10 @@ public class indexCollectionRI implements indexRI { } } - public synchronized void addMultipleEntries(List /*of indexContainer*/ containerList) { + public void addMultipleEntries(List /*of indexContainer*/ containerList) { try { //for (int i = 0; i < containerList.size(); i++) collectionIndex.merge((indexContainer) containerList.get(i)); - synchronized (containerList) {collectionIndex.mergeMultiple(containerList);} + collectionIndex.mergeMultiple(containerList); } catch (kelondroOutOfLimitsException e) { e.printStackTrace(); } catch (IOException e) { @@ -190,7 +190,7 @@ public class indexCollectionRI implements indexRI { } } - public synchronized void close() { + public void close() { collectionIndex.close(); } diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java index c2706db5e..0d6bc4d7a 100644 --- a/source/de/anomic/index/indexRAMRI.java +++ b/source/de/anomic/index/indexRAMRI.java @@ -288,7 +288,7 @@ public final class indexRAMRI implements indexRI { public class wordContainerIterator implements kelondroCloneableIterator { // this class exists, because the wCache cannot be iterated with rotation - // and because every indeContainer Object that is iterated must be returned as top-level-clone + // and because every indexContainer Object that is iterated must be returned as top-level-clone // so this class simulates wCache.tailMap(startWordHash).values().iterator() // plus the mentioned features diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 58f5dfe05..b468f9f5d 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -56,7 +56,7 @@ public class kelondroRowCollection { private static final int exp_order_bound = 5; private static final int exp_collection = 6; - private static int processors = 1; //Runtime.getRuntime().availableProcessors(); + private static int processors = Runtime.getRuntime().availableProcessors(); public kelondroRowCollection(kelondroRowCollection rc) { this.rowdef = rc.rowdef; @@ -427,7 +427,7 @@ public class kelondroRowCollection { qsort(p, this.chunkcount, 0, swapspace); } this.sortBound = this.chunkcount; - assert this.isSorted(); + //assert this.isSorted(); } private class qsortthread extends Thread { @@ -528,14 +528,13 @@ public class kelondroRowCollection { // then this method may run a long time with 100% CPU load which is caused // by the large number of memory movements. Therefore it is possible // to assign a runtime limitation - if (chunkcount <= 1) return; - int i = 0; - while (i < chunkcount - 1) { + if (chunkcount < 2) return; + int i = chunkcount - 2; + while (i >= 0) { if (compare(i, i + 1) == 0) { - removeRow(i, true); // this decreases the chunkcount - } else { - i++; + removeRow(i, true); } + i--; } } @@ -580,8 +579,7 @@ public class kelondroRowCollection { return c; } - private final byte[] compilePivot(int i) { - assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length; + protected final byte[] compilePivot(int i) { assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount; assert (this.rowdef.objectOrder != null); assert (this.rowdef.objectOrder instanceof kelondroBase64Order); @@ -591,7 +589,14 @@ public class kelondroRowCollection { return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength); } - private final int comparePivot(byte[] compiledPivot, int j) { + protected final byte[] compilePivot(byte[] a, int astart, int alength) { + assert (this.rowdef.objectOrder != null); + assert (this.rowdef.objectOrder instanceof kelondroBase64Order); + assert (this.rowdef.primaryKeyIndex == 0) : "this.sortColumn = " + this.rowdef.primaryKeyIndex; + return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength); + } + + protected final int comparePivot(byte[] compiledPivot, int j) { assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length; assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount; assert (this.rowdef.objectOrder != null); @@ -693,7 +698,26 @@ public class kelondroRowCollection { boolean eis = e.isSorted(); long t12 = System.currentTimeMillis(); System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " milliseconds"); - System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size()); + random = new Random(0); + boolean allfound = true; + for (int i = 0; i < testsize; i++) { + if (e.get(randomHash().getBytes()) == null) { + allfound = false; + break; + } + } + long t13 = System.currentTimeMillis(); + System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " milliseconds"); + boolean noghosts = true; + for (int i = 0; i < testsize; i++) { + if (e.get(randomHash().getBytes()) != null) { + noghosts = false; + break; + } + } + long t14 = System.currentTimeMillis(); + System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " milliseconds"); + System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size()); System.out.println(); } @@ -702,8 +726,6 @@ public class kelondroRowCollection { test(10000); test(100000); //test(1000000); - - // 368, 12029 /* System.out.println(new java.util.Date(10957 * day)); @@ -711,4 +733,40 @@ public class kelondroRowCollection { System.out.println(daysSince2000(System.currentTimeMillis())); */ } + + /* +kelondroRowCollection test with size = 10000 +create c : 134 milliseconds, 74 entries/millisecond +copy c -> d: 47 milliseconds, 212 entries/millisecond +sort c (1) : 66 milliseconds, 151 entries/millisecond +sort d (2) : 23 milliseconds, 434 entries/millisecond +uniq c : 3 milliseconds, 3333 entries/millisecond +uniq d : 2 milliseconds, 5000 entries/millisecond +create e : 528 milliseconds, 18 entries/millisecond +sort e (2) : 13 milliseconds, 769 entries/millisecond +uniq e : 2 milliseconds, 5000 entries/millisecond +c isSorted = true: 2 milliseconds +d isSorted = true: 3 milliseconds +e isSorted = true: 2 milliseconds +e allfound = true: 85 milliseconds +e noghosts = true: 75 milliseconds +Result size: c = 10000, d = 10000, e = 10000 + +kelondroRowCollection test with size = 100000 +create c : 589 milliseconds, 169 entries/millisecond +copy c -> d: 141 milliseconds, 709 entries/millisecond +sort c (1) : 268 milliseconds, 373 entries/millisecond +sort d (2) : 187 milliseconds, 534 entries/millisecond +uniq c : 13 milliseconds, 7692 entries/millisecond +uniq d : 14 milliseconds, 7142 entries/millisecond +create e : 22068 milliseconds, 4 entries/millisecond +sort e (2) : 167 milliseconds, 598 entries/millisecond +uniq e : 14 milliseconds, 7142 entries/millisecond +c isSorted = true: 13 milliseconds +d isSorted = true: 14 milliseconds +e isSorted = true: 13 milliseconds +e allfound = true: 815 milliseconds +e noghosts = true: 787 milliseconds +Result size: c = 100000, d = 100000, e = 100000 + */ } \ No newline at end of file diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 2841114b8..711998efb 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -141,29 +141,37 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount); - // check if a re-sorting make sense + // check if a re-sorting makes sense if ((this.chunkcount - this.sortBound) > collectionReSortLimit) { sort(); } + if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) { + // first try to find in sorted area + final byte[] compiledPivot = compilePivot(a, astart, alength); + int p = binarySearchCompiledPivot(compiledPivot); + if (p >= 0) return p; + + // then find in unsorted area + return iterativeSearchCompiledPivot(compiledPivot, this.sortBound, this.chunkcount); + } else { + // first try to find in sorted area + int p = binarySearch(a, astart, alength); + if (p >= 0) return p; - // first try to find in sorted area - int p = binarySearch(a, astart, alength); - if (p >= 0) return p; - - // then find in unsorted area - return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount); - + // then find in unsorted area + return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount); + } } private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) { - // returns the chunknumber - + // returns the chunknumber if (rowdef.objectOrder == null) { for (int i = leftBorder; i < rightBound; i++) { if (match(key, astart, alength, i)) return i; } return -1; } else { + // we dont do a special handling of kelondroBase64Order here, because tests showed that this produces too much overhead for (int i = leftBorder; i < rightBound; i++) { if (compare(key, astart, alength, i) == 0) return i; } @@ -171,6 +179,16 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd } } + private int iterativeSearchCompiledPivot(byte[] compiledPivot, int leftBorder, int rightBound) { + // returns the chunknumber + assert (rowdef.objectOrder != null); + assert (rowdef.objectOrder instanceof kelondroBase64Order); + for (int i = leftBorder; i < rightBound; i++) { + if (comparePivot(compiledPivot, i) == 0) return i; + } + return -1; + } + private int binarySearch(byte[] key, int astart, int alength) { // returns the exact position of the key if the key exists, // or -1 if the key does not exist @@ -183,8 +201,25 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd p = l + ((rbound - l) >> 1); d = compare(key, astart, alength, p); if (d == 0) return p; - else if (d < 0) rbound = p; - else l = p + 1; + if (d < 0) rbound = p; else l = p + 1; + } + return -1; + } + + private int binarySearchCompiledPivot(byte[] compiledPivot) { + // returns the exact position of the key if the key exists, + // or -1 if the key does not exist + assert (rowdef.objectOrder != null); + assert (rowdef.objectOrder instanceof kelondroBase64Order); + int l = 0; + int rbound = this.sortBound; + int p = 0; + int d; + while (l < rbound) { + p = l + ((rbound - l) >> 1); + d = comparePivot(compiledPivot, p); + if (d == 0) return p; + if (d < 0) rbound = p; else l = p + 1; } return -1; } @@ -202,8 +237,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd p = l + ((rbound - l) >> 1); d = compare(key, astart, alength, p); if (d == 0) return p; - else if (d < 0) rbound = p; - else l = p + 1; + if (d < 0) rbound = p; else l = p + 1; } return l; } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index dc8cc99dd..4d25bf138 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -155,14 +155,15 @@ public final class plasmaWordIndex implements indexRI { public void dhtFlushControl(indexRAMRI theCache) { // check for forced flush + int count = -1; synchronized (theCache) { if ((theCache.maxURLinCache() > wCacheMaxChunk ) || (theCache.size() > theCache.getMaxWordCount()) || (serverMemory.available() < collections.minMem())) { - int count = theCache.size() + flushsize - theCache.getMaxWordCount(); - flushCache(theCache, (count > 0) ? count : 1); + count = theCache.size() + flushsize - theCache.getMaxWordCount(); } } + if (count >= 0) flushCache(theCache, (count > 0) ? count : 1); } public long getUpdateTime(String wordHash) { @@ -216,11 +217,11 @@ public final class plasmaWordIndex implements indexRI { busyCacheFlush = true; String wordHash; ArrayList containerList = new ArrayList(); - synchronized (ram) { - count = Math.min(5000, Math.min(count, ram.size())); - boolean collectMax = true; - indexContainer c; - while (collectMax) { + count = Math.min(5000, Math.min(count, ram.size())); + boolean collectMax = true; + indexContainer c; + while (collectMax) { + synchronized (ram) { wordHash = ram.maxScoreWordHash(); c = ram.getContainer(wordHash, null); if ((c != null) && (c.size() > wCacheMaxChunk)) { @@ -230,17 +231,20 @@ public final class plasmaWordIndex implements indexRI { collectMax = false; } } - count = count - containerList.size(); - for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ? + } + count = count - containerList.size(); + for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ? + synchronized (ram) { if (ram.size() == 0) break; if (serverMemory.available() < collections.minMem()) break; // protect memory during flush + // select one word to flush wordHash = ram.bestFlushWordHash(); // move one container from ram to flush list c = ram.deleteContainer(wordHash); - if (c != null) containerList.add(c); } + if (c != null) containerList.add(c); } // flush the containers collections.addMultipleEntries(containerList); @@ -540,7 +544,7 @@ public final class plasmaWordIndex implements indexRI { public synchronized kelondroCloneableIterator wordContainers(String startWordHash, boolean ram) { kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone()); - containerOrder.rotate(startWordHash.getBytes()); + containerOrder.rotate(startWordHash.getBytes()); if (ram) { return dhtOutCache.wordContainers(startWordHash, false); } else {