- tried to remove deadlock

- enhanced search time in kelondroRowSets
- enhanced uniq(): reverse enumeration takes less time in case of mass removal of duplicates

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4207 6c8d7289-2bf4-0310-a012-ef5d649a1542
18 years ago · 0abf33ed03
parent a4010f7dc8
commit 0abf33ed03
5 changed files with 146 additions and 50 deletions
--- a/source/de/anomic/index/indexCollectionRI.java
+++ b/source/de/anomic/index/indexCollectionRI.java
@ -71,7 +71,7 @@ public class indexCollectionRI implements indexRI {
        return collectionIndex.size();
    }
    
-    public synchronized int indexSize(String wordHash) {
+    public int indexSize(String wordHash) {
        try {
            return collectionIndex.indexSize(wordHash.getBytes());
        } catch (IOException e) {
@ -122,7 +122,7 @@ public class indexCollectionRI implements indexRI {

    }

-    public synchronized boolean hasContainer(String wordHash) {
+    public boolean hasContainer(String wordHash) {
        try {
            return collectionIndex.has(wordHash.getBytes());
        } catch (IOException e) {
@ -130,7 +130,7 @@ public class indexCollectionRI implements indexRI {
        }
    }
    
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection) {
+    public indexContainer getContainer(String wordHash, Set urlselection) {
        try {
            kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
            if (collection != null) collection.select(urlselection);
@ -141,7 +141,7 @@ public class indexCollectionRI implements indexRI {
        }
    }

-    public synchronized indexContainer deleteContainer(String wordHash) {
+    public indexContainer deleteContainer(String wordHash) {
        try {
            kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
            if (collection == null) return null;
@ -151,13 +151,13 @@ public class indexCollectionRI implements indexRI {
        }
    }

-    public synchronized boolean removeEntry(String wordHash, String urlHash) {
+    public boolean removeEntry(String wordHash, String urlHash) {
        HashSet hs = new HashSet();
        hs.add(urlHash.getBytes());
        return removeEntries(wordHash, hs) == 1;
    }
    
-    public synchronized int removeEntries(String wordHash, Set urlHashes) {
+    public int removeEntries(String wordHash, Set urlHashes) {
        try {
            return collectionIndex.remove(wordHash.getBytes(), urlHashes);
        } catch (kelondroOutOfLimitsException e) {
@ -169,7 +169,7 @@ public class indexCollectionRI implements indexRI {
        }
    }

-    public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
+    public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
        try {
            collectionIndex.merge(newEntries);
        } catch (kelondroOutOfLimitsException e) {
@ -179,10 +179,10 @@ public class indexCollectionRI implements indexRI {
        }
    }

-    public synchronized void addMultipleEntries(List /*of indexContainer*/ containerList) {
+    public void addMultipleEntries(List /*of indexContainer*/ containerList) {
        try {
        	//for (int i = 0; i < containerList.size(); i++) collectionIndex.merge((indexContainer) containerList.get(i));
-            synchronized (containerList) {collectionIndex.mergeMultiple(containerList);}
+            collectionIndex.mergeMultiple(containerList);
        } catch (kelondroOutOfLimitsException e) {
            e.printStackTrace();
        } catch (IOException e) {
@ -190,7 +190,7 @@ public class indexCollectionRI implements indexRI {
        }
    }

-    public synchronized void close() {
+    public void close() {
        collectionIndex.close();
    }
    
--- a/source/de/anomic/index/indexRAMRI.java
+++ b/source/de/anomic/index/indexRAMRI.java
@ -288,7 +288,7 @@ public final class indexRAMRI implements indexRI {
    public class wordContainerIterator implements kelondroCloneableIterator {

        // this class exists, because the wCache cannot be iterated with rotation
-        // and because every indeContainer Object that is iterated must be returned as top-level-clone
+        // and because every indexContainer Object that is iterated must be returned as top-level-clone
        // so this class simulates wCache.tailMap(startWordHash).values().iterator()
        // plus the mentioned features
        
--- a/source/de/anomic/kelondro/kelondroRowCollection.java
+++ b/source/de/anomic/kelondro/kelondroRowCollection.java
@ -56,7 +56,7 @@ public class kelondroRowCollection {
    private static final int exp_order_bound = 5;
    private static final int exp_collection  = 6;
    
-    private static int processors = 1; //Runtime.getRuntime().availableProcessors();
+    private static int processors = Runtime.getRuntime().availableProcessors();
    
    public kelondroRowCollection(kelondroRowCollection rc) {
        this.rowdef = rc.rowdef;
@ -427,7 +427,7 @@ public class kelondroRowCollection {
        	qsort(p, this.chunkcount, 0, swapspace);
        }
        this.sortBound = this.chunkcount;
-        assert this.isSorted();
+        //assert this.isSorted();
    }

    private class qsortthread extends Thread {
@ -528,14 +528,13 @@ public class kelondroRowCollection {
        // then this method may run a long time with 100% CPU load which is caused
        // by the large number of memory movements. Therefore it is possible
        // to assign a runtime limitation
-        if (chunkcount <= 1) return;
-        int i = 0;
-        while (i < chunkcount - 1) {
+        if (chunkcount < 2) return;
+        int i = chunkcount - 2;
+        while (i >= 0) {
        	if (compare(i, i + 1) == 0) {
-                removeRow(i, true); // this decreases the chunkcount
-            } else {
-                i++;
+                removeRow(i, true);
            }
+            i--;
        }
    }
    
@ -580,8 +579,7 @@ public class kelondroRowCollection {
        return c;
    }
    
-    private final byte[] compilePivot(int i) {
-        assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
+    protected final byte[] compilePivot(int i) {
        assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount;
        assert (this.rowdef.objectOrder != null);
        assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
@ -591,7 +589,14 @@ public class kelondroRowCollection {
        return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength);
    }
    
-    private final int comparePivot(byte[] compiledPivot, int j) {
+    protected final byte[] compilePivot(byte[] a, int astart, int alength) {
+        assert (this.rowdef.objectOrder != null);
+        assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
+        assert (this.rowdef.primaryKeyIndex == 0) : "this.sortColumn = " + this.rowdef.primaryKeyIndex;
+        return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength);
+    }
+    
+    protected final int comparePivot(byte[] compiledPivot, int j) {
        assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
        assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount;
        assert (this.rowdef.objectOrder != null);
@ -693,7 +698,26 @@ public class kelondroRowCollection {
    	boolean eis = e.isSorted();
    	long t12 = System.currentTimeMillis();
    	System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " milliseconds");
-    	System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
+    	random = new Random(0);
+    	boolean allfound = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) == null) {
+                allfound = false;
+                break;
+            }
+        }
+        long t13 = System.currentTimeMillis();
+        System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " milliseconds");
+        boolean noghosts = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) != null) {
+                noghosts = false;
+                break;
+            }
+        }
+        long t14 = System.currentTimeMillis();
+        System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " milliseconds");
+        System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
    	System.out.println();
    }
    
@ -703,12 +727,46 @@ public class kelondroRowCollection {
    	test(100000);
    	//test(1000000);
    	
-        // 368, 12029
-    	
    	/*   	
        System.out.println(new java.util.Date(10957 * day));
        System.out.println(new java.util.Date(0));
        System.out.println(daysSince2000(System.currentTimeMillis()));
        */
    }
+    
+    /*
+kelondroRowCollection test with size = 10000
+create c   : 134 milliseconds, 74 entries/millisecond
+copy c -> d: 47 milliseconds, 212 entries/millisecond
+sort c (1) : 66 milliseconds, 151 entries/millisecond
+sort d (2) : 23 milliseconds, 434 entries/millisecond
+uniq c     : 3 milliseconds, 3333 entries/millisecond
+uniq d     : 2 milliseconds, 5000 entries/millisecond
+create e   : 528 milliseconds, 18 entries/millisecond
+sort e (2) : 13 milliseconds, 769 entries/millisecond
+uniq e     : 2 milliseconds, 5000 entries/millisecond
+c isSorted = true: 2 milliseconds
+d isSorted = true: 3 milliseconds
+e isSorted = true: 2 milliseconds
+e allfound = true: 85 milliseconds
+e noghosts = true: 75 milliseconds
+Result size: c = 10000, d = 10000, e = 10000
+
+kelondroRowCollection test with size = 100000
+create c   : 589 milliseconds, 169 entries/millisecond
+copy c -> d: 141 milliseconds, 709 entries/millisecond
+sort c (1) : 268 milliseconds, 373 entries/millisecond
+sort d (2) : 187 milliseconds, 534 entries/millisecond
+uniq c     : 13 milliseconds, 7692 entries/millisecond
+uniq d     : 14 milliseconds, 7142 entries/millisecond
+create e   : 22068 milliseconds, 4 entries/millisecond
+sort e (2) : 167 milliseconds, 598 entries/millisecond
+uniq e     : 14 milliseconds, 7142 entries/millisecond
+c isSorted = true: 13 milliseconds
+d isSorted = true: 14 milliseconds
+e isSorted = true: 13 milliseconds
+e allfound = true: 815 milliseconds
+e noghosts = true: 787 milliseconds
+Result size: c = 100000, d = 100000, e = 100000
+     */
 }
--- a/source/de/anomic/kelondro/kelondroRowSet.java
+++ b/source/de/anomic/kelondro/kelondroRowSet.java
@ -141,29 +141,37 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
        
        if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
        
-        // check if a re-sorting make sense
+        // check if a re-sorting makes sense
        if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
        	sort();
        }
+        if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
+            // first try to find in sorted area
+            final byte[] compiledPivot = compilePivot(a, astart, alength);
+            int p = binarySearchCompiledPivot(compiledPivot);
+            if (p >= 0) return p;
+            
+            // then find in unsorted area
+            return iterativeSearchCompiledPivot(compiledPivot, this.sortBound, this.chunkcount);
+        } else {
+            // first try to find in sorted area
+            int p = binarySearch(a, astart, alength);
+            if (p >= 0) return p;
        
-        // first try to find in sorted area
-        int p = binarySearch(a, astart, alength);
-        if (p >= 0) return p;
-        
-        // then find in unsorted area
-        return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
-        
+            // then find in unsorted area
+            return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
+        }        
    }
    
    private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) {
        // returns the chunknumber        
-        
        if (rowdef.objectOrder == null) {
            for (int i = leftBorder; i < rightBound; i++) {
                if (match(key, astart, alength, i)) return i;
            }
            return -1;
        } else {
+            // we don't do special handling of kelondroBase64Order here, because tests showed that it produces too much overhead
            for (int i = leftBorder; i < rightBound; i++) {
                if (compare(key, astart, alength, i) == 0) return i;
            }
@ -171,6 +179,16 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
        }
    }
    
+    private int iterativeSearchCompiledPivot(byte[] compiledPivot, int leftBorder, int rightBound) {
+        // returns the chunknumber
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        for (int i = leftBorder; i < rightBound; i++) {
+            if (comparePivot(compiledPivot, i) == 0) return i;
+        }
+        return -1;
+    }
+    
    private int binarySearch(byte[] key, int astart, int alength) {
        // returns the exact position of the key if the key exists,
        // or -1 if the key does not exist
@ -183,8 +201,25 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
            p = l + ((rbound - l) >> 1);
            d = compare(key, astart, alength, p);
            if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
+        }
+        return -1;
+    }
+    
+    private int binarySearchCompiledPivot(byte[] compiledPivot) {
+        // returns the exact position of the key if the key exists,
+        // or -1 if the key does not exist
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        int l = 0;
+        int rbound = this.sortBound;
+        int p = 0;
+        int d;
+        while (l < rbound) {
+            p = l + ((rbound - l) >> 1);
+            d = comparePivot(compiledPivot, p);
+            if (d == 0) return p;
+            if (d < 0) rbound = p; else l = p + 1;
        }
        return -1;
    }
@ -202,8 +237,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
            p = l + ((rbound - l) >> 1);
            d = compare(key, astart, alength, p);
            if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
        }
        return l;
    }
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@ -155,14 +155,15 @@ public final class plasmaWordIndex implements indexRI {

    public void dhtFlushControl(indexRAMRI theCache) {
        // check for forced flush
+        int count = -1;
        synchronized (theCache) {
            if ((theCache.maxURLinCache() > wCacheMaxChunk ) ||
                (theCache.size() > theCache.getMaxWordCount()) ||
                (serverMemory.available() < collections.minMem())) {
-                int count = theCache.size() + flushsize - theCache.getMaxWordCount();
-                flushCache(theCache, (count > 0) ? count : 1);
+                count = theCache.size() + flushsize - theCache.getMaxWordCount();
            }
        }
+        if (count >= 0) flushCache(theCache, (count > 0) ? count : 1);
    }
    
    public long getUpdateTime(String wordHash) {
@ -216,11 +217,11 @@ public final class plasmaWordIndex implements indexRI {
        busyCacheFlush = true;
        String wordHash;
        ArrayList containerList = new ArrayList();
-        synchronized (ram) {
-            count = Math.min(5000, Math.min(count, ram.size()));
-            boolean collectMax = true;
-            indexContainer c;
-            while (collectMax) {
+        count = Math.min(5000, Math.min(count, ram.size()));
+        boolean collectMax = true;
+        indexContainer c;
+        while (collectMax) {
+            synchronized (ram) {
                wordHash = ram.maxScoreWordHash();
                c = ram.getContainer(wordHash, null);
                if ((c != null) && (c.size() > wCacheMaxChunk)) {
@ -230,17 +231,20 @@ public final class plasmaWordIndex implements indexRI {
                    collectMax = false;
                }
            }
-            count = count - containerList.size();
-            for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+        }
+        count = count - containerList.size();
+        for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+            synchronized (ram) {
                if (ram.size() == 0) break;
                if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
+                
                // select one word to flush
                wordHash = ram.bestFlushWordHash();
                
                // move one container from ram to flush list
                c = ram.deleteContainer(wordHash);
-                if (c != null) containerList.add(c);
            }
+            if (c != null) containerList.add(c);
        }
        // flush the containers
        collections.addMultipleEntries(containerList);
@ -540,7 +544,7 @@ public final class plasmaWordIndex implements indexRI {

    public synchronized kelondroCloneableIterator wordContainers(String startWordHash, boolean ram) {
        kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
-       containerOrder.rotate(startWordHash.getBytes());
+        containerOrder.rotate(startWordHash.getBytes());
        if (ram) {
            return dhtOutCache.wordContainers(startWordHash, false);
        } else {