From 0abf33ed032c075392d6fd2579034e143b253975 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Mon, 12 Nov 2007 01:14:51 +0000
Subject: [PATCH] - tried to remove deadlock - enhanced search time in
 kelondroRowSets - enhanced uniq() - reverse enumeration takes less time in
 case of mass removal of duplicates

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4207 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/index/indexCollectionRI.java | 20 ++---
 source/de/anomic/index/indexRAMRI.java        |  2 +-
 .../kelondro/kelondroRowCollection.java       | 86 ++++++++++++++++---
 source/de/anomic/kelondro/kelondroRowSet.java | 62 ++++++++++---
 source/de/anomic/plasma/plasmaWordIndex.java  | 26 +++---
 5 files changed, 146 insertions(+), 50 deletions(-)

diff --git a/source/de/anomic/index/indexCollectionRI.java b/source/de/anomic/index/indexCollectionRI.java
index 543bae7a3..120fb5b1b 100644
--- a/source/de/anomic/index/indexCollectionRI.java
+++ b/source/de/anomic/index/indexCollectionRI.java
@@ -71,7 +71,7 @@ public class indexCollectionRI implements indexRI {
         return collectionIndex.size();
     }
     
-    public synchronized int indexSize(String wordHash) {
+    public int indexSize(String wordHash) {
         try {
             return collectionIndex.indexSize(wordHash.getBytes());
         } catch (IOException e) {
@@ -122,7 +122,7 @@ public class indexCollectionRI implements indexRI {
 
     }
 
-    public synchronized boolean hasContainer(String wordHash) {
+    public boolean hasContainer(String wordHash) {
         try {
             return collectionIndex.has(wordHash.getBytes());
         } catch (IOException e) {
@@ -130,7 +130,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
     
-    public synchronized indexContainer getContainer(String wordHash, Set urlselection) {
+    public indexContainer getContainer(String wordHash, Set urlselection) {
         try {
             kelondroRowSet collection = collectionIndex.get(wordHash.getBytes());
             if (collection != null) collection.select(urlselection);
@@ -141,7 +141,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized indexContainer deleteContainer(String wordHash) {
+    public indexContainer deleteContainer(String wordHash) {
         try {
             kelondroRowSet collection = collectionIndex.delete(wordHash.getBytes());
             if (collection == null) return null;
@@ -151,13 +151,13 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized boolean removeEntry(String wordHash, String urlHash) {
+    public boolean removeEntry(String wordHash, String urlHash) {
         HashSet hs = new HashSet();
         hs.add(urlHash.getBytes());
         return removeEntries(wordHash, hs) == 1;
     }
     
-    public synchronized int removeEntries(String wordHash, Set urlHashes) {
+    public int removeEntries(String wordHash, Set urlHashes) {
         try {
             return collectionIndex.remove(wordHash.getBytes(), urlHashes);
         } catch (kelondroOutOfLimitsException e) {
@@ -169,7 +169,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
+    public void addEntries(indexContainer newEntries, long creationTime, boolean dhtCase) {
         try {
             collectionIndex.merge(newEntries);
         } catch (kelondroOutOfLimitsException e) {
@@ -179,10 +179,10 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void addMultipleEntries(List /*of indexContainer*/ containerList) {
+    public void addMultipleEntries(List /*of indexContainer*/ containerList) {
         try {
         	//for (int i = 0; i < containerList.size(); i++) collectionIndex.merge((indexContainer) containerList.get(i));
-            synchronized (containerList) {collectionIndex.mergeMultiple(containerList);}
+            collectionIndex.mergeMultiple(containerList);
         } catch (kelondroOutOfLimitsException e) {
             e.printStackTrace();
         } catch (IOException e) {
@@ -190,7 +190,7 @@ public class indexCollectionRI implements indexRI {
         }
     }
 
-    public synchronized void close() {
+    public void close() {
         collectionIndex.close();
     }
     
diff --git a/source/de/anomic/index/indexRAMRI.java b/source/de/anomic/index/indexRAMRI.java
index c2706db5e..0d6bc4d7a 100644
--- a/source/de/anomic/index/indexRAMRI.java
+++ b/source/de/anomic/index/indexRAMRI.java
@@ -288,7 +288,7 @@ public final class indexRAMRI implements indexRI {
     public class wordContainerIterator implements kelondroCloneableIterator {
 
         // this class exists, because the wCache cannot be iterated with rotation
-        // and because every indeContainer Object that is iterated must be returned as top-level-clone
+        // and because every indexContainer Object that is iterated must be returned as top-level-clone
         // so this class simulates wCache.tailMap(startWordHash).values().iterator()
         // plus the mentioned features
         
diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java
index 58f5dfe05..b468f9f5d 100644
--- a/source/de/anomic/kelondro/kelondroRowCollection.java
+++ b/source/de/anomic/kelondro/kelondroRowCollection.java
@@ -56,7 +56,7 @@ public class kelondroRowCollection {
     private static final int exp_order_bound = 5;
     private static final int exp_collection  = 6;
     
-    private static int processors = 1; //Runtime.getRuntime().availableProcessors();
+    private static int processors = Runtime.getRuntime().availableProcessors();
     
     public kelondroRowCollection(kelondroRowCollection rc) {
         this.rowdef = rc.rowdef;
@@ -427,7 +427,7 @@ public class kelondroRowCollection {
         	qsort(p, this.chunkcount, 0, swapspace);
         }
         this.sortBound = this.chunkcount;
-        assert this.isSorted();
+        //assert this.isSorted();
     }
 
     private class qsortthread extends Thread {
@@ -528,14 +528,13 @@ public class kelondroRowCollection {
         // then this method may run a long time with 100% CPU load which is caused
         // by the large number of memory movements. Therefore it is possible
         // to assign a runtime limitation
-        if (chunkcount <= 1) return;
-        int i = 0;
-        while (i < chunkcount - 1) {
+        if (chunkcount < 2) return;
+        int i = chunkcount - 2;
+        while (i >= 0) {
         	if (compare(i, i + 1) == 0) {
-                removeRow(i, true); // this decreases the chunkcount
-            } else {
-                i++;
+                removeRow(i, true);
             }
+            i--;
         }
     }
     
@@ -580,8 +579,7 @@ public class kelondroRowCollection {
         return c;
     }
     
-    private final byte[] compilePivot(int i) {
-        assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
+    protected final byte[] compilePivot(int i) {
         assert (i >= 0) && (i < chunkcount) : "i = " + i + ", chunkcount = " + chunkcount;
         assert (this.rowdef.objectOrder != null);
         assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
@@ -591,7 +589,14 @@ public class kelondroRowCollection {
         return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(chunkcache, i * this.rowdef.objectsize + colstart, this.rowdef.primaryKeyLength);
     }
     
-    private final int comparePivot(byte[] compiledPivot, int j) {
+    protected final byte[] compilePivot(byte[] a, int astart, int alength) {
+        assert (this.rowdef.objectOrder != null);
+        assert (this.rowdef.objectOrder instanceof kelondroBase64Order);
+        assert (this.rowdef.primaryKeyIndex == 0) : "this.sortColumn = " + this.rowdef.primaryKeyIndex;
+        return ((kelondroBase64Order) this.rowdef.objectOrder).compilePivot(a, astart, alength);
+    }
+    
+    protected final int comparePivot(byte[] compiledPivot, int j) {
         assert (chunkcount * this.rowdef.objectsize <= chunkcache.length) : "chunkcount = " + chunkcount + ", objsize = " + this.rowdef.objectsize + ", chunkcache.length = " + chunkcache.length;
         assert (j >= 0) && (j < chunkcount) : "j = " + j + ", chunkcount = " + chunkcount;
         assert (this.rowdef.objectOrder != null);
@@ -693,7 +698,26 @@ public class kelondroRowCollection {
     	boolean eis = e.isSorted();
     	long t12 = System.currentTimeMillis();
     	System.out.println("e isSorted = " + ((eis) ? "true" : "false") + ": " + (t12 - t11) + " milliseconds");
-    	System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
+    	random = new Random(0);
+    	boolean allfound = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) == null) {
+                allfound = false;
+                break;
+            }
+        }
+        long t13 = System.currentTimeMillis();
+        System.out.println("e allfound = " + ((allfound) ? "true" : "false") + ": " + (t13 - t12) + " milliseconds");
+        boolean noghosts = true;
+        for (int i = 0; i < testsize; i++) {
+            if (e.get(randomHash().getBytes()) != null) {
+                noghosts = false;
+                break;
+            }
+        }
+        long t14 = System.currentTimeMillis();
+        System.out.println("e noghosts = " + ((noghosts) ? "true" : "false") + ": " + (t14 - t13) + " milliseconds");
+        System.out.println("Result size: c = " + c.size() + ", d = " + d.size() + ", e = " + e.size());
     	System.out.println();
     }
     
@@ -702,8 +726,6 @@ public class kelondroRowCollection {
     	test(10000);
     	test(100000);
     	//test(1000000);
-
-        // 368, 12029
     	
     	/*   	
         System.out.println(new java.util.Date(10957 * day));
@@ -711,4 +733,40 @@ public class kelondroRowCollection {
         System.out.println(daysSince2000(System.currentTimeMillis()));
         */
     }
+    
+    /*
+kelondroRowCollection test with size = 10000
+create c   : 134 milliseconds, 74 entries/millisecond
+copy c -> d: 47 milliseconds, 212 entries/millisecond
+sort c (1) : 66 milliseconds, 151 entries/millisecond
+sort d (2) : 23 milliseconds, 434 entries/millisecond
+uniq c     : 3 milliseconds, 3333 entries/millisecond
+uniq d     : 2 milliseconds, 5000 entries/millisecond
+create e   : 528 milliseconds, 18 entries/millisecond
+sort e (2) : 13 milliseconds, 769 entries/millisecond
+uniq e     : 2 milliseconds, 5000 entries/millisecond
+c isSorted = true: 2 milliseconds
+d isSorted = true: 3 milliseconds
+e isSorted = true: 2 milliseconds
+e allfound = true: 85 milliseconds
+e noghosts = true: 75 milliseconds
+Result size: c = 10000, d = 10000, e = 10000
+
+kelondroRowCollection test with size = 100000
+create c   : 589 milliseconds, 169 entries/millisecond
+copy c -> d: 141 milliseconds, 709 entries/millisecond
+sort c (1) : 268 milliseconds, 373 entries/millisecond
+sort d (2) : 187 milliseconds, 534 entries/millisecond
+uniq c     : 13 milliseconds, 7692 entries/millisecond
+uniq d     : 14 milliseconds, 7142 entries/millisecond
+create e   : 22068 milliseconds, 4 entries/millisecond
+sort e (2) : 167 milliseconds, 598 entries/millisecond
+uniq e     : 14 milliseconds, 7142 entries/millisecond
+c isSorted = true: 13 milliseconds
+d isSorted = true: 14 milliseconds
+e isSorted = true: 13 milliseconds
+e allfound = true: 815 milliseconds
+e noghosts = true: 787 milliseconds
+Result size: c = 100000, d = 100000, e = 100000
+     */
 }
\ No newline at end of file
diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java
index 2841114b8..711998efb 100644
--- a/source/de/anomic/kelondro/kelondroRowSet.java
+++ b/source/de/anomic/kelondro/kelondroRowSet.java
@@ -141,29 +141,37 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         
         if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
         
-        // check if a re-sorting make sense
+        // check if a re-sorting makes sense
         if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
         	sort();
         }
+        if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
+            // first try to find in sorted area
+            final byte[] compiledPivot = compilePivot(a, astart, alength);
+            int p = binarySearchCompiledPivot(compiledPivot);
+            if (p >= 0) return p;
+            
+            // then find in unsorted area
+            return iterativeSearchCompiledPivot(compiledPivot, this.sortBound, this.chunkcount);
+        } else {
+            // first try to find in sorted area
+            int p = binarySearch(a, astart, alength);
+            if (p >= 0) return p;
         
-        // first try to find in sorted area
-        int p = binarySearch(a, astart, alength);
-        if (p >= 0) return p;
-        
-        // then find in unsorted area
-        return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
-        
+            // then find in unsorted area
+            return iterativeSearch(a, astart, alength, this.sortBound, this.chunkcount);
+        }        
     }
     
     private int iterativeSearch(byte[] key, int astart, int alength, int leftBorder, int rightBound) {
-        // returns the chunknumber
-        
+        // returns the chunknumber        
         if (rowdef.objectOrder == null) {
             for (int i = leftBorder; i < rightBound; i++) {
                 if (match(key, astart, alength, i)) return i;
             }
             return -1;
         } else {
+            // we don't do special handling of kelondroBase64Order here, because tests showed that this produces too much overhead
             for (int i = leftBorder; i < rightBound; i++) {
                 if (compare(key, astart, alength, i) == 0) return i;
             }
@@ -171,6 +179,16 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         }
     }
     
+    private int iterativeSearchCompiledPivot(byte[] compiledPivot, int leftBorder, int rightBound) {
+        // returns the chunknumber
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        for (int i = leftBorder; i < rightBound; i++) {
+            if (comparePivot(compiledPivot, i) == 0) return i;
+        }
+        return -1;
+    }
+    
     private int binarySearch(byte[] key, int astart, int alength) {
         // returns the exact position of the key if the key exists,
         // or -1 if the key does not exist
@@ -183,8 +201,25 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
             p = l + ((rbound - l) >> 1);
             d = compare(key, astart, alength, p);
             if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
+        }
+        return -1;
+    }
+    
+    private int binarySearchCompiledPivot(byte[] compiledPivot) {
+        // returns the exact position of the key if the key exists,
+        // or -1 if the key does not exist
+        assert (rowdef.objectOrder != null);
+        assert (rowdef.objectOrder instanceof kelondroBase64Order);
+        int l = 0;
+        int rbound = this.sortBound;
+        int p = 0;
+        int d;
+        while (l < rbound) {
+            p = l + ((rbound - l) >> 1);
+            d = comparePivot(compiledPivot, p);
+            if (d == 0) return p;
+            if (d < 0) rbound = p; else l = p + 1;
         }
         return -1;
     }
@@ -202,8 +237,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
             p = l + ((rbound - l) >> 1);
             d = compare(key, astart, alength, p);
             if (d == 0) return p;
-            else if (d < 0) rbound = p;
-            else l = p + 1;
+            if (d < 0) rbound = p; else l = p + 1;
         }
         return l;
     }
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index dc8cc99dd..4d25bf138 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -155,14 +155,15 @@ public final class plasmaWordIndex implements indexRI {
 
     public void dhtFlushControl(indexRAMRI theCache) {
         // check for forced flush
+        int count = -1; // sentinel: stays -1 when none of the flush conditions below holds
         synchronized (theCache) {
             if ((theCache.maxURLinCache() > wCacheMaxChunk ) ||
                 (theCache.size() > theCache.getMaxWordCount()) ||
                 (serverMemory.available() < collections.minMem())) {
-                int count = theCache.size() + flushsize - theCache.getMaxWordCount();
-                flushCache(theCache, (count > 0) ? count : 1);
+                count = Math.max(1, theCache.size() + flushsize - theCache.getMaxWordCount()); // clamp to >= 1: the formula may be <= 0, and a forced flush must still flush at least one entry
             }
         }
+        if (count > 0) flushCache(theCache, count); // flush only after the synchronized (theCache) block has been left
     }
     
     public long getUpdateTime(String wordHash) {
@@ -216,11 +217,11 @@ public final class plasmaWordIndex implements indexRI {
         busyCacheFlush = true;
         String wordHash;
         ArrayList containerList = new ArrayList();
-        synchronized (ram) {
-            count = Math.min(5000, Math.min(count, ram.size()));
-            boolean collectMax = true;
-            indexContainer c;
-            while (collectMax) {
+        count = Math.min(5000, Math.min(count, ram.size()));
+        boolean collectMax = true;
+        indexContainer c;
+        while (collectMax) {
+            synchronized (ram) {
                 wordHash = ram.maxScoreWordHash();
                 c = ram.getContainer(wordHash, null);
                 if ((c != null) && (c.size() > wCacheMaxChunk)) {
@@ -230,17 +231,20 @@ public final class plasmaWordIndex implements indexRI {
                     collectMax = false;
                 }
             }
-            count = count - containerList.size();
-            for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+        }
+        count = count - containerList.size();
+        for (int i = 0; i < count; i++) { // possible position of outOfMemoryError ?
+            synchronized (ram) {
                 if (ram.size() == 0) break;
                 if (serverMemory.available() < collections.minMem()) break; // protect memory during flush
+                
                 // select one word to flush
                 wordHash = ram.bestFlushWordHash();
                 
                 // move one container from ram to flush list
                 c = ram.deleteContainer(wordHash);
-                if (c != null) containerList.add(c);
             }
+            if (c != null) containerList.add(c);
         }
         // flush the containers
         collections.addMultipleEntries(containerList);
@@ -540,7 +544,7 @@ public final class plasmaWordIndex implements indexRI {
 
     public synchronized kelondroCloneableIterator wordContainers(String startWordHash, boolean ram) {
         kelondroOrder containerOrder = new indexContainerOrder((kelondroOrder) indexOrder.clone());
-       containerOrder.rotate(startWordHash.getBytes());
+        containerOrder.rotate(startWordHash.getBytes());
         if (ram) {
             return dhtOutCache.wordContainers(startWordHash, false);
         } else {