From 0b4205eb5abaea88d8c53f41ccb71f6c1b92de98 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Wed, 23 Jan 2008 11:13:39 +0000
Subject: [PATCH] - fix double-deletion in eco tables - re-sort row sets when
 a new entry is added instead of during a get - added some asserts in
 snippet cache for debugging

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4375 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 source/de/anomic/kelondro/kelondroEcoTable.java | 11 ++++++++++-
 source/de/anomic/kelondro/kelondroRowSet.java   |  8 ++++----
 source/de/anomic/plasma/plasmaSnippetCache.java |  4 ++++
 3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java
index 31072ff1e..2409cfc12 100644
--- a/source/de/anomic/kelondro/kelondroEcoTable.java
+++ b/source/de/anomic/kelondro/kelondroEcoTable.java
@@ -145,13 +145,22 @@ public class kelondroEcoTable implements kelondroIndex {
                     index.addi(key, ds[0].intValue());
                 }
                 // then remove the other doubles by removing them from the table, but do a re-indexing while doing that
+                // first aggregate all the delete positions because the elements from the top positions must be removed first
                 i = doubles.iterator();
+                TreeSet<Integer> delpos = new TreeSet<Integer>();
                 while (i.hasNext()) {
                     ds = i.next();
                     for (int j = 1; j < ds.length; j++) {
-                        removeInFile(ds[j].intValue());
+                        delpos.add(ds[j]);
                     }
                 }
+                // now remove the entries in a sorted way (top-down)
+                Integer top;
+                while (delpos.size() > 0) {
+                    top = delpos.last();
+                    delpos.remove(top);
+                    removeInFile(top.intValue());
+                }
             }
         } catch (FileNotFoundException e) {
             // should never happen
diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java
index 31cdb700e..a13438b9b 100644
--- a/source/de/anomic/kelondro/kelondroRowSet.java
+++ b/source/de/anomic/kelondro/kelondroRowSet.java
@@ -110,6 +110,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         index = find(entry.bytes(), (rowdef.primaryKeyIndex < 0) ? 0 :super.rowdef.colstart[rowdef.primaryKeyIndex], super.rowdef.primaryKeyLength);
         if (index < 0) {
             super.addUnique(entry);
+            // when reaching a specific amount of un-sorted entries, re-sort all
+            if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
+                sort();
+            }
         } else {
             oldentry = get(index);
             set(index, entry);
@@ -140,10 +144,6 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         
         if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount);
         
-        // check if a re-sorting makes sense
-        if ((this.chunkcount - this.sortBound) > collectionReSortLimit) {
-        	sort();
-        }
         if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) {
             // first try to find in sorted area
             final byte[] compiledPivot = compilePivot(a, astart, alength);
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 2582b0671..caf195ae4 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -867,6 +867,10 @@ public class plasmaSnippetCache {
             log.logInfo("error: '" + snippet.getError() + "', remove url = " + snippet.getUrl().toNormalform(false, true) + ", cause: " + snippet.getError());
             plasmaSwitchboard.getSwitchboard().wordIndex.loadedURL.remove(urlHash);
             plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID);
+            assert plasmaSwitchboard.getSwitchboard() != null;
+            assert plasmaSwitchboard.getSwitchboard().wordIndex != null;
+            assert event != null;
+            assert event.getQuery() != null;
             plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes, urlHash);
             event.remove(urlHash);
         }