From 0b4205eb5abaea88d8c53f41ccb71f6c1b92de98 Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 23 Jan 2008 11:13:39 +0000 Subject: [PATCH] - fix double-deletion in eco tables - changed behaviour of sort moment (not during a get) - added some asserts in snippet cache for debugging git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4375 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/kelondro/kelondroEcoTable.java | 11 ++++++++++- source/de/anomic/kelondro/kelondroRowSet.java | 8 ++++---- source/de/anomic/plasma/plasmaSnippetCache.java | 4 ++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java index 31072ff1e..2409cfc12 100644 --- a/source/de/anomic/kelondro/kelondroEcoTable.java +++ b/source/de/anomic/kelondro/kelondroEcoTable.java @@ -145,13 +145,22 @@ public class kelondroEcoTable implements kelondroIndex { index.addi(key, ds[0].intValue()); } // then remove the other doubles by removing them from the table, but do a re-indexing while doing that + // first aggregate all the delete positions because the elements from the top positions must be removed first i = doubles.iterator(); + TreeSet<Integer> delpos = new TreeSet<Integer>(); while (i.hasNext()) { ds = i.next(); for (int j = 1; j < ds.length; j++) { - removeInFile(ds[j].intValue()); + delpos.add(ds[j]); } } + // now remove the entries in a sorted way (top-down) + Integer top; + while (delpos.size() > 0) { + top = delpos.last(); + delpos.remove(top); + removeInFile(top.intValue()); + } } } catch (FileNotFoundException e) { // should never happen diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 31cdb700e..a13438b9b 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -110,6 +110,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd index = 
find(entry.bytes(), (rowdef.primaryKeyIndex < 0) ? 0 :super.rowdef.colstart[rowdef.primaryKeyIndex], super.rowdef.primaryKeyLength); if (index < 0) { super.addUnique(entry); + // when reaching a specific amount of un-sorted entries, re-sort all + if ((this.chunkcount - this.sortBound) > collectionReSortLimit) { + sort(); + } } else { oldentry = get(index); set(index, entry); @@ -140,10 +144,6 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd if (rowdef.objectOrder == null) return iterativeSearch(a, astart, alength, 0, this.chunkcount); - // check if a re-sorting makes sense - if ((this.chunkcount - this.sortBound) > collectionReSortLimit) { - sort(); - } if ((this.rowdef.objectOrder != null) && (this.rowdef.objectOrder instanceof kelondroBase64Order) && (this.sortBound > 4000)) { // first try to find in sorted area final byte[] compiledPivot = compilePivot(a, astart, alength); diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java index 2582b0671..caf195ae4 100644 --- a/source/de/anomic/plasma/plasmaSnippetCache.java +++ b/source/de/anomic/plasma/plasmaSnippetCache.java @@ -867,6 +867,10 @@ public class plasmaSnippetCache { log.logInfo("error: '" + snippet.getError() + "', remove url = " + snippet.getUrl().toNormalform(false, true) + ", cause: " + snippet.getError()); plasmaSwitchboard.getSwitchboard().wordIndex.loadedURL.remove(urlHash); plasmaSearchEvent event = plasmaSearchEvent.getEvent(eventID); + assert plasmaSwitchboard.getSwitchboard() != null; + assert plasmaSwitchboard.getSwitchboard().wordIndex != null; + assert event != null; + assert event.getQuery() != null; plasmaSwitchboard.getSwitchboard().wordIndex.removeEntryMultiple(event.getQuery().queryHashes, urlHash); event.remove(urlHash); }