Add an optional runtime limit to kelondroRowCollection.uniq().

uniq() becomes uniq(long maxtime), where maxtime is a limit in milliseconds;
a value <= 0 disables the limit. When the limit is hit the loop stops early,
so duplicates may remain. Callers are updated to pass -1, 10000 or 1000, and
a sort/uniq test is added to kelondroRowSet.main.

diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java
index cc344cb34..2eaba5931 100644
--- a/source/de/anomic/kelondro/kelondroCollectionIndex.java
+++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java
@@ -183,7 +183,7 @@ public class kelondroCollectionIndex {
                 ientry.setCol(idx_col_indexpos, aentry.index());
                 ientry.setCol(idx_col_lastread, t);
                 ientry.setCol(idx_col_lastwrote, t);
-                index.addUnique(ientry);
+                index.addUnique(ientry); // FIXME: this should avoid doubles
                 count++;
 
                 // write a log
@@ -590,7 +590,7 @@ public class kelondroCollectionIndex {
                 // join with new collection
                 oldcollection.addAllUnique(collection);
                 oldcollection.sort();
-                oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
+                oldcollection.uniq(-1); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
                 oldcollection.trim(false);
 
                 // check for size of collection:
@@ -704,7 +704,7 @@ public class kelondroCollectionIndex {
             // join with new collection
             oldcollection.addAllUnique(collection);
             oldcollection.sort();
-            oldcollection.uniq(); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
+            oldcollection.uniq(-1); // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
             oldcollection.trim(false);
             collection = oldcollection;
 
diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java
index b872d9324..3f884d634 100644
--- a/source/de/anomic/kelondro/kelondroFlexTable.java
+++ b/source/de/anomic/kelondro/kelondroFlexTable.java
@@ -185,7 +185,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
         System.out.flush();
         ri.sort();
         int sbu = ri.size();
-        ri.uniq();
+        ri.uniq(10000);
         if (ri.size() != sbu) serverLog.logSevere("kelondroFlexTable.initializeRamIndex: " + tablename, "; size before uniq = " + sbu + ", after uniq = " + ri.size());
         return ri;
     }
diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java
index 65677b4d8..fa29055b6 100644
--- a/source/de/anomic/kelondro/kelondroRowCollection.java
+++ b/source/de/anomic/kelondro/kelondroRowCollection.java
@@ -473,20 +473,26 @@ public class kelondroRowCollection {
         if (i == p) return j; else if (j == p) return i; else return p;
     }
 
-    public synchronized void uniq() {
+    public synchronized void uniq(long maxtime) {
         assert (this.rowdef.objectOrder != null);
         // removes double-occurrences of chunks
         // this works only if the collection was ordered with sort before
+        // if the collection is large and the number of deletions is also large,
+        // then this method may run a long time with 100% CPU load which is caused
+        // by the large number of memory movements. Therefore it is possible
+        // to assign a runtime limitation
+        long start = System.currentTimeMillis();
         if (chunkcount <= 1) return;
         int i = 0;
         while (i < chunkcount - 1) {
             //System.out.println("ENTRY0: " + serverLog.arrayList(chunkcache, rowdef.objectsize*i, rowdef.objectsize));
             //System.out.println("ENTRY1: " + serverLog.arrayList(chunkcache, rowdef.objectsize*(i+1), rowdef.objectsize));
             if (compare(i, i + 1) == 0) {
-                removeRow(i);
+                removeRow(i); // this decreases the chunkcount
             } else {
                 i++;
             }
+            if ((maxtime > 0) && (start + maxtime < System.currentTimeMillis())) break;
         }
     }
 
diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java
index 74910be7d..8ef2e2992 100644
--- a/source/de/anomic/kelondro/kelondroRowSet.java
+++ b/source/de/anomic/kelondro/kelondroRowSet.java
@@ -259,6 +259,23 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
     }
 
     public static void main(String[] args) {
+        // sort/uniq-test
+        kelondroRow rowdef = new kelondroRow("Cardinal key-4 {b256}, byte[] payload-1", kelondroNaturalOrder.naturalOrder, 0);
+        kelondroRowSet rs = new kelondroRowSet(rowdef, 0);
+        Random random = new Random(0);
+        kelondroRow.Entry entry;
+        for (int i = 0; i < 10000000; i++) {
+            entry = rowdef.newEntry();
+            entry.setCol(0, Math.abs(random.nextLong() % 1000000));
+            entry.setCol(1, "a".getBytes());
+            rs.addUnique(entry);
+        }
+        System.out.println("before sort, size = " + rs.size());
+        rs.sort();
+        System.out.println("after sort, before uniq, size = " + rs.size());
+        rs.uniq(10000);
+        System.out.println("after uniq, size = " + rs.size());
+
         /*
         String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" };
         kelondroRowSet c = new kelondroRowSet(new kelondroRow(new int[]{10, 3}));
@@ -351,7 +368,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
         byte[] key;
         int testsize = 5000;
         byte[][] delkeys = new byte[testsize / 5][];
-        Random random = new Random(0);
+        random = new Random(0);
         for (int i = 0; i < testsize; i++) {
             key = randomHash(random);
             if (i % 5 != 0) continue;
diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java
index a0540cc45..7e7ec3402 100644
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@@ -391,7 +391,7 @@ public final class plasmaSearchEvent extends Thread implements Runnable {
             searchResult.addAllUnique(rcLocal);
             searchResult.addAllUnique(rcContainers);
             searchResult.sort();
-            searchResult.uniq();
+            searchResult.uniq(1000);
             preorderTime = preorderTime - (System.currentTimeMillis() - pst);
             if (preorderTime < 0) preorderTime = 200;
             plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query, ranking, searchResult, preorderTime);