diff --git a/source/dbtest.java b/source/dbtest.java index 1d1a51d76..7ae0f0709 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -23,7 +23,6 @@ import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroSQLTable; import de.anomic.kelondro.kelondroSplitTable; -import de.anomic.kelondro.kelondroSplittedTree; import de.anomic.kelondro.kelondroTree; import de.anomic.server.serverInstantThread; import de.anomic.server.serverMemory; @@ -200,12 +199,6 @@ public class dbtest { File tablefile = new File(tablename + ".kelondro.db"); table = new kelondroCache(new kelondroTree(tablefile, true, preload, testRow)); } - if (dbe.equals("kelondroSplittedTree")) { - File tablepath = new File(tablename).getParentFile(); - tablename = new File(tablename).getName(); - table = new kelondroSplittedTree(tablepath, tablename, kelondroBase64Order.enhancedCoder, - preload, 8, testRow, 1, 80); - } if (dbe.equals("kelondroFlexTable")) { File tablepath = new File(tablename).getParentFile(); table = new kelondroFlexTable(tablepath, new File(tablename).getName(), preload, testRow, 0, true); @@ -362,7 +355,6 @@ public class dbtest { if (command.equals("list")) { kelondroCloneableIterator i = null; - if (table instanceof kelondroSplittedTree) i = ((kelondroSplittedTree) table).rows(true, null); if (table instanceof kelondroTree) i = ((kelondroTree) table).rows(true, null); if (table instanceof kelondroSQLTable) i = ((kelondroSQLTable) table).rows(true, null); kelondroRow.Entry row; diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 403a9a6c3..6b3845be1 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -25,6 +25,8 @@ package de.anomic.kelondro; import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; public class kelondroBytesIntMap { @@ -74,6 +76,27 @@ public class kelondroBytesIntMap { index.addUnique(newentry); } + public synchronized ArrayList removeDoubles() throws IOException { + ArrayList indexreport = index.removeDoubles(); + ArrayList report = new ArrayList(); + Iterator i = indexreport.iterator(); + kelondroRowSet rowset; + Integer[] is; + Iterator ei; + int c; + while (i.hasNext()) { + rowset = i.next(); + is = new Integer[rowset.size()]; + ei = rowset.rows(); + c = 0; + while (ei.hasNext()) { + is[c++] = new Integer((int) ei.next().getColLong(1)); + } + report.add(is); + } + return report; + } + public synchronized int removei(byte[] key) throws IOException { assert (key != null); kelondroRow.Entry indexentry = index.remove(key, true); // keeping the order will prevent multiple re-sorts diff --git a/source/de/anomic/kelondro/kelondroCache.java b/source/de/anomic/kelondro/kelondroCache.java index da1186b97..540144d59 100644 --- a/source/de/anomic/kelondro/kelondroCache.java +++ b/source/de/anomic/kelondro/kelondroCache.java @@ -28,6 +28,7 @@ package de.anomic.kelondro; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; @@ -353,6 +354,11 @@ public class kelondroCache implements kelondroIndex { while (i.hasNext()) addUnique((Entry) i.next()); } + public synchronized ArrayList removeDoubles() throws IOException { + return index.removeDoubles(); + // todo: remove reported entries from the cache!!! + } + public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException { checkMissSpace(); diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 8cc40f4df..3b069b2b3 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -221,8 +221,7 @@ public class kelondroCollectionIndex { ientry.setCol(idx_col_indexpos, aentry.index()); ientry.setCol(idx_col_lastread, t); ientry.setCol(idx_col_lastwrote, t); - //index.addUnique(ientry); // FIXME: this should avoid doubles - index.put(ientry); + index.addUnique(ientry); // FIXME: this should avoid doubles count++; // write a log @@ -233,6 +232,35 @@ public class kelondroCollectionIndex { } } } + // care for double entries + ArrayList del = index.removeDoubles(); + Iterator j = del.iterator(); + kelondroRowSet rowset; + Iterator rowiter; + int partition, maxpartition; + kelondroRow.Entry entry, maxentry; + int doublecount = 0; + while (j.hasNext()) { + rowset = j.next(); + // for each entry in row set choose one which we want to keep + rowiter = rowset.rows(); + maxentry = null; + maxpartition = -1; + while (rowiter.hasNext()) { + entry = rowiter.next(); + partition = (int) entry.getColLong(idx_col_clusteridx); + if (partition > maxpartition) { + maxpartition = partition; + maxentry = entry; + } + } + if (maxentry != null) { + // put back a single entry to the index, which is then not double to any other entry + index.put(maxentry); + doublecount++; + } + } + if (doublecount > 0) serverLog.logWarning("STARTUP", "found " + doublecount + " RWI entries with references to several collections. All have been fixed (zombies still exists)."); } private kelondroIndex openIndexFile(File path, String filenameStub, kelondroByteOrder indexOrder, diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java index 3fb3777a2..5d972c04f 100644 --- a/source/de/anomic/kelondro/kelondroEcoTable.java +++ b/source/de/anomic/kelondro/kelondroEcoTable.java @@ -28,12 +28,14 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.TreeSet; import de.anomic.kelondro.kelondroRow.Entry; import de.anomic.server.serverMemory; @@ -189,6 +191,34 @@ public class kelondroEcoTable implements kelondroIndex { assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); } + public synchronized ArrayList removeDoubles() throws IOException { + ArrayList indexreport = index.removeDoubles(); + ArrayList report = new ArrayList(); + Iterator i = indexreport.iterator(); + Integer[] is; + kelondroRowSet rows; + TreeSet d = new TreeSet(); + byte[] b = new byte[rowdef.objectsize]; + while (i.hasNext()) { + is = i.next(); + rows = new kelondroRowSet(this.rowdef, is.length); + for (int j = 0; j < is.length; j++) { + d.add(is[j]); + file.get(is[j].intValue(), b, 0); + rows.addUnique(rowdef.newEntry(b)); + } + report.add(rows); + } + // finally delete the affected rows, but start with largest id first, othervise we overwrite wrong entries + Integer s; + while (d.size() > 0) { + s = d.last(); + d.remove(s); + this.removeInFile(s.intValue()); + } + return report; + } + public void close() { file.close(); file = null; @@ -201,7 +231,7 @@ public class kelondroEcoTable implements kelondroIndex { public String filename() { return this.file.filename().toString(); } - + public synchronized Entry get(byte[] key) throws IOException { assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); @@ -281,6 +311,34 @@ public class kelondroEcoTable implements kelondroIndex { assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); } + private void removeInFile(int i) throws IOException { + assert i >= 0; + + byte[] p = new byte[rowdef.objectsize]; + if (table == null) { + file.cleanLast(p, 0); + file.put(i, p, 0); + byte[] k = new byte[rowdef.primaryKeyLength]; + System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); + index.puti(k, i); + } else { + if (i == index.size() - 1) { + // special handling if the entry is the last entry in the file + table.removeRow(i, false); + file.clean(i); + } else { + // switch values + kelondroRow.Entry te = table.removeOne(); + table.set(i, te); + + file.cleanLast(p, 0); + file.put(i, p, 0); + kelondroRow.Entry lr = rowdef.newEntry(p); + index.puti(lr.getPrimaryKeyBytes(), i); + } + } + } + public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException { assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index df01ca550..0cb226929 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -33,6 +33,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.TreeSet; import de.anomic.server.serverMemory; import de.anomic.server.logging.serverLog; @@ -172,9 +173,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr System.out.flush(); return ri; } - - private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroByteOrder objectOrder) throws IOException { + private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroByteOrder objectOrder) throws IOException { kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80)); Iterator content = super.col[0].contentNodes(-1); kelondroNode node; @@ -315,6 +315,32 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } + public synchronized ArrayList removeDoubles() throws IOException { + ArrayList indexreport = index.removeDoubles(); + ArrayList report = new ArrayList(); + Iterator i = indexreport.iterator(); + Integer[] is; + kelondroRowSet rows; + TreeSet d = new TreeSet(); + while (i.hasNext()) { + is = i.next(); + rows = new kelondroRowSet(this.rowdef, is.length); + for (int j = 0; j < is.length; j++) { + d.add(is[j]); + rows.addUnique(this.get(is[j].intValue())); + } + report.add(rows); + } + // finally delete the affected rows, but start with largest id first, othervise we overwrite wrong entries + Integer s; + while (d.size() > 0) { + s = d.last(); + d.remove(s); + this.remove(s.intValue()); + } + return report; + } + public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException { assert keepOrder == false; // the underlying data structure is a file, where the order cannot be maintained. Gaps are filled with new values. int i = index.removei(key); diff --git a/source/de/anomic/kelondro/kelondroIndex.java b/source/de/anomic/kelondro/kelondroIndex.java index 0c786ed42..8e806dab7 100644 --- a/source/de/anomic/kelondro/kelondroIndex.java +++ b/source/de/anomic/kelondro/kelondroIndex.java @@ -51,6 +51,7 @@ package de.anomic.kelondro; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -67,6 +68,7 @@ public interface kelondroIndex { public void putMultiple(List rows) throws IOException; // for R/W head path optimization public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check public void addUniqueMultiple(List rows) throws IOException; // no double-check + public ArrayList removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique) public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException; public kelondroRow.Entry removeOne() throws IOException; public kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException; // iterates only the key diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index e6a5f5b90..57061037c 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -24,7 +24,7 @@ package de.anomic.kelondro; -import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; @@ -65,7 +65,7 @@ public class kelondroRAMIndex implements kelondroIndex { } } - public synchronized kelondroRow.Entry get(byte[] key) throws IOException { + public synchronized kelondroRow.Entry get(byte[] key) { assert (key != null); finishInitialization(); kelondroRow.Entry indexentry = index0.get(key); @@ -73,14 +73,14 @@ public class kelondroRAMIndex implements kelondroIndex { return index1.get(key); } - public boolean has(byte[] key) throws IOException { + public boolean has(byte[] key) { assert (key != null); finishInitialization(); if (index0.has(key)) return true; return index1.has(key); } - public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) throws IOException { + public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) { assert (entry != null); finishInitialization(); // if the new entry is within the initialization part, just overwrite it @@ -93,18 +93,18 @@ public class kelondroRAMIndex implements kelondroIndex { return index1.put(entry); } - public Entry put(Entry row, Date entryDate) throws IOException { + public Entry put(Entry row, Date entryDate) { return put(row); } - public void putMultiple(List rows) throws IOException { + public void putMultiple(List rows) { Iterator i = rows.iterator(); while (i.hasNext()) { put(i.next()); } } - public synchronized void addUnique(kelondroRow.Entry entry) throws IOException { + public synchronized void addUnique(kelondroRow.Entry entry) { assert (entry != null); if (index1 == null) { // we are in the initialization phase @@ -115,14 +115,20 @@ public class kelondroRAMIndex implements kelondroIndex { } } - public void addUniqueMultiple(List rows) throws IOException { + public void addUniqueMultiple(List rows) { Iterator i = rows.iterator(); while (i.hasNext()) { addUnique(i.next()); } } - public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException { + public synchronized ArrayList removeDoubles() { + // finish initialization phase explicitely + if (index1 == null) index1 = new kelondroRowSet(rowdef, 0); + return index0.removeDoubles(); + } + + public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) { assert keepOrder == true; // if this is false, the index must be re-ordered so many times which will cause a major CPU usage finishInitialization(); // if the new entry is within the initialization part, just delete it @@ -135,7 +141,7 @@ public class kelondroRAMIndex implements kelondroIndex { return index1.remove(key, keepOrder); } - public synchronized kelondroRow.Entry removeOne() throws IOException { + public synchronized kelondroRow.Entry removeOne() { if ((index1 != null) && (index1.size() != 0)) { return index1.removeOne(); } @@ -156,7 +162,7 @@ public class kelondroRAMIndex implements kelondroIndex { return index0.size() + index1.size(); } - public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { + public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) { // returns the key-iterator of the underlying kelondroIndex if (index1 == null) { // finish initialization phase @@ -182,7 +188,7 @@ public class kelondroRAMIndex implements kelondroIndex { true); } - public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { + public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) { // returns the row-iterator of the underlying kelondroIndex if (index1 == null) { // finish initialization phase diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 975763404..0f450ebfe 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -26,6 +26,7 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Random; @@ -583,9 +584,9 @@ public class kelondroRowCollection { try { while (i >= 0) { if (compare(i, i + 1) == 0) { - removeRow(i, false); + removeRow(i + 1, false); d++; - if (i < chunkcount - 2) u = false; + if (i + 1 < chunkcount - 1) u = false; } i--; if (System.currentTimeMillis() - t > 10000) { @@ -599,6 +600,45 @@ public class kelondroRowCollection { } } + public synchronized ArrayList removeDoubles() { + assert (this.rowdef.objectOrder != null); + // removes double-occurrences of chunks + // in contrast to uniq() this removes also the remaining, non-double entry that had a double-occurrance to the others + // all removed chunks are returned in an array + this.sort(); + ArrayList report = new ArrayList(); + if (chunkcount < 2) return report; + int i = chunkcount - 2; + int d = 0; + boolean u = true; + kelondroRowSet collection = new kelondroRowSet(this.rowdef, 2); + try { + while (i >= 0) { + if (compare(i, i + 1) == 0) { + collection.addUnique(get(i + 1)); + removeRow(i + 1, false); + d++; + if (i + 1 < chunkcount - 1) u = false; + } else if (collection.size() > 0) { + // finish collection of double occurrences + collection.addUnique(get(i + 1)); + removeRow(i + 1, false); + d++; + if (i + 1 < chunkcount - 1) u = false; + collection.trim(false); + report.add(collection); + collection = new kelondroRowSet(this.rowdef, 2); + } + i--; + } + } catch (RuntimeException e) { + serverLog.logWarning("kelondroRowCollection", e.getMessage(), e); + } finally { + if (!u) this.sort(); + } + return report; + } + public synchronized boolean isSorted() { assert (this.rowdef.objectOrder != null); if (chunkcount <= 1) return true; @@ -711,7 +751,18 @@ public class kelondroRowCollection { kelondroBase64Order.enhancedCoder, 0); kelondroRowCollection a = new kelondroRowCollection(r, testsize); + a.add("AAAAAAAAAAAA".getBytes()); + a.add("BBBBBBBBBBBB".getBytes()); + a.add("BBBBBBBBBBBB".getBytes()); + a.add("BBBBBBBBBBBB".getBytes()); + a.add("CCCCCCCCCCCC".getBytes()); + ArrayList del = a.removeDoubles(); + System.out.println(del + "rows double"); + Iterator j = a.rows(); + while (j.hasNext()) System.out.println(new String(j.next().bytes())); + System.out.println("kelondroRowCollection test with size = " + testsize); + a = new kelondroRowCollection(r, testsize); long t0 = System.currentTimeMillis(); random = new Random(0); for (int i = 0; i < testsize; i++) a.add(randomHash().getBytes()); @@ -807,40 +858,4 @@ public class kelondroRowCollection { System.out.println(daysSince2000(System.currentTimeMillis())); */ } - - /* -kelondroRowCollection test with size = 10000 -create c : 134 milliseconds, 74 entries/millisecond -copy c -> d: 47 milliseconds, 212 entries/millisecond -sort c (1) : 66 milliseconds, 151 entries/millisecond -sort d (2) : 23 milliseconds, 434 entries/millisecond -uniq c : 3 milliseconds, 3333 entries/millisecond -uniq d : 2 milliseconds, 5000 entries/millisecond -create e : 528 milliseconds, 18 entries/millisecond -sort e (2) : 13 milliseconds, 769 entries/millisecond -uniq e : 2 milliseconds, 5000 entries/millisecond -c isSorted = true: 2 milliseconds -d isSorted = true: 3 milliseconds -e isSorted = true: 2 milliseconds -e allfound = true: 85 milliseconds -e noghosts = true: 75 milliseconds -Result size: c = 10000, d = 10000, e = 10000 - -kelondroRowCollection test with size = 100000 -create c : 589 milliseconds, 169 entries/millisecond -copy c -> d: 141 milliseconds, 709 entries/millisecond -sort c (1) : 268 milliseconds, 373 entries/millisecond -sort d (2) : 187 milliseconds, 534 entries/millisecond -uniq c : 13 milliseconds, 7692 entries/millisecond -uniq d : 14 milliseconds, 7142 entries/millisecond -create e : 22068 milliseconds, 4 entries/millisecond -sort e (2) : 167 milliseconds, 598 entries/millisecond -uniq e : 14 milliseconds, 7142 entries/millisecond -c isSorted = true: 13 milliseconds -d isSorted = true: 14 milliseconds -e isSorted = true: 13 milliseconds -e allfound = true: 815 milliseconds -e noghosts = true: 787 milliseconds -Result size: c = 100000, d = 100000, e = 100000 - */ } \ No newline at end of file diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 81e481270..31cdb700e 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -24,7 +24,6 @@ package de.anomic.kelondro; -import java.io.IOException; import java.util.Date; import java.util.Iterator; import java.util.List; @@ -76,7 +75,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd this.profile = new kelondroProfile(); } - public synchronized boolean has(byte[] key) throws IOException { + public synchronized boolean has(byte[] key) { return (get(key) != null); } @@ -92,7 +91,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd return entry; } - public synchronized void putMultiple(List rows) throws IOException { + public synchronized void putMultiple(List rows) { Iterator i = rows.iterator(); while (i.hasNext()) put(i.next()); } diff --git a/source/de/anomic/kelondro/kelondroSQLTable.java b/source/de/anomic/kelondro/kelondroSQLTable.java index e3f5c73db..0ddde077c 100644 --- a/source/de/anomic/kelondro/kelondroSQLTable.java +++ b/source/de/anomic/kelondro/kelondroSQLTable.java @@ -32,6 +32,7 @@ import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; @@ -140,6 +141,10 @@ public class kelondroSQLTable implements kelondroIndex { return (get(key) != null); } + public ArrayList removeDoubles() { + return new ArrayList(); + } + public kelondroRow.Entry get(byte[] key) throws IOException { try { String sqlQuery = new String diff --git a/source/de/anomic/kelondro/kelondroSplitTable.java b/source/de/anomic/kelondro/kelondroSplitTable.java index a96690a96..9e81fa8cc 100644 --- a/source/de/anomic/kelondro/kelondroSplitTable.java +++ b/source/de/anomic/kelondro/kelondroSplitTable.java @@ -28,6 +28,7 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.HashMap; @@ -292,6 +293,15 @@ public class kelondroSplitTable implements kelondroIndex { while (i.hasNext()) addUnique(i.next(), entryDate); } + public ArrayList removeDoubles() throws IOException { + Iterator i = tables.values().iterator(); + ArrayList report = new ArrayList(); + while (i.hasNext()) { + report.addAll(i.next().removeDoubles()); + } + return report; + } + public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException { Iterator i = tables.values().iterator(); kelondroIndex table; diff --git a/source/de/anomic/kelondro/kelondroSplittedTree.java b/source/de/anomic/kelondro/kelondroSplittedTree.java deleted file mode 100644 index dbd57029e..000000000 --- a/source/de/anomic/kelondro/kelondroSplittedTree.java +++ /dev/null @@ -1,344 +0,0 @@ -// kelondroSplittedTree.java -// ------------------------- -// part of The Kelondro Database -// (C) by Michael Peter Christen; mc@anomic.de -// first published on http://www.anomic.de -// Frankfurt, Germany, 2006 -// created 07.01.2006 -// -// $LastChangedDate: 2005-09-22 22:01:26 +0200 (Thu, 22 Sep 2005) $ -// $LastChangedRevision: 774 $ -// $LastChangedBy: orbiter $ -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -// -// Using this software in any meaning (reading, learning, copying, compiling, -// running) means that you agree that the Author(s) is (are) not responsible -// for cost, loss of data or any harm that may be caused directly or indirectly -// by usage of this softare or this documentation. The usage of this software -// is on your own risk. The installation and usage (starting/running) of this -// software may allow other people or application to access your computer and -// any attached devices and is highly dependent on the configuration of the -// software which must be done by the user of the software; the author(s) is -// (are) also not responsible for proper configuration and usage of the -// software, even if provoked by documentation provided together with -// the software. -// -// Any changes to this file according to the GPL as documented in the file -// gpl.txt aside this file in the shipment you received can be done to the -// lines that follows this copyright notice here, but changes must not be -// done inside the copyright notive above. A re-distribution must contain -// the intact and unchanged copyright notice. -// Contributions and changes to the program code must be marked as such. - -package de.anomic.kelondro; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.Iterator; -import java.util.List; - -public class kelondroSplittedTree implements kelondroIndex { - - private kelondroTree[] ktfs; - private kelondroByteOrder order; - private int ff; - private String filename; - - private static File dbFile(File path, String filenameStub, int forkfactor, int columns, int number) { - String ns = Integer.toHexString(number).toUpperCase(); - while (ns.length() < 2) ns = "0" + ns; - String ff = Integer.toHexString(forkfactor).toUpperCase(); - while (ff.length() < 2) ff = "0" + ff; - String co = Integer.toHexString(columns).toUpperCase(); - while (co.length() < 2) co = "0" + co; - return new File(path, filenameStub + "." + ff + "." + co + "." + ns + ".ktc"); - } - - public kelondroSplittedTree(File pathToFiles, String filenameStub, kelondroByteOrder objectOrder, - long preloadTime, - int forkfactor, kelondroRow rowdef, int txtProps, int txtPropsWidth) { - try { - this.filename = new File(pathToFiles, filenameStub).getCanonicalPath(); - } catch (IOException e) { - this.filename = null; - } - ktfs = new kelondroTree[forkfactor]; - File f; - for (int i = 0; i < forkfactor; i++) { - f = dbFile(pathToFiles, filenameStub, forkfactor, rowdef.columns(), i); - ktfs[i] = kelondroTree.open(f, true, preloadTime / forkfactor, rowdef, txtProps, txtPropsWidth); - } - this.order = objectOrder; - ff = forkfactor; - } - - public void reset() throws IOException { - for (int i = 0; i < ktfs.length; i++) { - ktfs[i].reset(); - } - } - - public void close() { - for (int i = 0; i < ktfs.length; i++) ktfs[i].close(); - } - - public int size() { - return ktfs[0].size(); - } - - public kelondroRow row() { - return ktfs[0].row(); - } - - private int partition(byte[] key) { - // return number of db file where this key should be managed - return (int) order.partition(key, ff); - } - - public boolean has(byte[] key) throws IOException { - throw new UnsupportedOperationException("has should not be used with kelondroSplittedTree."); - } - - public kelondroRow.Entry get(byte[] key) throws IOException { - return ktfs[partition(key)].get(key); - } - - @SuppressWarnings("unchecked") - public synchronized void putMultiple(List rows) throws IOException { - Iterator i = rows.iterator(); - kelondroRow.Entry row; - ArrayList[] parts = new ArrayList[ktfs.length]; - for (int j = 0; j < ktfs.length; j++) parts[j] = new ArrayList(); - while (i.hasNext()) { - row = i.next(); - parts[partition(row.getColBytes(0))].add(row); - } - for (int j = 0; j < ktfs.length; j++) ktfs[j].putMultiple(parts[j]); - } - - public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { - return put(row); - } - - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { - throw new UnsupportedOperationException(); - } - - public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) { - throw new UnsupportedOperationException(); - } - - public synchronized void addUniqueMultiple(List rows) throws IOException { - throw new UnsupportedOperationException(); - } - - public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { - return ktfs[partition(row.getColBytes(0))].put(row); - } - - public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException { - return ktfs[partition(key)].remove(key, keepOrder); - } - - public kelondroRow.Entry removeOne() throws IOException { - // removes one entry from the partition with the most entries - int maxc = -1, maxi = 0; - for (int i = 0; i < ktfs.length; i++) { - if (ktfs[i].size() > maxc) { - maxc = ktfs[i].size(); - maxi = i; - } - } - if (maxc > 0) { - return ktfs[maxi].removeOne(); - } else { - return null; - } - } - - public kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { - return new rowIterator(up, firstKey); - } - - public class rowIterator implements kelondroCloneableIterator { - - int c = 0; - Iterator ktfsI; - boolean up; - - public rowIterator(boolean up, byte[] firstKey) throws IOException { - this.up = up; - c = (up) ? 0 : (ff - 1); - if (firstKey != null) throw new UnsupportedOperationException("ktfsIterator does not work with a start key"); - ktfsI = ktfs[c].rows(up, firstKey); // FIXME: this works only correct with firstKey == null - } - - public rowIterator clone(Object secondKey) { - try { - return new rowIterator(up, (byte[]) secondKey); - } catch (IOException e) { - return null; - } - } - - public boolean hasNext() { - return ((ktfsI.hasNext()) || - ((up) && (c < ff)) || - ((!(up)) && (c > 0))); - } - - public kelondroRow.Entry next() { - if (ktfsI.hasNext()) return ktfsI.next(); - if (up) { - if (c < (ff - 1)) { - c++; - try { - ktfsI = ktfs[c].rows(true, null); - } catch (IOException e) { - return null; - } - return ktfsI.next(); - } else { - return null; - } - } else { - if (c > 0) { - c--; - try { - ktfsI = ktfs[c].rows(false, null); - } catch (IOException e) { - return null; - } - return ktfsI.next(); - } else { - return null; - } - } - } - - public void remove() { - ktfsI.remove(); - } - - } - - public kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { - return new keyIterator(up, firstKey); - } - - public class keyIterator implements kelondroCloneableIterator { - - int c = 0; - Iterator ktfsI; - boolean up; - - public keyIterator(boolean up, byte[] firstKey) throws IOException { - this.up = up; - c = (up) ? 0 : (ff - 1); - if (firstKey != null) throw new UnsupportedOperationException("ktfsIterator does not work with a start key"); - ktfsI = ktfs[c].keys(up, firstKey); // FIXME: this works only correct with firstKey == null - } - - public keyIterator clone(Object secondKey) { - try { - return new keyIterator(up, (byte[]) secondKey); - } catch (IOException e) { - return null; - } - } - - public boolean hasNext() { - return ((ktfsI.hasNext()) || - ((up) && (c < ff)) || - ((!(up)) && (c > 0))); - } - - public byte[] next() { - if (ktfsI.hasNext()) return ktfsI.next(); - if (up) { - if (c < (ff - 1)) { - c++; - try { - ktfsI = ktfs[c].keys(true, null); - } catch (IOException e) { - return null; - } - return ktfsI.next(); - } else { - return null; - } - } else { - if (c > 0) { - c--; - try { - ktfsI = ktfs[c].keys(false, null); - } catch (IOException e) { - return null; - } - return ktfsI.next(); - } else { - return null; - } - } - } - - public void remove() { - ktfsI.remove(); - } - - } - - public kelondroByteOrder order() { - return this.order; - } - - public int primarykey() { - return 0; - } - - public kelondroProfile profile() { - kelondroProfile[] profiles = new kelondroProfile[ktfs.length]; - for (int i = 0; i < ktfs.length; i++) profiles[i] = ktfs[i].profile(); - return kelondroProfile.consolidate(profiles); - } - - public final int cacheObjectChunkSize() { - // dummy method - return -1; - } - - public long[] cacheObjectStatus() { - // dummy method - return null; - } - - public final int cacheNodeChunkSize() { - // returns the size that the node cache uses for a single entry - return -1; - } - - public final int[] cacheNodeStatus() { - // a collection of different node cache status values - return new int[]{0,0,0,0,0,0,0,0,0,0}; - } - - public String filename() { - return this.filename; - } - -} diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 781067ed4..d8b559897 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -50,6 +50,7 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.RandomAccessFile; +import java.util.ArrayList; import java.util.Comparator; import java.util.Date; import java.util.HashSet; @@ -186,7 +187,12 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex } return result; } - + + public ArrayList removeDoubles() { + // this data structure cannot have doubles; return empty array + return new ArrayList(); + } + public class Search { // a search object combines the results of a search in the tree, which are