From dc26d6262b9ed6f319c6d870e0ae23c030e4a1b6 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 17 Jan 2008 12:12:52 +0000 Subject: [PATCH] - removed write buffer from kelondroCache (was never used because buggy; will now be replaced by new EcoBuffer) - added new data structure 'eco' for an index file that should use only 50% of write-IO compared to kelondroFlex The new eco index is not used yet, but already successfully tested with the collectionIndex The main purpose is to replace the kelondroFlex at every point when enough RAM is available. Otherwise, the kelondroFlex stays as an option in case of low memory (which then can even use a file-index) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4337 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/dbtest.java | 54 +- .../anomic/kelondro/kelondroBase64Order.java | 4 +- .../kelondro/kelondroBufferedEcoFS.java | 149 ++++++ .../anomic/kelondro/kelondroBytesIntMap.java | 8 +- source/de/anomic/kelondro/kelondroCache.java | 230 +------- .../kelondro/kelondroCollectionIndex.java | 51 +- source/de/anomic/kelondro/kelondroDyn.java | 2 +- source/de/anomic/kelondro/kelondroEcoFS.java | 495 ++++++++++++++++++ .../de/anomic/kelondro/kelondroEcoTable.java | 463 ++++++++++++++++ .../kelondro/kelondroFlexSplitTable.java | 12 +- .../de/anomic/kelondro/kelondroFlexTable.java | 4 +- .../de/anomic/kelondro/kelondroMapTable.java | 2 +- .../de/anomic/kelondro/kelondroRAMIndex.java | 6 + source/de/anomic/kelondro/kelondroRow.java | 104 ++-- .../kelondro/kelondroRowCollection.java | 1 + source/de/anomic/kelondro/kelondroTree.java | 2 +- .../de/anomic/plasma/plasmaCrawlBalancer.java | 2 +- source/de/anomic/plasma/plasmaCrawlLURL.java | 7 - .../de/anomic/plasma/plasmaCrawlStacker.java | 14 +- .../de/anomic/plasma/plasmaSwitchboard.java | 1 - source/de/anomic/yacy/yacyNewsDB.java | 12 +- 21 files changed, 1259 insertions(+), 364 deletions(-) create mode 100644 source/de/anomic/kelondro/kelondroBufferedEcoFS.java create mode 100644 
source/de/anomic/kelondro/kelondroEcoFS.java create mode 100644 source/de/anomic/kelondro/kelondroEcoTable.java diff --git a/source/dbtest.java b/source/dbtest.java index b59557e48..910c80608 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -14,6 +14,7 @@ import javax.imageio.ImageIO; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroCloneableIterator; +import de.anomic.kelondro.kelondroEcoTable; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; @@ -163,10 +164,10 @@ public class dbtest { final STEntry dbEntry = new STEntry(entryBytes.getColBytes(0), entryBytes.getColBytes(1)); if (!dbEntry.isValid()) { System.out.println("INVALID: " + dbEntry); - }/* else { + } /* else { System.out.println("_VALID_: " + dbEntry); - getTable().remove(entry.getKey()); - }*/ + getTable().remove(entry.getKey(), true); + } */ } } catch (IOException e) { System.err.println(e); @@ -197,7 +198,7 @@ public class dbtest { } if (dbe.equals("kelondroTree")) { File tablefile = new File(tablename + ".kelondro.db"); - table = new kelondroCache(new kelondroTree(tablefile, true, preload, testRow), true, false); + table = new kelondroCache(new kelondroTree(tablefile, true, preload, testRow)); } if (dbe.equals("kelondroSplittedTree")) { File tablepath = new File(tablename).getParentFile(); @@ -213,6 +214,9 @@ public class dbtest { File tablepath = new File(tablename).getParentFile(); table = new kelondroFlexSplitTable(tablepath, new File(tablename).getName(), preload, testRow, true); } + if (dbe.equals("kelondroEcoTable")) { + table = new kelondroEcoTable(new File(tablename), testRow, 100); + } if (dbe.equals("mysql")) { table = new kelondroSQLTable("mysql", testRow); } @@ -372,7 +376,7 @@ public class dbtest { if (command.equals("stressThreaded")) { // // args: - // example: kelondroFlexTable stressThreaded /Users/admin/dbtest 500 
50 0 + // example: kelondroFlexTable stressThreaded /Users/admin/dbtest 500 50 0 long writeCount = Long.parseLong(args[3]); long readCount = Long.parseLong(args[4]); long randomstart = Long.parseLong(args[5]); @@ -384,22 +388,22 @@ public class dbtest { HashSet jcontrol = new HashSet(); kelondroIntBytesMap kcontrol = new kelondroIntBytesMap(1, 0); for (int i = 0; i < writeCount; i++) { - r = Math.abs(random.nextLong() % 1000); - jcontrol.add(new Long(r)); - kcontrol.putb((int) r, "x".getBytes()); + r = Math.abs(random.nextLong() % 1000); + jcontrol.add(new Long(r)); + kcontrol.putb((int) r, "x".getBytes()); serverInstantThread.oneTimeJob(new WriteJob(table, r), 0, 50); if (random.nextLong() % 5 == 0) ra.add(new Long(r)); for (int j = 0; j < readCount; j++) { serverInstantThread.oneTimeJob(new ReadJob(table, random.nextLong() % writeCount), random.nextLong() % 1000, 20); } if ((ra.size() > 0) && (random.nextLong() % 7 == 0)) { - rc++; - p = Math.abs(random.nextInt()) % ra.size(); - R = (Long) ra.get(p); - jcontrol.remove(R); - kcontrol.removeb((int) R.longValue()); - System.out.println("remove: " + R.longValue()); - serverInstantThread.oneTimeJob(new RemoveJob(table, ((Long) ra.remove(p)).longValue()), 0, 50); + rc++; + p = Math.abs(random.nextInt()) % ra.size(); + R = (Long) ra.get(p); + jcontrol.remove(R); + kcontrol.removeb((int) R.longValue()); + System.out.println("remove: " + R.longValue()); + serverInstantThread.oneTimeJob(new RemoveJob(table, ((Long) ra.remove(p)).longValue()), 0, 50); } } System.out.println("removed: " + rc + ", size of jcontrol set: " + jcontrol.size() + ", size of kcontrol set: " + kcontrol.size()); @@ -424,22 +428,22 @@ public class dbtest { HashSet jcontrol = new HashSet(); kelondroIntBytesMap kcontrol = new kelondroIntBytesMap(1, 0); for (int i = 0; i < writeCount; i++) { - //if (i == 30) random = new Random(randomstart); - r = Math.abs(random.nextLong() % 1000); - jcontrol.add(new Long(r)); - kcontrol.putb((int) r, "x".getBytes()); + 
//if (i == 30) random = new Random(randomstart); + r = Math.abs(random.nextLong() % 1000); + jcontrol.add(new Long(r)); + kcontrol.putb((int) r, "x".getBytes()); new WriteJob(table, r).run(); if (random.nextLong() % 5 == 0) ra.add(new Long(r)); for (int j = 0; j < readCount; j++) { new ReadJob(table, random.nextLong() % writeCount).run(); } if ((ra.size() > 0) && (random.nextLong() % 7 == 0)) { - rc++; - p = Math.abs(random.nextInt()) % ra.size(); - R = (Long) ra.get(p); - jcontrol.remove(R); - kcontrol.removeb((int) R.longValue()); - new RemoveJob(table, ((Long) ra.remove(p)).longValue()).run(); + rc++; + p = Math.abs(random.nextInt()) % ra.size(); + R = (Long) ra.get(p); + jcontrol.remove(R); + kcontrol.removeb((int) R.longValue()); + new RemoveJob(table, ((Long) ra.remove(p)).longValue()).run(); } } try {Thread.sleep(1000);} catch (InterruptedException e) {} diff --git a/source/de/anomic/kelondro/kelondroBase64Order.java b/source/de/anomic/kelondro/kelondroBase64Order.java index 97c76777b..c3fd0bb83 100644 --- a/source/de/anomic/kelondro/kelondroBase64Order.java +++ b/source/de/anomic/kelondro/kelondroBase64Order.java @@ -345,8 +345,8 @@ public class kelondroBase64Order extends kelondroAbstractOrder implement } public final int comparePivot(byte[] compiledPivot, byte[] b, int boffset, int blength) { - assert zero == null; - assert asc; + assert zero == null; + assert asc; assert (boffset + blength <= b.length) : "b.length = " + b.length + ", boffset = " + boffset + ", blength = " + blength; int i = 0; final int bl = Math.min(blength, b.length - boffset); diff --git a/source/de/anomic/kelondro/kelondroBufferedEcoFS.java b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java new file mode 100644 index 000000000..d5b98e482 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroBufferedEcoFS.java @@ -0,0 +1,149 @@ +// kelondroBufferedEcoFS.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 14.01.2008 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; + +public class kelondroBufferedEcoFS { + + private kelondroEcoFS efs; + private int maxEntries; + private TreeMap buffer; + + /* + * The kelondroBufferedEcoFS extends the IO reduction to EcoFS by providing a + * write buffer to elements that are inside the filed entries of the file + * That means, each time, an entry is written to the end of the file, it is not buffered + */ + + public kelondroBufferedEcoFS(kelondroEcoFS efs, int maxEntries) throws IOException { + this.efs = efs; + this.maxEntries = maxEntries; + this.buffer = new TreeMap(); + } + + private void flushBuffer() throws IOException { + Iterator> i = buffer.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = i.next(); + efs.put(entry.getKey().intValue(), entry.getValue(), 0); + } + buffer.clear(); + } + + public synchronized int size() throws IOException { + return efs.size(); + } + + public File 
filename() { + return efs.filename(); + } + + public synchronized void close() { + try { + flushBuffer(); + } catch (IOException e) { + e.printStackTrace(); + } + efs.close(); + efs = null; + } + + public synchronized void finalize() { + if (this.efs != null) this.close(); + } + + public synchronized void get(int index, byte[] b, int start) throws IOException { + assert b.length - start >= efs.recordsize; + if (index >= size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.get(" + index + ") outside bounds (" + this.size() + ")"); + byte[] bb = buffer.get(new Integer(index)); + if (bb == null) { + efs.get(index, b, start); + } else { + System.arraycopy(bb, 0, b, start, efs.recordsize); + } + } + + public synchronized void put(int index, byte[] b, int start) throws IOException { + assert b.length - start >= efs.recordsize; + if (index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")"); + if (index == efs.size()) { + efs.put(index, b, start); + } else { + byte[] bb = new byte[efs.recordsize]; + System.arraycopy(b, start, bb, 0, efs.recordsize); + buffer.put(new Integer(index), bb); + if (buffer.size() > this.maxEntries) flushBuffer(); + } + } + + public synchronized void add(byte[] b, int start) throws IOException { + put(size(), b, start); + } + + public synchronized void clean(int index, byte[] b, int start) throws IOException { + assert b.length - start >= efs.recordsize; + if (index >= size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")"); + byte[] bb = buffer.get(new Integer(index)); + if (bb == null) { + efs.clean(index, b, start); + } else { + System.arraycopy(bb, 0, b, start, efs.recordsize); + buffer.remove(new Integer(index)); + efs.clean(index); + } + } + + public synchronized void clean(int index) throws IOException { + if (index >= size()) throw new 
IndexOutOfBoundsException("kelondroBufferedEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")"); + buffer.remove(new Integer(index)); + efs.clean(index); + } + + public synchronized void cleanLast(byte[] b, int start) throws IOException { + assert b.length - start >= efs.recordsize; + Integer i = new Integer(size() - 1); + byte[] bb = buffer.get(i); + if (bb == null) { + efs.clean(i.intValue(), b, start); + } else { + System.arraycopy(bb, 0, b, start, efs.recordsize); + buffer.remove(i); + efs.clean(i.intValue()); + } + } + + public synchronized void cleanLast() throws IOException { + Integer i = new Integer(size() - 1); + buffer.remove(i); + efs.clean(i.intValue()); + } + +} diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index ab04db770..133ec9ebf 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -55,7 +55,7 @@ public class kelondroBytesIntMap { } public synchronized int puti(byte[] key, int i) throws IOException { - assert i >= 0 : "i = " + i; + assert i >= 0 : "i = " + i; assert (key != null); kelondroRow.Entry newentry = index.row().newEntry(); newentry.setCol(0, key); @@ -66,7 +66,7 @@ public class kelondroBytesIntMap { } public synchronized void addi(byte[] key, int i) throws IOException { - assert i >= 0 : "i = " + i; + assert i >= 0 : "i = " + i; assert (key != null); kelondroRow.Entry newentry = this.rowdef.newEntry(); newentry.setCol(0, key); @@ -82,8 +82,8 @@ public class kelondroBytesIntMap { } public synchronized int removeonei() throws IOException { - kelondroRow.Entry indexentry = index.removeOne(); - if (indexentry == null) return -1; + kelondroRow.Entry indexentry = index.removeOne(); + if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } diff --git a/source/de/anomic/kelondro/kelondroCache.java b/source/de/anomic/kelondro/kelondroCache.java index 2c1dd2b2d..da1186b97 
100644 --- a/source/de/anomic/kelondro/kelondroCache.java +++ b/source/de/anomic/kelondro/kelondroCache.java @@ -56,29 +56,21 @@ public class kelondroCache implements kelondroIndex { // class objects private kelondroRowSet readHitCache; private kelondroRowSet readMissCache; - private kelondroRowSet writeBufferUnique; // entries of that buffer are not contained in index - private kelondroRowSet writeBufferDoubles; // entries of that buffer shall overwrite entries in index private kelondroIndex index; private kelondroRow keyrow; private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush; private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble, hasnotDelete; - private boolean read, write; - public kelondroCache(kelondroIndex backupIndex, boolean read, boolean write) { - assert write == false; + public kelondroCache(kelondroIndex backupIndex) { this.index = backupIndex; - this.read = read; - this.write = write; init(); objectTracker.put(backupIndex.filename(), this); } private void init() { this.keyrow = new kelondroRow(new kelondroColumn[]{index.row().column(index.row().primaryKeyIndex)}, index.row().objectOrder, 0); - this.readHitCache = (read) ? new kelondroRowSet(index.row(), 0) : null; - this.readMissCache = (read) ? new kelondroRowSet(this.keyrow, 0) : null; - this.writeBufferUnique = (write) ? new kelondroRowSet(index.row(), 0) : null; - this.writeBufferDoubles = (write) ? new kelondroRowSet(index.row(), 0) : null; + this.readHitCache = new kelondroRowSet(index.row(), 0); + this.readMissCache = new kelondroRowSet(this.keyrow, 0); this.readHit = 0; this.readMiss = 0; this.writeUnique = 0; @@ -97,9 +89,7 @@ public class kelondroCache implements kelondroIndex { } public int writeBufferSize() { - return - ((writeBufferUnique == null) ? 0 : writeBufferUnique.size()) + - ((writeBufferDoubles == null) ? 
0 : writeBufferDoubles.size()); + return 0; } public kelondroProfile profile() { @@ -163,84 +153,6 @@ public class kelondroCache implements kelondroIndex { return kelondroCachedRecords.cacheGrowStatus(serverMemory.available(), memStopGrow, memStartShrink); } - private void flushUnique() throws IOException { - if (writeBufferUnique == null) return; - synchronized (writeBufferUnique) { - Iterator i = writeBufferUnique.rows(); - while (i.hasNext()) { - this.index.addUnique(i.next()); - this.cacheFlush++; - } - writeBufferUnique.clear(); - } - } - - private void flushUnique(int maxcount) throws IOException { - if (writeBufferUnique == null) return; - if (maxcount == 0) return; - synchronized (writeBufferUnique) { - kelondroRowCollection delete = new kelondroRowCollection(this.keyrow, maxcount); - Iterator i = writeBufferUnique.rows(); - kelondroRow.Entry row; - while ((i.hasNext()) && (maxcount-- > 0)) { - row = i.next(); - delete.add(row.getPrimaryKeyBytes()); - this.index.addUnique(row); - this.cacheFlush++; - } - i = delete.rows(); - while (i.hasNext()) { - writeBufferUnique.remove(((kelondroRow.Entry) i.next()).getColBytes(0), true); - } - delete = null; - writeBufferUnique.trim(true); - } - } - - private void flushDoubles() throws IOException { - if (writeBufferDoubles == null) return; - synchronized (writeBufferDoubles) { - Iterator i = writeBufferDoubles.rows(); - while (i.hasNext()) { - this.index.put(i.next()); - this.cacheFlush++; - } - writeBufferDoubles.clear(); - } - } - - private void flushDoubles(int maxcount) throws IOException { - if (writeBufferDoubles == null) return; - if (maxcount == 0) return; - synchronized (writeBufferDoubles) { - kelondroRowCollection delete = new kelondroRowCollection(this.keyrow, maxcount); - Iterator i = writeBufferDoubles.rows(); - kelondroRow.Entry row; - while ((i.hasNext()) && (maxcount-- > 0)) { - row = i.next(); - delete.add(row.getPrimaryKeyBytes()); - this.index.addUnique(row); - this.cacheFlush++; - } - i = 
delete.rows(); - while (i.hasNext()) writeBufferDoubles.remove(((kelondroRow.Entry) i.next()).getColBytes(0), true); - delete = null; - writeBufferDoubles.trim(true); - } - } - - public void flushSome() throws IOException { - if (writeBufferUnique != null) flushUnique(writeBufferUnique.size() / 10); - if (writeBufferDoubles != null) flushDoubles(writeBufferDoubles.size() / 10); - } - - private int sumRecords() { - return - ((readHitCache == null) ? 0 : readHitCache.size()) + - ((writeBufferUnique == null) ? 0 : writeBufferUnique.size()) + - ((writeBufferDoubles == null) ? 0 : writeBufferDoubles.size()); - } - private boolean checkMissSpace() { // returns true if it is allowed to write into this cache if (cacheGrowStatus() < 1) { @@ -256,38 +168,21 @@ public class kelondroCache implements kelondroIndex { // returns true if it is allowed to write into this cache int status = cacheGrowStatus(); if (status < 1) { - flushUnique(); - flushDoubles(); if (readHitCache != null) { readHitCache.clear(); } return false; } if (status < 2) { - int s = sumRecords(); - flushDoubles(s / 4); - flushUnique(s / 4); if (readHitCache != null) readHitCache.clear(); } return true; } public synchronized void close() { - try { - flushUnique(); - } catch (IOException e) { - e.printStackTrace(); - } - try { - flushDoubles(); - } catch (IOException e) { - e.printStackTrace(); - } index.close(); readHitCache = null; readMissCache = null; - writeBufferUnique = null; - writeBufferDoubles = null; } public boolean has(byte[] key) throws IOException { @@ -315,20 +210,6 @@ public class kelondroCache implements kelondroIndex { return entry; } } - if (writeBufferUnique != null) { - entry = writeBufferUnique.get(key); - if (entry != null) { - this.readHit++; - return entry; - } - } - if (writeBufferDoubles != null) { - entry = writeBufferDoubles.get(key); - if (entry != null) { - this.readHit++; - return entry; - } - } // finally ask the backend index this.readMiss++; @@ -371,14 +252,7 @@ public class 
kelondroCache implements kelondroIndex { if (readMissCache != null) { if (readMissCache.remove(key, true) != null) { this.hasnotHit++; - // the entry does not exist before - if (writeBufferUnique != null) { - // since we know that the entry does not exist, we know that new - // entry belongs to the unique buffer - writeBufferUnique.put(row); - return null; - } - assert (writeBufferDoubles == null); + // the entry does not exist before index.put(row); // write to backend if (readHitCache != null) { kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry @@ -394,53 +268,15 @@ public class kelondroCache implements kelondroIndex { entry = readHitCache.get(key); if (entry != null) { // since we know that the entry was in the read cache, it cannot be in any write cache - if (writeBufferDoubles != null) { - // because the entry exists, it must be written in the doubles buffer - readHitCache.remove(key, true); - this.cacheDelete++; - writeBufferDoubles.put(row); - return entry; - } else { // write directly to backend index index.put(row); // learn from situation kelondroRow.Entry dummy = readHitCache.put(row); // overwrite old entry if (dummy == null) this.writeUnique++; else this.writeDouble++; return entry; - } } } - // we still don't know if the key exists. Look into the buffers - if (writeBufferUnique != null) { - entry = writeBufferUnique.get(key); - if (entry != null) { - writeBufferUnique.put(row); - return entry; - } - } - if (writeBufferDoubles != null) { - entry = writeBufferDoubles.get(key); - if (entry != null) { - writeBufferDoubles.put(row); - return entry; - } - } - - // finally, we still don't know if this is a double-entry or unique-entry - // there is a chance to get that information 'cheap': - // look into the node ram cache of the back-end index. 
- // that does only work, if the node cache is complete - // that is the case for kelondroFlexTables with ram index - if ((writeBufferUnique != null) && - (index instanceof kelondroFlexTable) && - (((kelondroFlexTable) index).hasRAMIndex()) && - (!(((kelondroFlexTable) index).has(key)))) { - // this an unique entry - writeBufferUnique.put(row); - return null; // since that was unique, there was no entry before - } - // the worst case: we must write to the back-end directly entry = index.put(row); if (readHitCache != null) { @@ -470,13 +306,6 @@ public class kelondroCache implements kelondroIndex { this.readMissCache.remove(key, true); this.hasnotDelete++; // the entry does not exist before - if (writeBufferUnique != null) { - // since we know that the entry does not exist, we know that new - // entry belongs to the unique buffer - writeBufferUnique.put(row); - return; - } - assert (writeBufferDoubles == null); index.addUnique(row); // write to backend if (readHitCache != null) { kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry @@ -485,15 +314,6 @@ public class kelondroCache implements kelondroIndex { return; } - if ((writeBufferUnique != null) && - (index instanceof kelondroFlexTable) && - (((kelondroFlexTable) index).hasRAMIndex()) && - (!(((kelondroFlexTable) index).has(key)))) { - // this an unique entry - writeBufferUnique.addUnique(row); - return; - } - // the worst case: we must write to the back-end directly index.addUnique(row); if (readHitCache != null) { @@ -510,9 +330,6 @@ public class kelondroCache implements kelondroIndex { assert (row != null); assert (row.columns() == row().columns()); - //assert (!(serverLog.allZero(row.getColBytes(index.primarykey())))); - assert (writeBufferUnique == null); - assert (writeBufferDoubles == null); byte[] key = row.getPrimaryKeyBytes(); checkHitSpace(); @@ -562,19 +379,6 @@ public class kelondroCache implements kelondroIndex { } } - // if the key already exists in one buffer, remove that buffer - 
if (writeBufferUnique != null) { - Entry entry = writeBufferUnique.remove(key, true); - if (entry != null) return entry; - } - if (writeBufferDoubles != null) { - Entry entry = writeBufferDoubles.remove(key, true); - if (entry != null) { - index.remove(key, false); - return entry; - } - } - return index.remove(key, false); } @@ -582,26 +386,6 @@ public class kelondroCache implements kelondroIndex { checkMissSpace(); - if ((writeBufferUnique != null) && (writeBufferUnique.size() > 0)) { - Entry entry = writeBufferUnique.removeOne(); - if (readMissCache != null) { - kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(entry.getPrimaryKeyBytes())); - if (dummy == null) this.hasnotUnique++; else this.hasnotDouble++; - } - return entry; - } - - if ((writeBufferDoubles != null) && (writeBufferDoubles.size() > 0)) { - Entry entry = writeBufferDoubles.removeOne(); - byte[] key = entry.getPrimaryKeyBytes(); - if (readMissCache != null) { - kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(key)); - if (dummy == null) this.hasnotUnique++; else this.hasnotDouble++; - } - index.remove(key, false); - return entry; - } - Entry entry = index.removeOne(); if (entry == null) return null; byte[] key = entry.getPrimaryKeyBytes(); @@ -621,17 +405,15 @@ public class kelondroCache implements kelondroIndex { } public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { - flushUnique(); return index.keys(up, firstKey); } public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { - flushUnique(); return index.rows(up, firstKey); } public int size() { - return index.size() + ((writeBufferUnique == null) ? 
0 : writeBufferUnique.size()); + return index.size(); } public String filename() { diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index ab6aa6911..491a6c763 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -74,6 +74,8 @@ public class kelondroCollectionIndex { private static final int idx_col_lastread = 6; // a time stamp, update time in days since 1.1.2000 private static final int idx_col_lastwrote = 7; // a time stamp, update time in days since 1.1.2000 + private static final boolean useEcoTable = false; + private static kelondroRow indexRow(int keylength, kelondroByteOrder payloadOrder) { return new kelondroRow( "byte[] key-" + keylength + "," + @@ -122,8 +124,9 @@ public class kelondroCollectionIndex { this.maxPartitions = maxpartitions; this.commonsPath = new File(path, filenameStub + "." + fillZ(Integer.toHexString(rowdef.objectsize).toUpperCase(), 4) + ".commons"); this.commonsPath.mkdirs(); + File f = new File(path, filenameStub + ".index"); - if (new File(path, filenameStub + ".index").exists()) { + if (f.exists()) { serverLog.logFine("STARTUP", "OPENING COLLECTION INDEX"); // open index and array files @@ -153,7 +156,11 @@ public class kelondroCollectionIndex { serverLog.logFine("STARTUP", "STARTED INITIALIZATION OF NEW COLLECTION INDEX WITH " + initialSpace + " ENTRIES. 
THIS WILL TAKE SOME TIME"); // initialize (new generation) index table from file - index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keyLength, indexOrder), initialSpace, true); + if (useEcoTable) { + index = new kelondroEcoTable(f, indexRow(keyLength, indexOrder), 100); + } else { + index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keyLength, indexOrder), initialSpace, true); + } // open array files this.arrays = new HashMap(); // all entries will be dynamically created with getArray() @@ -225,25 +232,31 @@ public class kelondroCollectionIndex { private kelondroIndex openIndexFile(File path, String filenameStub, kelondroByteOrder indexOrder, long preloadTime, int loadfactor, kelondroRow rowdef, int initialSpace) throws IOException { // open/create index table - kelondroIndex theindex = new kelondroCache(new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), initialSpace, true), true, false); - //kelondroIndex theindex = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), true); - - // save/check property file for this array - File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize); - Map props = new HashMap(); - if (propfile.exists()) { - props = serverFileUtils.loadHashMap(propfile); - String stored_rowdef = (String) props.get("rowdef"); - if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef, rowdef.objectOrder, 0))))) { - System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + - rowdef + "' for array cluster '" + path + "/" + filenameStub + "'"); - System.exit(-1); + File f = new File(path, filenameStub + ".index"); + if (f.isDirectory()) { + // use a flextable + kelondroIndex theindex = new kelondroCache(new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), 
initialSpace, true)); + + // save/check property file for this array + File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize); + Map props = new HashMap(); + if (propfile.exists()) { + props = serverFileUtils.loadHashMap(propfile); + String stored_rowdef = (String) props.get("rowdef"); + if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef, rowdef.objectOrder, 0))))) { + System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + + rowdef + "' for array cluster '" + path + "/" + filenameStub + "'"); + System.exit(-1); + } } - } - props.put("rowdef", rowdef.toString()); - serverFileUtils.saveMap(propfile, props, "CollectionIndex properties"); + props.put("rowdef", rowdef.toString()); + serverFileUtils.saveMap(propfile, props, "CollectionIndex properties"); - return theindex; + return theindex; + } else { + // open a ecotable + return new kelondroEcoTable(f, indexRow(keylength, indexOrder), 100); + } } private kelondroFixedWidthArray openArrayFile(int partitionNumber, int serialNumber, kelondroByteOrder indexOrder, boolean create) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index 7c9306242..6aad7e87a 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -96,7 +96,7 @@ public class kelondroDyn { } else { fbi = new kelondroFlexTable(file.getParentFile(), file.getName(), 10000, rowdef, 0, resetOnFail); } - this.index = (useObjectCache) ? (kelondroIndex) new kelondroCache(fbi, true, writebuffer) : fbi; + this.index = (useObjectCache) ? 
(kelondroIndex) new kelondroCache(fbi) : fbi; this.keylen = key; this.reclen = nodesize; this.fillChar = fillChar; diff --git a/source/de/anomic/kelondro/kelondroEcoFS.java b/source/de/anomic/kelondro/kelondroEcoFS.java new file mode 100644 index 000000000..19979e7dd --- /dev/null +++ b/source/de/anomic/kelondro/kelondroEcoFS.java @@ -0,0 +1,495 @@ +// kelondroEcoFS.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 14.01.2008 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.RandomAccessFile; + +public class kelondroEcoFS { + + + /* + * The EcoFS is a flat file with records of fixed length. 
The file does not contain + * any meta information and the first record starts right at file position 0 + * The access rules are in such a way that a minimum of IO operations are necessary + * Two caches provide a mirror to content in the file: a read cache and a write buffer + * The read cache contains a number of entries from the file; a mirror that moves + * whenever information outsite the mirror is requested. + * The write buffer always exists only at the end of the file. It contains only records + * that have never been written to the file before. When the write buffer is flushed, + * the file grows + * The record file may also shrink when the last entry of the file is removed. + * Removal of Entries inside the file is not possible, but such entries can be erased + * by overwriting the data with zero bytes + * All access to the file is made with byte[] that are generated outsite of this class + * This class only references byte[] that are handed over to methods of this class. + */ + + private RandomAccessFile raf; + private File tablefile; + protected int recordsize; // number of bytes in one record + private int cacheindex, cachecount, buffercount; // number of entries in buffer + private byte[] cache, buffer, zero; + + private static final int maxBuffer = 512; + + + public kelondroEcoFS(File tablefile, int recordsize) throws IOException { + this.tablefile = tablefile; + this.recordsize = recordsize; + + // initialize zero buffer + this.zero = new byte[recordsize]; + for (int i = 0; i < recordsize; i++) this.zero[i] = 0; + + // initialize table file + if (!tablefile.exists()) { + // make new file + FileOutputStream fos = null; + try { + fos = new FileOutputStream(tablefile); + } catch (FileNotFoundException e) { + // should not happen + e.printStackTrace(); + } + try { fos.close(); } catch (IOException e) {} + } + + // open an existing table file + try { + raf = new RandomAccessFile(tablefile, "rw"); + } catch (FileNotFoundException e) { + // should never 
happen + e.printStackTrace(); + } + + // initialize cache and buffer + int maxrecords = Math.max(1, maxBuffer / recordsize); + cache = new byte[maxrecords * recordsize]; + buffer = new byte[maxrecords * recordsize]; + this.buffercount = 0; + + // first-time read of cache + fillCache(0); + } + + public static long tableSize(File tablefile, int recordsize) { + // returns number of records in table + if (!tablefile.exists()) return 0; + long size = tablefile.length(); + assert size % recordsize == 0; + return size / recordsize; + } + + public synchronized int size() throws IOException { + // return the number of records in file plus number of records in buffer + return filesize() + this.buffercount; + } + + public File filename() { + return this.tablefile; + } + + private int filesize() throws IOException { + return (int) (raf.length() / recordsize); + } + + private int inCache(int index) { + // checks if the index is inside the cache and returns the index offset inside + // the cache if the index is inside the cache + // returns -1 if the index is not in the cache + if ((index >= this.cacheindex) && (index < this.cacheindex + this.cachecount)) { + return index - this.cacheindex; + } + return -1; + } + + private int inBuffer(int index) throws IOException { + // checks if the index is inside the buffer and returns the index offset inside + // the buffer if the index is inside the buffer + // returns -1 if the index is not in the buffer + int fs = filesize(); + if ((index >= fs) && (index < fs + this.buffercount)) { + return index - fs; + } + return -1; + } + + private void fillCache(int index) throws IOException { + // load cache with copy of disc content; start with record at index + // if the record would overlap with the write buffer, + // its start is shifted forward until it fits + + // first check if the index is inside the current cache + assert inCache(index) < 0; + if (inCache(index) >= 0) return; + + // calculate new start position + int fs = this.filesize(); 
+ if (index + this.cache.length / this.recordsize > fs) { + index = fs - this.cache.length / this.recordsize; + } + if (index < 0) index = 0; + + // calculate number of records that shall be stored in the cache + this.cachecount = Math.min(this.cache.length / this.recordsize, this.filesize() - index); + assert this.cachecount >= 0; + + // check if we need to read 0 bytes from the file + this.cacheindex = index; + if (this.cachecount == 0) return; + + // copy records from file to cache + raf.seek((long) this.recordsize * (long) index); + raf.read(this.cache, 0, this.recordsize * this.cachecount); + } + + private void flushBuffer() { + // write buffer to end of file + try { + raf.seek(raf.length()); + raf.write(this.buffer, 0, this.recordsize * this.buffercount); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + this.buffercount = 0; + } + + public synchronized void close() { + flushBuffer(); + + // then close the file + try { + raf.close(); + } catch (IOException e) { + e.printStackTrace(); + } + raf = null; + buffer = null; + cache = null; + } + + public synchronized void get(int index, byte[] b, int start) throws IOException { + assert b.length - start >= this.recordsize; + if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.get(" + index + ") outside bounds (" + this.size() + ")"); + // check if index is inside of cache + int p = inCache(index); + int q = (p >= 0) ? -1 : inBuffer(index); + if ((p < 0) && (q < 0)) { + // the index is outside of cache and buffer index. 
shift cache window + fillCache(index); + p = inCache(index); + assert p >= 0; + } + if (p >= 0) { + // read entry from the cache + System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize); + return; + } + if (q >= 0) { + // read entry from the buffer + System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize); + return; + } + assert false; + } + + public synchronized void put(int index, byte[] b, int start) throws IOException { + assert b.length - start >= this.recordsize; + if (index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")"); + // check if this is an empty entry + if (isClean(b , start, this.recordsize)) { + clean(index); + return; + } + // check if index is inside of cache + int p = inCache(index); + int q = (p >= 0) ? -1 : inBuffer(index); + if (p >= 0) { + // write entry to the cache and to the file + System.arraycopy(b, start, this.cache, p * this.recordsize, this.recordsize); + raf.seek((long) index * (long) this.recordsize); + raf.write(b, start, this.recordsize); + return; + } + if (q >= 0) { + // write entry to the buffer + System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize); + return; + } + if (index == size()) { + // append the record to the end of the file; + + // look if there is space in the buffer + int bufferpos = index - filesize(); + if (bufferpos >= this.buffer.length / this.recordsize) { + assert this.buffercount == this.buffer.length / this.recordsize; + // the record does not fit in current buffer + // write buffer + flushBuffer(); + // write new entry to buffer + System.arraycopy(b, start, this.buffer, 0, this.recordsize); + this.buffercount = 1; + } else { + System.arraycopy(b, start, this.buffer, bufferpos * this.recordsize, this.recordsize); + this.buffercount++; + } + assert this.buffercount <= this.buffer.length / this.recordsize; + } else { + // write the record directly to the file, + // do not 
care about the cache; this case was checked before + raf.seek((long) index * (long) this.recordsize); + raf.write(b, start, this.recordsize); + } + } + + + public synchronized void add(byte[] b, int start) throws IOException { + put(size(), b, start); + } + + private boolean isClean(byte[] b, int offset, int length) { + for (int i = 0; i < length; i++) { + if (b[i + offset] != 0) return false; + } + return true; + } + + private boolean isClean(int index) throws IOException { + assert index < size(); + // check if index is inside of cache + int p = inCache(index); + int q = (p >= 0) ? -1 : inBuffer(index); + if ((p < 0) && (q < 0)) { + // the index is outside of cache and buffer index. shift cache window + fillCache(index); + p = inCache(index); + assert p >= 0; + } + if (p >= 0) { + // check entry from the cache + return isClean(this.cache, p * this.recordsize, this.recordsize); + } + if (q >= 0) { + // check entry from the buffer + return isClean(this.buffer, q * this.recordsize, this.recordsize); + } + assert false; + return false; + } + + public synchronized void clean(int index, byte[] b, int start) throws IOException { + // removes an entry by cleaning (writing zero bytes to the file) + // the entry that had been at the specific place before is copied to the given array b + // if the last entry in the file was cleaned, the file shrinks by the given record + // this is like + // get(index, b, start); + // put(index, zero, 0); + // plus an additional check if the file should shrink + + assert b.length - start >= this.recordsize; + if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")"); + if (index == size() - 1) { + cleanLast(b, start); + return; + } + + // check if index is inside of cache + int p = inCache(index); + int q = (p >= 0) ? -1 : inBuffer(index); + if ((p < 0) && (q < 0)) { + // the index is outside of cache and buffer index. 
shift cache window + fillCache(index); + p = inCache(index); + assert p >= 0; + } + if (p >= 0) { + // read entry from the cache + System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize); + + // write zero bytes to the cache and to the file + System.arraycopy(zero, 0, this.cache, p * this.recordsize, this.recordsize); + this.raf.seek((long) index * (long) this.recordsize); + this.raf.write(zero, 0, this.recordsize); + return; + } + if (q >= 0) { + // read entry from the buffer + System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize); + // write zero to the buffer + System.arraycopy(zero, 0, this.buffer, q * this.recordsize, this.recordsize); + return; + } + assert false; + } + + public synchronized void clean(int index) throws IOException { + if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")"); + if (index == size() - 1) { + cleanLast(); + return; + } + + // check if index is inside of cache + int p = inCache(index); + int q = (p >= 0) ? 
-1 : inBuffer(index); + if (p >= 0) { + // write zero bytes to the cache and to the file + System.arraycopy(zero, 0, this.cache, p * this.recordsize, this.recordsize); + raf.seek((long) index * (long) this.recordsize); + raf.write(zero, 0, this.recordsize); + return; + } + if (q >= 0) { + // write zero to the buffer + System.arraycopy(zero, 0, this.buffer, q * this.recordsize, this.recordsize); + return; + } + + raf.seek((long) index * (long) this.recordsize); + raf.write(zero, 0, this.recordsize); + } + + public synchronized void cleanLast(byte[] b, int start) throws IOException { + cleanLast0(b, start); + int i; + while (((i = size()) > 0) && (isClean(i - 1))) { + //System.out.println("Extra clean/1: before size = " + size()); + cleanLast0(); + //System.out.println(" after size = " + size()); + } + } + + private synchronized void cleanLast0(byte[] b, int start) throws IOException { + // this is like + // clean(this.size() - 1, b, start); + + assert b.length - start >= this.recordsize; + // check if index is inside of cache + int p = inCache(this.size() - 1); + int q = (p >= 0) ? -1 : inBuffer(this.size() - 1); + if ((p < 0) && (q < 0)) { + // the index is outside of cache and buffer index. 
shift cache window + fillCache(this.size() - 1); + p = inCache(this.size() - 1); + assert p >= 0; + } + if (p >= 0) { + // read entry from the cache + System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize); + // shrink cache and file + assert this.buffercount == 0; + this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize); + this.cachecount--; + return; + } + if (q >= 0) { + // read entry from the buffer + System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize); + // shrink buffer + assert this.buffercount > 0; + this.buffercount--; + return; + } + assert false; + } + + public synchronized void cleanLast() throws IOException { + cleanLast0(); + int i; + while (((i = size()) > 0) && (isClean(i - 1))) { + //System.out.println("Extra clean/0: before size = " + size()); + cleanLast0(); + //System.out.println(" after size = " + size()); + } + } + + private synchronized void cleanLast0() throws IOException { + + // check if index is inside of cache + int p = inCache(this.size() - 1); + int q = (p >= 0) ? 
-1 : inBuffer(this.size() - 1); + if (p >= 0) { + // shrink cache and file + assert this.buffercount == 0; + this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize); + this.cachecount--; + return; + } + if (q >= 0) { + // shrink buffer + assert this.buffercount > 0; + this.buffercount--; + return; + } + // check if file should shrink + assert this.buffercount == 0; + this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize); + } + + public static void main(String[] args) { + // open a file, add one entry and exit + File f = new File(args[0]); + if (f.exists()) f.delete(); + try { + kelondroEcoFS t = new kelondroEcoFS(f, 8); + byte[] b = new byte[8]; + t.add("01234567".getBytes(), 0); + t.add("ABCDEFGH".getBytes(), 0); + t.add("abcdefgh".getBytes(), 0); + t.add("--------".getBytes(), 0); + t.add("********".getBytes(), 0); + for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0); + t.add("=======0".getBytes(), 0); + t.add("=======1".getBytes(), 0); + t.add("=======2".getBytes(), 0); + t.cleanLast(b, 0); + System.out.println(new String(b)); + t.clean(2, b, 0); + System.out.println(new String(b)); + t.get(1, b, 0); + System.out.println(new String(b)); + t.put(1, "AbCdEfGh".getBytes(), 0); + t.get(1, b, 0); + System.out.println(new String(b)); + t.get(3, b, 0); + System.out.println(new String(b)); + t.get(4, b, 0); + System.out.println(new String(b)); + System.out.println("size = " + t.size()); + t.clean(t.size() - 2); + t.cleanLast(); + System.out.println("size = " + t.size()); + t.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + +} diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java new file mode 100644 index 000000000..c2df2c403 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroEcoTable.java @@ -0,0 +1,463 @@ +// kelondroEcoIndex.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 14.01.2008 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.TreeMap; + +import de.anomic.kelondro.kelondroRow.Entry; +import de.anomic.server.serverMemory; + +/* + * The EcoIndex builts upon the EcoFS and tries to reduce the number of IO requests that the + * EcoFS must do to a minimum. In best cases, no IO has to be done for read operations (complete database shadow in RAM) + * and a rare number of write IO operations must be done for a large number of table-writings (using the write buffer of EcoFS) + * To make the EcoIndex scalable in question of available RAM, there are two elements that must be scalable: + * - the access index can be either completely in RAM (kelondroRAMIndex) or it is file-based (kelondroTree) + * - the content cache can be either a complete RAM-based shadow of the File, or empty. 
+ * The content cache can also be deleted during run-time, if the available RAM gets too low. + * + */ + +public class kelondroEcoTable implements kelondroIndex { + + // static tracker objects + private static TreeMap tableTracker = new TreeMap(); + + private kelondroRowSet table; + private kelondroBytesIntMap index; + private kelondroBufferedEcoFS file; + private kelondroRow rowdef, taildef; + private int buffersize; + + public kelondroEcoTable(File tablefile, kelondroRow rowdef, int buffersize) throws IOException { + this.rowdef = rowdef; + this.buffersize = buffersize; + assert rowdef.primaryKeyIndex == 0; + // define the taildef, a row like the rowdef but without the first column + kelondroColumn[] cols = new kelondroColumn[rowdef.columns() - 1]; + for (int i = 0; i < cols.length; i++) { + cols[i] = rowdef.column(i + 1); + } + this.taildef = new kelondroRow(cols, kelondroNaturalOrder.naturalOrder, rowdef.primaryKeyIndex); + + // initialize table file + if (!tablefile.exists()) { + // make new file + FileOutputStream fos = null; + try { + fos = new FileOutputStream(tablefile); + } catch (FileNotFoundException e) { + // should not happen + e.printStackTrace(); + } + try { fos.close(); } catch (IOException e) {} + } + + // open an existing table file + try { + this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize); + } catch (FileNotFoundException e) { + // should never happen + e.printStackTrace(); + } + + // initialize index and copy table + int records = file.size(); + long neededRAM4table = records * taildef.objectsize * 3 / 2; + table = (serverMemory.request(neededRAM4table, true)) ? 
new kelondroRowSet(taildef, records + 1) : null; + index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, records + 1); + + // read all elements from the file into the copy table + byte[] record = new byte[rowdef.objectsize]; + byte[] key = new byte[rowdef.primaryKeyLength]; + for (int i = 0; i < records; i++) { + // read entry + file.get(i, record, 0); + + // write the key into the index table + System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); + index.addi(key, i); + + // write the tail into the table + if (table != null) table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true)); + } + + // track this table + tableTracker.put(tablefile.toString(), this); + } + + public static long tableSize(File tablefile, int recordsize) { + // returns number of records in table + return kelondroEcoFS.tableSize(tablefile, recordsize); + } + + public synchronized void addUnique(Entry row) throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + int i = file.size(); + index.addi(row.getPrimaryKeyBytes(), i); + if (table != null) { + assert table.size() == i; + table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true)); + } + file.put(i, row.bytes(), 0); + } + + public synchronized void addUniqueMultiple(List rows) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) { + addUnique(i.next()); + } + } + + public void close() { + file.close(); + file = null; + } + + public void finalize() { + if (this.file != null) this.close(); + } + + public String filename() { + return this.file.filename().toString(); + } + + public synchronized Entry get(byte[] key) throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + int i = index.geti(key); + if (i == -1) return null; + byte[] b = new byte[rowdef.objectsize]; + if (table == null) { + // read row from the file + 
file.get(i, b, 0); + } else { + // construct the row using the copy in RAM + kelondroRow.Entry v = table.get(i); + assert v != null; + assert key.length == rowdef.primaryKeyLength; + System.arraycopy(key, 0, b, 0, key.length); + System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength); + } + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + return rowdef.newEntry(b); + } + + public synchronized boolean has(byte[] key) throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + return index.geti(key) >= 0; + } + + public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { + return index.keys(up, firstKey); + } + + public kelondroProfile profile() { + return null; + } + + public synchronized Entry put(Entry row) throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + int i = index.geti(row.getPrimaryKeyBytes()); + if (i == -1) { + addUnique(row); + return null; + } + + byte[] b = new byte[rowdef.objectsize]; + if (table == null) { + // read old value + file.get(i, b, 0); + // write new value + file.put(i, row.bytes(), 0); + } else { + // read old value + kelondroRow.Entry v = table.get(i); + System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, rowdef.primaryKeyLength); + System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength); + // write new value + table.set(i, taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true)); + file.put(i, row.bytes(), 0); + } + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + // return old value + return rowdef.newEntry(b); + } + + public synchronized Entry put(Entry row, Date entryDate) throws IOException { + return put(row); + } + + public synchronized void 
putMultiple(List rows) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) { + put(i.next()); + } + } + + public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + assert keepOrder == false; // this class cannot keep the order during a remove + int i = index.geti(key); + if (i == -1) return null; // nothing to do + + // prepare result + byte[] b = new byte[rowdef.objectsize]; + byte[] p = new byte[rowdef.objectsize]; + if (table == null) { + index.removei(key); + file.get(i, b, 0); + file.cleanLast(p, 0); + file.put(i, p, 0); + byte[] k = new byte[rowdef.primaryKeyLength]; + System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); + index.puti(k, i); + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + } else { + kelondroRow.Entry v = table.get(i); + assert key.length == rowdef.primaryKeyLength; + System.arraycopy(key, 0, b, 0, key.length); + System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, taildef.objectsize); + if (i == index.size() - 1) { + // special handling if the entry is the last entry in the file + index.removei(key); + table.removeRow(i, false); + file.clean(i); + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + } else { + // switch values + kelondroRow.Entry te = table.removeOne(); + table.set(i, te); + + file.cleanLast(p, 0); + file.put(i, p, 0); + kelondroRow.Entry lr = rowdef.newEntry(p); + + index.removei(key); + index.puti(lr.getPrimaryKeyBytes(), i); + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())) : "table.size() = " + table.size() + ", index.size() = " + index.size(); + } + } + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + return rowdef.newEntry(b); + } + + public synchronized 
Entry removeOne() throws IOException { + assert (file.size() == index.size()); + assert ((table == null) || (table.size() == index.size())); + byte[] le = new byte[rowdef.objectsize]; + file.cleanLast(le, 0); + kelondroRow.Entry lr = rowdef.newEntry(le); + int i = index.removei(lr.getPrimaryKeyBytes()); + assert i >= 0; + table.removeRow(i, false); + return lr; + } + + public void reset() throws IOException { + File f = file.filename(); + file.close(); + f.delete(); + + // make new file + FileOutputStream fos = null; + try { + fos = new FileOutputStream(f); + } catch (FileNotFoundException e) { + // should not happen + e.printStackTrace(); + } + try { fos.close(); } catch (IOException e) {} + + + // open an existing table file + try { + this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(f, rowdef.objectsize), this.buffersize); + } catch (FileNotFoundException e) { + // should never happen + e.printStackTrace(); + } + + // initialize index and copy table + table = new kelondroRowSet(taildef, 1); + index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, 1); + } + + public kelondroRow row() { + return this.rowdef; + } + + public synchronized int size() { + return index.size(); + } + + + public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { + return new rowIterator(up, firstKey); + } + + public class rowIterator implements kelondroCloneableIterator { + Iterator i; + boolean up; + byte[] fk; + int c; + + public rowIterator(boolean up, byte[] firstKey) throws IOException { + this.up = up; + this.fk = firstKey; + this.i = index.keys(up, firstKey); + this.c = -1; + } + + public kelondroCloneableIterator clone(Object modifier) { + try { + return new rowIterator(up, fk); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + public boolean hasNext() { + return i.hasNext(); + } + + public Entry next() { + byte[] k = i.next(); + try { + this.c = index.geti(k); + } catch (IOException 
e) { + e.printStackTrace(); + return null; + } + byte[] b = new byte[rowdef.objectsize]; + if (table == null) { + // read from file + try { + file.get(this.c, b, 0); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } else { + // compose from table and key + kelondroRow.Entry v = table.get(this.c); + System.arraycopy(k, 0, b, 0, rowdef.primaryKeyLength); + System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, taildef.objectsize); + } + return rowdef.newEntry(b); + } + + public void remove() { + throw new UnsupportedOperationException("no remove in EcoTable"); + } + + } + + public static kelondroIndex testTable(File f, String testentities) throws IOException { + if (f.exists()) f.delete(); + kelondroRow rowdef = new kelondroRow("byte[] a-4, byte[] b-4", kelondroNaturalOrder.naturalOrder, 0); + kelondroIndex tt = new kelondroEcoTable(f, rowdef, 100); + byte[] b; + kelondroRow.Entry row = rowdef.newEntry(); + for (int i = 0; i < testentities.length(); i++) { + b = kelondroTree.testWord(testentities.charAt(i)); + row.setCol(0, b); + row.setCol(1, b); + tt.put(row); + } + return tt; + } + + public static void bigtest(int elements, File testFile) { + System.out.println("starting big test with " + elements + " elements:"); + long start = System.currentTimeMillis(); + String[] s = kelondroTree.permutations(elements); + kelondroIndex tt; + try { + for (int i = 0; i < s.length; i++) { + System.out.println("*** probing tree " + i + " for permutation " + s[i]); + // generate tree and delete elements + tt = testTable(testFile, s[i]); + if (kelondroTree.countElements(tt) != tt.size()) { + System.out.println("wrong size for " + s[i]); + } + tt.close(); + for (int j = 0; j < s.length; j++) { + tt = testTable(testFile, s[i]); + // delete by permutation j + for (int elt = 0; elt < s[j].length(); elt++) { + tt.remove(kelondroTree.testWord(s[j].charAt(elt)), false); + if (kelondroTree.countElements(tt) != tt.size()) { + System.out.println("ERROR! 
wrong size for probe tree " + s[i] + "; probe delete " + s[j] + "; position " + elt); + } + } + tt.close(); + } + } + System.out.println("FINISHED test after " + ((System.currentTimeMillis() - start) / 1000) + " seconds."); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("TERMINATED"); + } + } + + public static void main(String[] args) { + // open a file, add one entry and exit + File f = new File(args[0]); + bigtest(5, f); + /* + kelondroRow row = new kelondroRow("byte[] key-4, byte[] x-5", kelondroNaturalOrder.naturalOrder, 0); + try { + kelondroEcoTable t = new kelondroEcoTable(f, row); + kelondroRow.Entry entry = row.newEntry(); + entry.setCol(0, "abcd".getBytes()); + entry.setCol(1, "dummy".getBytes()); + t.put(entry); + t.close(); + } catch (IOException e) { + e.printStackTrace(); + } + */ + } + +} diff --git a/source/de/anomic/kelondro/kelondroFlexSplitTable.java b/source/de/anomic/kelondro/kelondroFlexSplitTable.java index 403d49665..c959b164e 100644 --- a/source/de/anomic/kelondro/kelondroFlexSplitTable.java +++ b/source/de/anomic/kelondro/kelondroFlexSplitTable.java @@ -101,7 +101,7 @@ public class kelondroFlexSplitTable implements kelondroIndex { // open next biggest table t.remove(maxf); date = maxf.substring(tablename.length() + 1); - table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail), true, false); + table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail)); tables.put(date, table); } } @@ -164,16 +164,6 @@ public class kelondroFlexSplitTable implements kelondroIndex { return s; } - public void flushSome() { - Iterator i = tables.values().iterator(); - kelondroIndex ki; - while (i.hasNext()) { - ki = ((kelondroIndex) i.next()); - if (ki instanceof kelondroCache) - try {((kelondroCache) ki).flushSome();} catch (IOException e) {} - } - } - public kelondroRow row() { return this.rowdef; } diff --git 
a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 6bb478a8a..df01ca550 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -93,7 +93,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr if (indexfile.exists()) { // use existing index file System.out.println("*** Using File index " + indexfile); - ki = new kelondroCache(kelondroTree.open(indexfile, true, preloadTime, treeIndexRow(rowdef.width(0), rowdef.objectOrder), 2, 80), true, false); + ki = new kelondroCache(kelondroTree.open(indexfile, true, preloadTime, treeIndexRow(rowdef.width(0), rowdef.objectOrder), 2, 80)); RAMIndex = false; } else { // generate new index file @@ -175,7 +175,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroByteOrder objectOrder) throws IOException { - kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80), true, false); + kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80)); Iterator content = super.col[0].contentNodes(-1); kelondroNode node; kelondroRow.Entry indexentry; diff --git a/source/de/anomic/kelondro/kelondroMapTable.java b/source/de/anomic/kelondro/kelondroMapTable.java index 96bb88487..9dec9d428 100644 --- a/source/de/anomic/kelondro/kelondroMapTable.java +++ b/source/de/anomic/kelondro/kelondroMapTable.java @@ -93,7 +93,7 @@ public class kelondroMapTable { if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared already in other context."); if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + 
tablename + "' declared twice."); File tablefile = new File(tablesPath, "table." + tablename + ".tdb"); - kelondroIndex Tree = new kelondroCache(kelondroTree.open(tablefile, true, preloadTime, rowdef), true, false); + kelondroIndex Tree = new kelondroCache(kelondroTree.open(tablefile, true, preloadTime, rowdef)); tTables.put(tablename, Tree); } diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index 6bc9cd571..d380ee31e 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -169,6 +169,9 @@ public class kelondroRAMIndex implements kelondroIndex { //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return index1.keys(up, firstKey); } + // index0 should be sorted + // sort index1 to enable working of the merge iterator + index1.sort(); //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return new kelondroMergeIterator( index0.keys(up, firstKey), @@ -192,6 +195,9 @@ public class kelondroRAMIndex implements kelondroIndex { //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return index1.rows(up, firstKey); } + // index0 should be sorted + // sort index1 to enable working of the merge iterator + index1.sort(); //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return new kelondroMergeIterator( index0.rows(up, firstKey), diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index 016977485..e0e6da969 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -62,8 +62,8 @@ public final class kelondroRow { } public kelondroRow(String structure, kelondroByteOrder objectOrder, int primaryKey) { - assert (objectOrder != null); - this.objectOrder = objectOrder; + assert (objectOrder != null); + this.objectOrder = objectOrder; // define 
row with row syntax // example: //# Structure=,'=',,,,,,,,,, @@ -95,8 +95,8 @@ public final class kelondroRow { } public final void setOrdering(kelondroByteOrder objectOrder, int primaryKey) { - assert (objectOrder != null); - this.objectOrder = objectOrder; + assert (objectOrder != null); + this.objectOrder = objectOrder; this.primaryKeyIndex = primaryKey; this.primaryKeyLength = (primaryKey < 0) ? this.objectsize : row[primaryKeyIndex].cellwidth; } @@ -165,7 +165,7 @@ public final class kelondroRow { public final Entry newEntry(byte[] rowinstance, int start, boolean clone) { if (rowinstance == null) return null; //assert (rowinstance[0] != 0); - assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth)); + assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth)) : "rowinstance = " + new String(rowinstance); // this method offers the option to clone the content // this is necessary if it is known that the underlying byte array may change and therefore // the reference to the byte array does not contain the original content @@ -185,38 +185,38 @@ public final class kelondroRow { } public final EntryIndex newEntryIndex(byte[] rowinstance, int index) { - if (rowinstance == null) return null; - assert (rowinstance[0] != 0); - assert (this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth)); + if (rowinstance == null) return null; + assert (rowinstance[0] != 0); + assert (this.objectOrder.wellformed(rowinstance, 0, row[0].cellwidth)); return new EntryIndex(rowinstance, index); } public static class EntryComparator extends kelondroAbstractOrder implements kelondroOrder, Comparator { - kelondroByteOrder base; - public EntryComparator(kelondroByteOrder baseOrder) { - this.base = baseOrder; - } - - public int compare(Entry a, Entry b) { - return a.compareTo(b); - } + kelondroByteOrder base; + public EntryComparator(kelondroByteOrder baseOrder) { + this.base = baseOrder; + } + + public int compare(Entry a, Entry b) { + return 
a.compareTo(b); + } - public kelondroOrder clone() { - return new EntryComparator(base); - } + public kelondroOrder clone() { + return new EntryComparator(base); + } - public long cardinal(Entry key) { - return base.cardinal(key.getPrimaryKeyBytes()); - } + public long cardinal(Entry key) { + return base.cardinal(key.getPrimaryKeyBytes()); + } - public String signature() { - return base.signature(); - } + public String signature() { + return base.signature(); + } - public boolean wellformed(Entry a) { - return base.wellformed(a.getPrimaryKeyBytes()); - } + public boolean wellformed(Entry a) { + return base.wellformed(a.getPrimaryKeyBytes()); + } } @@ -240,14 +240,14 @@ public final class kelondroRow { } public Entry(byte[] newrow, int start, boolean forceclone) { - if ((!forceclone) && (newrow.length - start >= objectsize)) { - this.rowinstance = newrow; - this.offset = start; - } else { - this.rowinstance = new byte[objectsize]; - System.arraycopy(newrow, start, this.rowinstance, 0, objectsize); - this.offset = 0; - } + if ((!forceclone) && (newrow.length - start >= objectsize)) { + this.rowinstance = newrow; + this.offset = start; + } else { + this.rowinstance = new byte[objectsize]; + System.arraycopy(newrow, start, this.rowinstance, 0, objectsize); + this.offset = 0; + } //for (int i = ll; i < objectsize; i++) this.rowinstance[i] = 0; } @@ -291,7 +291,7 @@ public final class kelondroRow { try { setCol(nick, Long.parseLong(elts[i].substring(p + 1).trim())); } catch (NumberFormatException e) { - serverLog.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal; row = " + i + ", celltype = " + row[i].celltype + ", encoder = " + row[i].encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); + serverLog.logSevere("kelondroRow", "NumberFormatException for celltype_cardinal; row = " + i + ", celltype = " + row[i].celltype + ", encoder = " + row[i].encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); setCol(nick, 0); } } else if 
((decimalCardinal) && (row[i].celltype == kelondroColumn.celltype_binary)) { @@ -299,7 +299,7 @@ public final class kelondroRow { try { setCol(nick, new byte[]{(byte) Integer.parseInt(elts[i].substring(p + 1).trim())}); } catch (NumberFormatException e) { - serverLog.logSevere("kelondroRow", "NumberFormatException for celltype_binary; row = " + i + ", celltype = " + row[i].celltype + ", encoder = " + row[i].encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); + serverLog.logSevere("kelondroRow", "NumberFormatException for celltype_binary; row = " + i + ", celltype = " + row[i].celltype + ", encoder = " + row[i].encoder + ", value = '" + elts[i].substring(p + 1).trim() + "'"); setCol(nick, new byte[]{0}); } } else if ((decimalCardinal) && (row[i].celltype == kelondroColumn.celltype_bitfield)) { @@ -313,11 +313,11 @@ public final class kelondroRow { } protected final int colstart(int column) { - return colstart[column]; + return colstart[column]; } protected final int cellwidth(int column) { - return row[column].cellwidth; + return row[column].cellwidth; } public final int compareTo(Entry o) { @@ -326,18 +326,18 @@ public final class kelondroRow { } public final byte[] bytes() { - if ((offset == 0) && (rowinstance.length == objectsize)) { - return rowinstance; - } else { - byte[] tmp = new byte[objectsize]; - System.arraycopy(rowinstance, offset, tmp, 0, objectsize); - return tmp; - } + if ((offset == 0) && (rowinstance.length == objectsize)) { + return rowinstance; + } else { + byte[] tmp = new byte[objectsize]; + System.arraycopy(rowinstance, offset, tmp, 0, objectsize); + return tmp; + } } public final void writeToArray(byte[] target, int targetOffset) { - // this method shall replace the byte()s where possible, bacause it may reduce the number of new byte[] allocations - assert (targetOffset + objectsize <= target.length) : "targetOffset = " + targetOffset + ", target.length = " + target.length + ", objectsize = " + objectsize; + // this method shall 
replace the byte()s where possible, bacause it may reduce the number of new byte[] allocations + assert (targetOffset + objectsize <= target.length) : "targetOffset = " + targetOffset + ", target.length = " + target.length + ", objectsize = " + objectsize; System.arraycopy(rowinstance, offset, target, targetOffset, objectsize); } @@ -498,7 +498,7 @@ public final class kelondroRow { } protected final long getColLong(int encoder, int clstrt, int length) { - switch (encoder) { + switch (encoder) { case kelondroColumn.encoder_none: throw new kelondroException("ROW", "getColLong has celltype none, no encoder given"); case kelondroColumn.encoder_b64e: @@ -553,8 +553,8 @@ public final class kelondroRow { } public final void writeToArray(int column, byte[] target, int targetOffset) { - // this method shall replace the getColBytes where possible, bacause it may reduce the number of new byte[] allocations - assert (targetOffset + row[column].cellwidth <= target.length) : "targetOffset = " + targetOffset + ", target.length = " + target.length + ", row[column].cellwidth() = " + row[column].cellwidth; + // this method shall replace the getColBytes where possible, bacause it may reduce the number of new byte[] allocations + assert (targetOffset + row[column].cellwidth <= target.length) : "targetOffset = " + targetOffset + ", target.length = " + target.length + ", row[column].cellwidth() = " + row[column].cellwidth; System.arraycopy(rowinstance, offset + colstart[column], target, targetOffset, row[column].cellwidth); } diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 93b06a691..975763404 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -355,6 +355,7 @@ public class kelondroRowCollection { } public synchronized kelondroRow.Entry removeOne() { + // removes the last entry from the collection if (chunkcount == 0) return null; 
kelondroRow.Entry r = get(chunkcount - 1); if (chunkcount == sortBound) sortBound--; diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 5c80291a8..781067ed4 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -1637,7 +1637,7 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex } } - public static int countElements(kelondroTree t) { + public static int countElements(kelondroIndex t) { int count = 0; try { Iterator iter = t.rows(true, null); diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index 8111acac3..a8fae30e1 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -140,7 +140,7 @@ public class plasmaCrawlBalancer { private void openFileIndex() { cacheStacksPath.mkdirs(); - urlFileIndex = new kelondroCache(new kelondroFlexTable(cacheStacksPath, stackname + indexSuffix, -1, plasmaCrawlEntry.rowdef, 0, true), true, false); + urlFileIndex = new kelondroCache(new kelondroFlexTable(cacheStacksPath, stackname + indexSuffix, -1, plasmaCrawlEntry.rowdef, 0, true)); } private void resetFileIndex() { diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 70ae23db6..99410faad 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -146,13 +146,6 @@ public final class plasmaCrawlLURL { gcrawlResultStack.add(urlHash + initiatorHash + executorHash); } - public synchronized void flushCacheSome() { - try { - if (urlIndexFile instanceof kelondroFlexSplitTable) ((kelondroFlexSplitTable) urlIndexFile).flushSome(); - if (urlIndexFile instanceof kelondroCache) ((kelondroCache) urlIndexFile).flushSome(); - } catch (IOException e) {} - } - public synchronized int writeCacheSize() { if (urlIndexFile 
instanceof kelondroFlexSplitTable) return ((kelondroFlexSplitTable) urlIndexFile).writeBufferSize(); if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).writeBufferSize(); diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index d062d3688..ef3b4df41 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -80,7 +80,7 @@ public final class plasmaCrawlStacker extends Thread { final serverLog log = new serverLog("STACKCRAWL"); private plasmaSwitchboard sb; - private final LinkedList urlEntryHashCache; + private final LinkedList urlEntryHashCache; private kelondroIndex urlEntryCache; private File cacheStacksPath; private long preloadTime; @@ -91,7 +91,7 @@ public final class plasmaCrawlStacker extends Thread { // objects for the prefetch task - private ArrayList dnsfetchHosts = new ArrayList(); + private ArrayList dnsfetchHosts = new ArrayList(); public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, long preloadTime, int dbtype, boolean prequeue) { this.sb = sb; @@ -101,7 +101,7 @@ public final class plasmaCrawlStacker extends Thread { this.alternateCount = 0; // init the message list - this.urlEntryHashCache = new LinkedList(); + this.urlEntryHashCache = new LinkedList(); // create a stack for newly entered entries this.cacheStacksPath = dbPath; @@ -111,7 +111,7 @@ public final class plasmaCrawlStacker extends Thread { openDB(); try { // loop through the list and fill the messageList with url hashs - Iterator rows = this.urlEntryCache.rows(true, null); + Iterator rows = this.urlEntryCache.rows(true, null); kelondroRow.Entry entry; while (rows.hasNext()) { entry = (kelondroRow.Entry) rows.next(); @@ -299,13 +299,13 @@ public final class plasmaCrawlStacker extends Thread { String newCacheName = "urlNoticeStacker8.db"; cacheStacksPath.mkdirs(); try { - this.urlEntryCache = new kelondroCache(new 
kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true), true, false); + this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true)); } catch (Exception e) { e.printStackTrace(); // kill DB and try again kelondroFlexTable.delete(cacheStacksPath, newCacheName); try { - this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true), true, false); + this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true)); } catch (Exception ee) { ee.printStackTrace(); System.exit(-1); @@ -315,7 +315,7 @@ public final class plasmaCrawlStacker extends Thread { if (this.dbtype == QUEUE_DB_TYPE_TREE) { File cacheFile = new File(cacheStacksPath, "urlNoticeStacker8.db"); cacheFile.getParentFile().mkdirs(); - this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, true, preloadTime, plasmaCrawlEntry.rowdef), true, true); + this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, true, preloadTime, plasmaCrawlEntry.rowdef)); } } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index c463f8dd5..b3d7d50cc 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1802,7 +1802,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // flush some entries from the RAM cache if (sbQueue.size() == 0) wordIndex.flushCacheSome(); // permanent flushing only if we are not busy - wordIndex.loadedURL.flushCacheSome(); boolean doneSomething = false; diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index acfd43951..2fe9babb4 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ 
-67,13 +67,13 @@ public class yacyNewsDB { public yacyNewsDB(File path, long preloadTime) { this.path = path; this.preloadTime = preloadTime; - this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef), true, false); + this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef)); } private void resetDB() { try {close();} catch (Exception e) {} if (path.exists()) path.delete(); - this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef), true, false); + this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef)); } public void close() { @@ -102,14 +102,14 @@ public class yacyNewsDB { } } - public synchronized Iterator news() throws IOException { + public synchronized Iterator news() throws IOException { // the iteration iterates yacyNewsRecord - type objects return new recordIterator(); } - public class recordIterator implements Iterator { + public class recordIterator implements Iterator { - Iterator rowIterator; + Iterator rowIterator; public recordIterator() throws IOException { rowIterator = news.rows(true, null); @@ -119,7 +119,7 @@ public class yacyNewsDB { return rowIterator.hasNext(); } - public Object next() { + public yacyNewsRecord next() { return b2r((kelondroRow.Entry) rowIterator.next()); }