From 147d88cf23577ff6568210bce53b6cc5bc6b6193 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 26 Oct 2006 13:50:50 +0000 Subject: [PATCH] re-design of database caching this should reduce IO a lot, because write caches are now activated for all databases - added new caching class that combines a read- and write-cache. - removed old read and write cache classes - removed superfluous RAM index (can be replaced by kelondroRowSet) - adapted all current classes that used the old caching methods - more asserts, more bugfixes git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2865 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/PerformanceMemory_p.java | 3 +- source/dbtest.java | 4 +- .../kelondro/kelondroBufferedIndex.java | 240 -------------- .../anomic/kelondro/kelondroBytesIntMap.java | 10 +- .../anomic/kelondro/kelondroCachedIndex.java | 151 --------- .../kelondro/kelondroCollectionIndex.java | 11 +- source/de/anomic/kelondro/kelondroDyn.java | 2 +- .../kelondro/kelondroFlexSplitTable.java | 49 ++- .../de/anomic/kelondro/kelondroFlexTable.java | 26 +- .../anomic/kelondro/kelondroIntBytesMap.java | 11 +- .../de/anomic/kelondro/kelondroMapTable.java | 2 +- .../anomic/kelondro/kelondroObjectBuffer.java | 3 + .../anomic/kelondro/kelondroObjectCache.java | 307 ------------------ .../de/anomic/kelondro/kelondroRAMIndex.java | 126 ------- .../kelondro/kelondroRowCollection.java | 26 +- source/de/anomic/kelondro/kelondroRowSet.java | 49 +-- source/de/anomic/kelondro/kelondroTree.java | 4 +- source/de/anomic/plasma/plasmaCrawlLURL.java | 14 +- source/de/anomic/plasma/plasmaCrawlNURL.java | 11 +- .../de/anomic/plasma/plasmaCrawlStacker.java | 28 +- .../plasma/plasmaWordIndexAssortment.java | 8 +- .../plasmaWordIndexAssortmentCluster.java | 4 +- source/de/anomic/yacy/yacyNewsDB.java | 6 +- source/de/anomic/yacy/yacySeedDB.java | 4 +- 25 files changed, 160 insertions(+), 941 deletions(-) delete mode 100644 
source/de/anomic/kelondro/kelondroBufferedIndex.java delete mode 100644 source/de/anomic/kelondro/kelondroCachedIndex.java delete mode 100644 source/de/anomic/kelondro/kelondroObjectCache.java delete mode 100644 source/de/anomic/kelondro/kelondroRAMIndex.java diff --git a/build.properties b/build.properties index 6f71d5cbb..866551e7c 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.483 +releaseVersion=0.484 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index f5983e3ff..f5bddacc3 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -49,7 +49,6 @@ import java.util.Map; import de.anomic.http.httpHeader; import de.anomic.http.httpc; -import de.anomic.kelondro.kelondroCachedIndex; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverFileUtils; import de.anomic.server.serverMemory; @@ -338,7 +337,7 @@ public class PerformanceMemory_p { private static void putprop(serverObjects prop, serverSwitch env, String wdb, String db, String set) { if ((slt == null) || (ost == null)) return; - usd = chk * slt[1] + obj * ost[2] /*hit*/ + kelondroCachedIndex.cacheObjectMissSize * ost[3] /*miss*/; + usd = chk * slt[1] + obj * ost[2] /*hit*/ + 12 * ost[3] /*miss*/; bst = (((((long) chk) * ((long) req)) >> 10) + 1) << 10; if (set.equals("setBest")) env.setConfig("ramCache" + db, bst); prop.put(wdb + ((wdb.length() > 0) ? 
("_") : ("")) + "nodsz" + db, chk); diff --git a/source/dbtest.java b/source/dbtest.java index 24f41ab00..bebb46fde 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -13,7 +13,7 @@ import java.util.Iterator; import java.util.Random; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroCachedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; @@ -175,7 +175,7 @@ public class dbtest { kelondroRow testRow = new kelondroRow("byte[] key-" + keylength + ", byte[] dummy-" + keylength + ", value-" + valuelength); if (dbe.equals("kelondroTree")) { File tablefile = new File(tablename + ".kelondro.db"); - table = new kelondroCachedIndex(new kelondroTree(tablefile, buffer / 2, preload, testRow), buffer / 2); + table = new kelondroCache(new kelondroTree(tablefile, buffer / 2, preload, testRow), buffer / 2, true, true); } if (dbe.equals("kelondroSplittedTree")) { File tablepath = new File(tablename).getParentFile(); diff --git a/source/de/anomic/kelondro/kelondroBufferedIndex.java b/source/de/anomic/kelondro/kelondroBufferedIndex.java deleted file mode 100644 index 1f9838344..000000000 --- a/source/de/anomic/kelondro/kelondroBufferedIndex.java +++ /dev/null @@ -1,240 +0,0 @@ -// kelondroBufferedIndex.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 16.10.2006 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro; - -import java.io.IOException; -import java.util.Date; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; - -import de.anomic.server.serverMemory; -import de.anomic.server.logging.serverLog; - -public class kelondroBufferedIndex implements kelondroIndex { - - // this implements a write buffer on index objects - - private static final long memBlockLimit = 2000000; // do not fill cache further if the amount of available memory is less that this - private static final int bufferFlushLimit = 10000; - private static final int bufferFlushMinimum = 1000; - private TreeMap buffer; - private kelondroIndex index; - - public kelondroBufferedIndex(kelondroIndex theIndex) { - index = theIndex; - buffer = (theIndex.order() == null) ? 
new TreeMap() : new TreeMap(theIndex.order()); - } - - public synchronized void flush() throws IOException { - if ((buffer == null) || (buffer.size() == 0)) return; - Iterator i = buffer.entrySet().iterator(); - Map.Entry entry; - while (i.hasNext()) { - entry = (Map.Entry) i.next(); - index.put((kelondroRow.Entry) entry.getValue()); - } - buffer.clear(); - } - - public synchronized void flushOnce() throws IOException { - if (buffer.size() == 0) return; - Iterator i = buffer.entrySet().iterator(); - Map.Entry entry; - if (i.hasNext()) { - entry = (Map.Entry) i.next(); - System.out.println("*** DEBUG: flushed " + ((kelondroRow.Entry) entry.getValue()).getColString(0, null)); - index.put((kelondroRow.Entry) entry.getValue()); - } - } - - public void flushSome() throws IOException { - if (buffer.size() == 0) return; - int flush = Math.max(1, buffer.size() / 10); - while (flush-- > 0) flushOnce(); - } - - public synchronized int size() throws IOException { - return buffer.size() + index.size(); - } - - public int writeBufferSize() { - return buffer.size(); - } - - public synchronized String toString() { - try {flush();} catch (IOException e) {} - return index.toString(); - } - - public synchronized kelondroRow.Entry get(byte[] key) throws IOException { - long handle = (index instanceof kelondroTree) ? index.profile().startRead() : -1; - kelondroRow.Entry entry = null; - entry = (kelondroRow.Entry) buffer.get(key); - if (entry == null) entry = index.get(key); - if (handle >= 0) index.profile().stopRead(handle); - return entry; - } - - public synchronized kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { - return put(row, null); - } - - public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { - assert (row != null); - assert (row.getColBytes(index.primarykey()) != null); - assert (!(serverLog.allZero(row.getColBytes(index.primarykey())))); - long handle = (index instanceof kelondroTree) ? 
index.profile().startWrite() : -1; - byte[] key = row.getColBytes(index.primarykey()); - kelondroRow.Entry oldentry = null; - oldentry = (kelondroRow.Entry) buffer.get(key); - if (oldentry == null) { - // try the collection - oldentry = index.get(key); - if (oldentry == null) { - // this was not anywhere - if (entryDate == null) { - buffer.put(key, row); - if (((buffer.size() > bufferFlushMinimum) && (serverMemory.available() > memBlockLimit)) - || (buffer.size() > bufferFlushLimit)) - flush(); - } else { - index.put(row, entryDate); - } - } else { - // replace old entry - if (entryDate == null) { - index.put(row); - } else { - index.put(row, entryDate); - } - } - } else { - // the entry is already in buffer - // simply replace old entry - if (entryDate == null) { - buffer.put(key, row); - } else { - buffer.remove(key); - index.put(row, entryDate); - } - } - if (handle >= 0) index.profile().stopWrite(handle); - return oldentry; - } - - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { - assert (index instanceof kelondroRowSet); - ((kelondroRowSet) index).addUnique(row); - } - - public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { - addUnique(row); - } - - public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { - long handle = (index instanceof kelondroTree) ? index.profile().startDelete() : -1; - kelondroRow.Entry oldentry = null; - oldentry = (kelondroRow.Entry) buffer.remove(key); - if (oldentry == null) { - // try the collection - return index.remove(key); - } - if (handle >= 0) index.profile().stopDelete(handle); - return oldentry; - } - - public synchronized kelondroRow.Entry removeOne() throws IOException { - long handle = (index instanceof kelondroTree) ? 
index.profile().startDelete() : -1; - if (buffer.size() > 0) { - byte[] key = (byte[]) buffer.keySet().iterator().next(); - kelondroRow.Entry entry = (kelondroRow.Entry) buffer.remove(key); - if (handle >= 0) index.profile().stopDelete(handle); - return entry; - } else { - kelondroRow.Entry entry = index.removeOne(); - if (handle >= 0) index.profile().stopDelete(handle); - return entry; - } - } - - public kelondroProfile profile() { - return index.profile(); - } - - public synchronized void close() throws IOException { - flush(); - buffer = null; - index.close(); - } - - public kelondroOrder order() { - return index.order(); - } - - public int primarykey() { - return index.primarykey(); - } - - public kelondroRow row() throws IOException { - return index.row(); - } - - public synchronized Iterator rows() throws IOException { - return rows(true, false, null); - } - - public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { - flush(); - return index.rows(up, rotating, firstKey); - } - - public static kelondroBufferedIndex getRAMIndex(kelondroRow rowdef, int initSize) { - return new kelondroBufferedIndex(new kelondroRowSet(rowdef, kelondroNaturalOrder.naturalOrder, 0, initSize)); - } - - public final int cacheObjectChunkSize() { - // dummy method - return -1; - } - - public long[] cacheObjectStatus() { - // dummy method - return null; - } - - public final int cacheNodeChunkSize() { - // returns the size that the node cache uses for a single entry - return index.cacheNodeChunkSize(); - } - - public final int[] cacheNodeStatus() { - // a collection of different node cache status values - return index.cacheNodeStatus(); - } - -} diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index f0ada0105..bcfbc17be 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -27,8 +27,6 @@ package 
de.anomic.kelondro; import java.io.IOException; import java.util.Iterator; -import de.anomic.server.logging.serverLog; - public class kelondroBytesIntMap { private kelondroIndex ki; @@ -41,7 +39,7 @@ public class kelondroBytesIntMap { public synchronized int geti(byte[] key) throws IOException { assert (key != null); - assert (!(serverLog.allZero(key))); + //assert (!(serverLog.allZero(key))); kelondroRow.Entry indexentry = ki.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); @@ -49,7 +47,7 @@ public class kelondroBytesIntMap { public synchronized int puti(byte[] key, int i) throws IOException { assert (key != null); - assert (!(serverLog.allZero(key))); + //assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); @@ -60,7 +58,7 @@ public class kelondroBytesIntMap { public synchronized void addi(byte[] key, int i) throws IOException { assert (key != null); - assert (!(serverLog.allZero(key))); + //assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); @@ -69,7 +67,7 @@ public class kelondroBytesIntMap { public synchronized int removei(byte[] key) throws IOException { assert (key != null); - assert (!(serverLog.allZero(key))); + //assert (!(serverLog.allZero(key))); // returns the integer index of the key, if the key can be found and was removed // and -1 if the key was not found. if (ki.size() == 0) return -1; diff --git a/source/de/anomic/kelondro/kelondroCachedIndex.java b/source/de/anomic/kelondro/kelondroCachedIndex.java deleted file mode 100644 index 08722cdcc..000000000 --- a/source/de/anomic/kelondro/kelondroCachedIndex.java +++ /dev/null @@ -1,151 +0,0 @@ -// kelondroCachedIndex -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 23.10.2006 on http://www.anomic.de -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro; - -import java.io.IOException; -import java.util.Date; -import java.util.Iterator; - -import de.anomic.kelondro.kelondroRow.Entry; -import de.anomic.server.logging.serverLog; - -public class kelondroCachedIndex implements kelondroIndex { - - public final static int cacheObjectMissSize = 120; - public final static int defaultObjectCachePercent = 10; - - private kelondroObjectCache objectCache; - private kelondroIndex theIndex; - - public kelondroCachedIndex(kelondroIndex superIndex, long objectbuffersize) throws IOException { - this.theIndex = superIndex; - long objecthitcachesize = objectbuffersize * 4 / 5 / cacheObjectChunkSize(); - long objectmisscachesize = objectbuffersize / 5 / cacheObjectMissSize; - this.objectCache = new kelondroObjectCache("generic", (int) objecthitcachesize, (int) objectmisscachesize, objecthitcachesize * 3000 , 4*1024*1024); - } - - public final int cacheObjectChunkSize() { - try { - return this.theIndex.row().objectsize() + /* overhead */ 16 * this.theIndex.row().columns(); - } 
catch (IOException e) { - return 0; - } - } - - public long[] cacheObjectStatus() { - if (this.objectCache == null) return null; - return this.objectCache.status(); - } - - public final int cacheNodeChunkSize() { - // returns the size that the node cache uses for a single entry - return theIndex.cacheNodeChunkSize(); - } - - public final int[] cacheNodeStatus() { - // a collection of different node cache status values - return theIndex.cacheNodeStatus(); - } - - public void addUnique(Entry row) throws IOException { - // the use case for add implies that usually the objects are not needed in the cache - // therefore omit an object cache write here - this.theIndex.addUnique(row); - } - - public void addUnique(Entry row, Date entryDate) throws IOException { - this.theIndex.addUnique(row, entryDate); - } - - public void close() throws IOException { - this.objectCache = null; - this.theIndex.close(); - - } - - public Entry get(byte[] key) throws IOException { - // get result from cache - kelondroRow.Entry result = (objectCache == null) ? 
null : (kelondroRow.Entry) objectCache.get(key); - if (result != null) return result; - // check if we have an entry in the miss cache - if ((objectCache != null) && (objectCache.has(key) == -1)) return null; - // finally: get it from the index - result = this.theIndex.get(key); - if (result == null) objectCache.hasnot(key); else objectCache.put(key, result); - return result; - } - - public kelondroOrder order() { - return this.theIndex.order(); - } - - public int primarykey() { - return this.theIndex.primarykey(); - } - - public kelondroProfile profile() { - return this.theIndex.profile(); - } - - public Entry put(Entry row) throws IOException { - assert (row != null); - assert (row.columns() == row().columns()); - assert (!(serverLog.allZero(row.getColBytes(theIndex.primarykey())))); - objectCache.put(row.getColBytes(theIndex.primarykey()), row); - return this.theIndex.put(row); - } - - public Entry put(Entry row, Date entryDate) throws IOException { - assert (row.columns() == row().columns()); - objectCache.put(row.getColBytes(theIndex.primarykey()), row); - return this.theIndex.put(row, entryDate); - } - - public Entry remove(byte[] key) throws IOException { - if (objectCache.has(key) == -1) return null; - objectCache.remove(key); - return this.theIndex.remove(key); - } - - public Entry removeOne() throws IOException { - Entry entry = this.theIndex.removeOne(); - if (entry == null) return null; - this.objectCache.remove(entry.getColBytes(this.theIndex.primarykey())); - return entry; - } - - public kelondroRow row() throws IOException { - return this.theIndex.row(); - } - - public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException { - return this.theIndex.rows(up, rotating, firstKey); - } - - public int size() throws IOException { - return this.theIndex.size(); - } - -} diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index b46ff7155..e867eec8e 100644 --- 
a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -99,8 +99,7 @@ public class kelondroCollectionIndex { boolean ramIndexGeneration = false; boolean fileIndexGeneration = !(new File(path, filenameStub + ".index").exists()); - //if (ramIndexGeneration) index = new kelondroRAMIndex(indexOrder, indexRow()); - if (ramIndexGeneration) index = new kelondroBufferedIndex(new kelondroRowSet(indexRow(), indexOrder, 0, 0)); + if (ramIndexGeneration) index = new kelondroRowSet(indexRow(), indexOrder, 0, 0); if (fileIndexGeneration) index = new kelondroFlexTable(path, filenameStub + ".index", buffersize, preloadTime, indexRow(), indexOrder); // open array files @@ -158,10 +157,7 @@ public class kelondroCollectionIndex { ientry.setCol(idx_col_indexpos, j); ientry.setCol(idx_col_lastread, t); ientry.setCol(idx_col_lastwrote, t); - if (index instanceof kelondroBufferedIndex) - ((kelondroBufferedIndex) index).addUnique(ientry); - else - index.put(ientry); + index.addUnique(ientry); // write a log if (System.currentTimeMillis() - lastlog > 30000) { @@ -177,7 +173,7 @@ public class kelondroCollectionIndex { long buffersize, long preloadTime, int loadfactor, kelondroRow rowdef) throws IOException { // open/create index table - kelondroIndex theindex = new kelondroCachedIndex(new kelondroFlexTable(path, filenameStub + ".index", buffersize / 2, preloadTime, indexRow(), indexOrder), buffersize / 2); + kelondroIndex theindex = new kelondroCache(new kelondroFlexTable(path, filenameStub + ".index", buffersize / 2, preloadTime, indexRow(), indexOrder), buffersize / 2, true, true); // save/check property file for this array File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize()); @@ -295,6 +291,7 @@ public class kelondroCollectionIndex { // join with new collection oldcollection.addAll(collection); oldcollection.shape(); + oldcollection.trim(); collection = oldcollection; } diff --git 
a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index 10ee19471..233712549 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -81,7 +81,7 @@ public class kelondroDyn { // creates or opens a dynamic tree rowdef = new kelondroRow("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize); kelondroTree tree = new kelondroTree(file, buffersize / 2, preloadTime, rowdef, objectOrder, 1, 8); - this.index = new kelondroCachedIndex(tree, buffersize / 2); + this.index = new kelondroCache(tree, buffersize / 2, true, true); this.keylen = index.row().width(0) - counterlen; this.reclen = index.row().width(1); this.fillChar = fillChar; diff --git a/source/de/anomic/kelondro/kelondroFlexSplitTable.java b/source/de/anomic/kelondro/kelondroFlexSplitTable.java index d88d59341..ec9ff21b5 100644 --- a/source/de/anomic/kelondro/kelondroFlexSplitTable.java +++ b/source/de/anomic/kelondro/kelondroFlexSplitTable.java @@ -68,7 +68,7 @@ public class kelondroFlexSplitTable implements kelondroIndex { (dir[i].length() == tablename.length() + 7)) { // open table date = dir[i].substring(tablename.length() + 1); - this.tables.put(date, new kelondroFlexTable(path, dir[i], buffersize / count, preloadTime, rowdef, objectOrder)); + this.tables.put(date, new kelondroCache(new kelondroFlexTable(path, dir[i], buffersize / count / 2, preloadTime, rowdef, objectOrder), buffersize / count / 2, true, true)); } } } @@ -102,7 +102,7 @@ public class kelondroFlexSplitTable implements kelondroIndex { Iterator i = tables.values().iterator(); int s = 0; while (i.hasNext()) { - s += ((kelondroFlexTable) i.next()).size(); + s += ((kelondroIndex) i.next()).size(); } return s; } @@ -111,10 +111,31 @@ public class kelondroFlexSplitTable implements kelondroIndex { kelondroProfile[] profiles = new kelondroProfile[tables.size()]; Iterator i = tables.values().iterator(); int c = 0; - while (i.hasNext()) profiles[c++] = 
((kelondroFlexTable) i.next()).profile(); + while (i.hasNext()) profiles[c++] = ((kelondroIndex) i.next()).profile(); return kelondroProfile.consolidate(profiles); } + public int writeBufferSize() { + Iterator i = tables.values().iterator(); + int s = 0; + kelondroIndex ki; + while (i.hasNext()) { + ki = ((kelondroIndex) i.next()); + if (ki instanceof kelondroCache) s += ((kelondroCache) ki).writeBufferSize(); + } + return s; + } + + public void flushSome() { + Iterator i = tables.values().iterator(); + kelondroIndex ki; + while (i.hasNext()) { + ki = ((kelondroIndex) i.next()); + if (ki instanceof kelondroCache) + try {((kelondroCache) ki).flushSome();} catch (IOException e) {} + } + } + public kelondroRow row() throws IOException { return this.rowdef; } @@ -131,10 +152,10 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { Object[] keeper = keeperOf(row.getColBytes(0)); - if (keeper != null) return ((kelondroFlexTable) keeper[0]).put(row, entryDate); + if (keeper != null) return ((kelondroIndex) keeper[0]).put(row); String suffix = dateSuffix(entryDate); if (suffix == null) return null; - kelondroFlexTable table = (kelondroFlexTable) tables.get(suffix); + kelondroIndex table = (kelondroIndex) tables.get(suffix); if (table == null) { // make new table table = new kelondroFlexTable(path, tablename + "." 
+ suffix, buffersize / (tables.size() + 1), -1, rowdef, objectOrder); @@ -146,10 +167,10 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized Object[] keeperOf(byte[] key) throws IOException { Iterator i = tables.values().iterator(); - kelondroFlexTable table; + kelondroIndex table; kelondroRow.Entry entry; while (i.hasNext()) { - table = (kelondroFlexTable) i.next(); + table = (kelondroIndex) i.next(); entry = table.get(key); if (entry != null) return new Object[]{table, entry}; } @@ -163,7 +184,7 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { String suffix = dateSuffix(entryDate); if (suffix == null) return; - kelondroFlexTable table = (kelondroFlexTable) tables.get(suffix); + kelondroIndex table = (kelondroIndex) tables.get(suffix); if (table == null) { // make new table table = new kelondroFlexTable(path, tablename + "." + suffix, buffersize / (tables.size() + 1), -1, rowdef, objectOrder); @@ -174,10 +195,10 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { Iterator i = tables.values().iterator(); - kelondroFlexTable table; + kelondroIndex table; kelondroRow.Entry entry; while (i.hasNext()) { - table = (kelondroFlexTable) i.next(); + table = (kelondroIndex) i.next(); entry = table.remove(key); if (entry != null) return entry; } @@ -186,10 +207,10 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized kelondroRow.Entry removeOne() throws IOException { Iterator i = tables.values().iterator(); - kelondroFlexTable table, maxtable = null; + kelondroIndex table, maxtable = null; int maxcount = -1; while (i.hasNext()) { - table = (kelondroFlexTable) i.next(); + table = (kelondroIndex) i.next(); if (table.size() > maxcount) { maxtable = table; maxcount = table.size(); @@ -223,7 +244,7 @@ public class 
kelondroFlexSplitTable implements kelondroIndex { if (t.hasNext()) { if ((tt == null) || (!(tt.hasNext()))) { try { - tt = ((kelondroFlexTable) t.next()).rows(true, false, null); + tt = ((kelondroIndex) t.next()).rows(true, false, null); } catch (IOException e) { return null; } @@ -265,7 +286,7 @@ public class kelondroFlexSplitTable implements kelondroIndex { public synchronized void close() throws IOException { Iterator i = tables.values().iterator(); - while (i.hasNext()) ((kelondroFlexTable) i.next()).close(); + while (i.hasNext()) ((kelondroIndex) i.next()).close(); tables = null; } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 3f2b4403a..b6a9f87e2 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -35,7 +35,8 @@ import de.anomic.server.logging.serverLog; public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { protected kelondroBytesIntMap index; - + private boolean RAMIndex; + public kelondroFlexTable(File path, String tablename, long buffersize, long preloadTime, kelondroRow rowdef, kelondroOrder objectOrder) throws IOException { super(path, tablename, rowdef); File newpath = new File(path, tablename); @@ -55,7 +56,8 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr if (indexfile.exists()) { // use existing index file System.out.println("*** Using File index " + indexfile); - ki = new kelondroCachedIndex(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2); + ki = new kelondroCache(kelondroTree.open(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true); + RAMIndex = false; } else if ((preloadTime >= 0) && (stt > preloadTime)) { // generate new index file System.out.print("*** Generating File index for " + size() + " 
entries from " + indexfile); @@ -65,6 +67,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr System.out.println(ki.size() + " entries indexed from " + super.col[0].size() + " keys."); + RAMIndex = false; } else { // fill the index System.out.print("*** Loading RAM index for " + size() + " entries from "+ newpath); @@ -74,6 +77,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr System.out.println(ki.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys."); + RAMIndex = true; } // assign index to wrapper index = new kelondroBytesIntMap(ki); @@ -81,8 +85,19 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr super.col[0].setDescription(description.getBytes()); } + public boolean hasRAMIndex() { + return RAMIndex; + } + + public boolean has(byte[] key) throws IOException { + // it is not recommended to implement or use a has predicate unless + // it can be ensured that it causes no IO + assert (RAMIndex == true); + return index.geti(key) >= 0; + } + private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException { - kelondroBufferedIndex ri = new kelondroBufferedIndex(new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0)); + kelondroRowSet ri = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), objectOrder, 0, 0); //kelondroRowSet ri = new kelondroRowSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), 0); //ri.setOrdering(objectOrder, 0); Iterator content = super.col[0].contentNodes(-1); @@ -106,12 +121,13 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } System.out.print(" -ordering- "); System.out.flush(); - ri.flush(); + ri.shape(); + ri.trim(); return ri; } private 
kelondroIndex initializeTreeIndex(File indexfile, long buffersize, long preloadTime, kelondroOrder objectOrder) throws IOException { - kelondroIndex treeindex = new kelondroCachedIndex(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2); + kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, buffersize / 2, preloadTime, treeIndexRow(rowdef.width(0)), objectOrder, 2, 80), buffersize / 2, true, true); Iterator content = super.col[0].contentNodes(-1); kelondroRecords.Node node; kelondroRow.Entry indexentry; diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index 39f5ad330..f68dc601f 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -31,10 +31,10 @@ import java.util.Iterator; public class kelondroIntBytesMap { - private kelondroBufferedIndex index; + private kelondroIndex index; public kelondroIntBytesMap(int payloadSize, int initSize) { - index = kelondroBufferedIndex.getRAMIndex(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize); + index = kelondroRowSet.getRAMIndex(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize); } public int size() { @@ -101,14 +101,17 @@ public class kelondroIntBytesMap { public Iterator rows() { try { - return index.rows(); + return index.rows(true, false, null); } catch (IOException e) { return null; } } public void flush() { - try {index.flush();} catch (IOException e) {} + if (index instanceof kelondroRowSet) { + ((kelondroRowSet) index).shape(); + ((kelondroRowSet) index).trim(); + } } public kelondroProfile profile() { diff --git a/source/de/anomic/kelondro/kelondroMapTable.java b/source/de/anomic/kelondro/kelondroMapTable.java index a735d5ee5..65742462c 100644 --- a/source/de/anomic/kelondro/kelondroMapTable.java +++ 
b/source/de/anomic/kelondro/kelondroMapTable.java @@ -91,7 +91,7 @@ public class kelondroMapTable { if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared already in other context."); if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared twice."); File tablefile = new File(tablesPath, "table." + tablename + ".tdb"); - kelondroIndex Tree = new kelondroCachedIndex(kelondroTree.open(tablefile, buffersize / 2, preloadTime, rowdef), buffersize / 2); + kelondroIndex Tree = new kelondroCache(kelondroTree.open(tablefile, buffersize / 2, preloadTime, rowdef), buffersize / 2, true, true); tTables.put(tablename, Tree); } diff --git a/source/de/anomic/kelondro/kelondroObjectBuffer.java b/source/de/anomic/kelondro/kelondroObjectBuffer.java index 23d33bef7..e4abecd90 100644 --- a/source/de/anomic/kelondro/kelondroObjectBuffer.java +++ b/source/de/anomic/kelondro/kelondroObjectBuffer.java @@ -58,6 +58,9 @@ package de.anomic.kelondro; public class kelondroObjectBuffer { + // this is a buffer for a single (only one) key/value object + // without an index-backend + private int readHit, readMiss, writeUnique, writeDouble; private String name; private byte[] key; diff --git a/source/de/anomic/kelondro/kelondroObjectCache.java b/source/de/anomic/kelondro/kelondroObjectCache.java deleted file mode 100644 index 69946ba46..000000000 --- a/source/de/anomic/kelondro/kelondroObjectCache.java +++ /dev/null @@ -1,307 +0,0 @@ -// kelondroObjectCache.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. 
M., Germany -// first published 2006 on http://www.anomic.de -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro; - -import java.util.TreeMap; - -import de.anomic.server.serverMemory; - -public class kelondroObjectCache { - - private final TreeMap cache; - private final kelondroMScoreCluster ages, hasnot; - private long startTime; - private int maxHitSize, maxMissSize; - private long maxAge; - private long minMem; - private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush; - private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble, hasnotDelete, hasnotFlush; - private String name; - - public kelondroObjectCache(String name, int maxHitSize, int maxMissSize, long maxAge, long minMem) { - this.name = name; - this.cache = new TreeMap(); - this.ages = new kelondroMScoreCluster(); - this.hasnot = new kelondroMScoreCluster(); - this.startTime = System.currentTimeMillis(); - this.maxHitSize = Math.max(maxHitSize, 1); - this.maxMissSize = Math.max(maxMissSize, 1); - this.maxAge = Math.max(maxAge, 10000); - this.minMem = Math.max(minMem, 1024 * 1024); - this.readHit = 0; - this.readMiss = 0; - 
this.writeUnique = 0; - this.writeDouble = 0; - this.cacheDelete = 0; - this.cacheFlush = 0; - this.hasnotHit = 0; - this.hasnotMiss = 0; - this.hasnotUnique = 0; - this.hasnotDouble = 0; - this.hasnotDelete = 0; - this.hasnotFlush = 0; - } - - public String getName() { - return name; - } - - public void setMaxAge(long maxAge) { - this.maxAge = maxAge; - } - - public void setMaxHitSize(int maxSize) { - this.maxHitSize = maxSize; - } - - public void setMaxMissSize(int maxSize) { - this.maxMissSize = maxSize; - } - - public int maxHitSize() { - return this.maxHitSize; - } - - public int maxMissSize() { - return this.maxMissSize; - } - - public void setMinMem(int minMem) { - this.minMem = minMem; - } - - public long minAge() { - if (ages.size() == 0) return 0; - return System.currentTimeMillis() - longEmit(ages.getMaxScore()); - } - - public long maxAge() { - if (ages.size() == 0) return 0; - return System.currentTimeMillis() - longEmit(ages.getMinScore()); - } - - public int hitsize() { - return cache.size(); - } - - public int misssize() { - return hasnot.size(); - } - - public long[] status() { - return new long[]{ - (long) maxHitSize(), - (long) maxMissSize(), - (long) hitsize(), - (long) misssize(), - this.maxAge, - minAge(), - maxAge(), - (long) readHit, - (long) readMiss, - (long) writeUnique, - (long) writeDouble, - (long) cacheDelete, - (long) cacheFlush, - (long) hasnotHit, - (long) hasnotMiss, - (long) hasnotUnique, - (long) hasnotDouble, - (long) hasnotDelete, - (long) hasnotFlush - }; - } - - private static long[] combinedStatus(long[] a, long[] b) { - return new long[]{ - a[0] + b[0], - a[1] + b[1], - a[2] + b[2], - a[3] + b[3], - Math.max(a[4], b[4]), - Math.min(a[5], b[5]), - Math.max(a[6], b[6]), - a[7] + b[7], - a[8] + b[8], - a[9] + b[9], - a[10] + b[10], - a[11] + b[11], - a[12] + b[12], - a[13] + b[13], - a[14] + b[14], - a[15] + b[15], - a[16] + b[16], - a[17] + b[17], - a[18] + b[18] - }; - } - - public static long[] combinedStatus(long[][] a, 
int l) { - if ((a == null) || (a.length == 0) || (l == 0)) return null; - if ((a.length >= 1) && (l == 1)) return a[0]; - if ((a.length >= 2) && (l == 2)) return combinedStatus(a[0], a[1]); - return combinedStatus(combinedStatus(a, l - 1), a[l - 1]); - } - - private int intTime(long longTime) { - return (int) Math.max(0, ((longTime - startTime) / 1000)); - } - - private long longEmit(int intTime) { - return (((long) intTime) * (long) 1000) + startTime; - } - - public void put(byte[] key, Object value) { - if (key != null) put(new String(key), value); - } - - public void put(String key, Object value) { - if ((key == null) || (value == null)) return; - Object prev = null; - synchronized(cache) { - prev = cache.put(key, value); - ages.setScore(key, intTime(System.currentTimeMillis())); - if (hasnot.deleteScore(key) != 0) hasnotDelete++; - } - if (prev == null) this.writeUnique++; else this.writeDouble++; - flushc(); - } - - public Object get(byte[] key) { - return get(new String(key)); - } - - public Object get(String key) { - if (key == null) return null; - Object r = null; - synchronized(cache) { - r = cache.get(key); - if (r == null) { - this.readMiss++; - } else { - this.readHit++; - ages.setScore(key, intTime(System.currentTimeMillis())); // renew cache update time - } - } - flushc(); - return r; - } - - public void hasnot(byte[] key) { - hasnot(new String(key)); - } - - public void hasnot(String key) { - if (key == null) return; - int prev = 0; - synchronized(cache) { - if (cache.remove(key) != null) cacheDelete++; - ages.deleteScore(key); - prev = hasnot.getScore(key); - hasnot.setScore(key, intTime(System.currentTimeMillis())); - } - if (prev == 0) this.hasnotUnique++; else this.hasnotDouble++; - flushh(); - } - - public int has(byte[] key) { - return has(new String(key)); - } - - public int has(String key) { - // returns a 3-value boolean: - // 1 = key definitely exists - // -1 = key definitely does not exist - // 0 = unknown, if key exists - if (key == null) 
return 0; - synchronized(cache) { - if (hasnot.getScore(key) > 0) { - hasnot.setScore(key, intTime(System.currentTimeMillis())); // renew cache update time - this.hasnotHit++; - return -1; - } - this.hasnotMiss++; - if (cache.get(key) != null) return 1; - } - flushh(); - return 0; - } - - public void remove(byte[] key) { - remove(new String(key)); - } - - public void remove(String key) { - if (key == null) return; - synchronized(cache) { - if (cache.remove(key) != null) cacheDelete++; - ages.deleteScore(key); - hasnot.setScore(key, intTime(System.currentTimeMillis())); - } - flushh(); - } - - public void flushc() { - String k; - synchronized(cache) { - while ((ages.size() > 0) && - ((k = (String) ages.getMinObject()) != null) && - ((ages.size() > maxHitSize) || - (((System.currentTimeMillis() - longEmit(ages.getScore(k))) > maxAge) && - (serverMemory.available() < minMem))) - ) { - cache.remove(k); - ages.deleteScore(k); - cacheFlush++; - } - } - } - - public void flushh() { - String k; - synchronized(cache) { - while ((hasnot.size() > 0) && - ((k = (String) hasnot.getMinObject()) != null) && - ((hasnot.size() > maxMissSize) || - (((System.currentTimeMillis() - longEmit(hasnot.getScore(k))) > maxAge) && - (serverMemory.available() < minMem))) - ) { - hasnot.deleteScore(k); - hasnotFlush++; - } - } - } - - public static void main(String[] args) { - // test to measure memory usage of miss cache - kelondroMScoreCluster t = new kelondroMScoreCluster(); - System.gc(); long s0 = Runtime.getRuntime().freeMemory(); - int loop = 200000; - for (int i = 0; i < loop; i++) t.setScore((Integer.toString(i) + "000000000000").substring(0, 12), i); - System.gc(); long s1 = Runtime.getRuntime().freeMemory(); - System.out.println((s1 - s0) / loop); - } - -} diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java deleted file mode 100644 index 101149171..000000000 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ /dev/null 
@@ -1,126 +0,0 @@ -// kelondroRAMIndex.java -// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany -// first published 12.08.2006 on http://www.anomic.de -// -// This is a part of YaCy, a peer-to-peer based web search engine -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro; - -import java.io.IOException; -import java.util.Date; -import java.util.Iterator; -import java.util.TreeMap; - -import de.anomic.kelondro.kelondroRow.Entry; - -public class kelondroRAMIndex implements kelondroIndex { - - private TreeMap index; - private kelondroOrder order; - private kelondroRow rowdef; - private kelondroProfile profile; - - public kelondroRAMIndex(kelondroOrder defaultOrder, kelondroRow rowdef) { - this.index = new TreeMap(defaultOrder); - this.order = defaultOrder; - this.rowdef = rowdef; - this.profile = new kelondroProfile(); - } - - public kelondroOrder order() { - return this.order; - } - - public int primarykey() { - return 0; - } - - public synchronized int size() { - return this.index.size(); - } - - public kelondroRow row() { - return this.rowdef; - } - - public synchronized Entry get(byte[] key) { - 
return (kelondroRow.Entry) index.get(key); - } - - public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { - return put(row); - } - - public synchronized Entry put(Entry row) { - return (kelondroRow.Entry) index.put(row.getColBytes(0), row); - } - - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { - throw new UnsupportedOperationException(); - } - - public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { - throw new UnsupportedOperationException(); - } - - public synchronized Entry remove(byte[] key) { - return (kelondroRow.Entry) index.remove(key); - } - - public synchronized Entry removeOne() { - if (this.index.size() == 0) return null; - return remove((byte[]) index.keySet().iterator().next()); - } - - public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) { - return index.values().iterator(); - } - - public void close() { - index = null; - } - - public kelondroProfile profile() { - return profile; - } - - public final int cacheObjectChunkSize() { - // dummy method - return -1; - } - - public long[] cacheObjectStatus() { - // dummy method - return null; - } - - public final int cacheNodeChunkSize() { - // returns the size that the node cache uses for a single entry - return -1; - } - - public final int[] cacheNodeStatus() { - // a collection of different node cache status values - return new int[]{0,0,0,0,0,0,0,0,0,0}; - } -} diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index decbd68a2..b0c70acd4 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -98,6 +98,11 @@ public class kelondroRowCollection { } this.sortColumn = (int) exportedCollection.getColLong(exp_order_col); this.sortBound = (int) exportedCollection.getColLong(exp_order_bound); + //assert (sortBound <= chunkcount) : 
"sortBound = " + sortBound + ", chunkcount = " + chunkcount; + if (sortBound > chunkcount) { + serverLog.logWarning("RowCollection", "corrected wrong sortBound; sortBound = " + sortBound + ", chunkcount = " + chunkcount); + this.sortBound = chunkcount; + } this.chunkcache = exportedCollection.getColBytes(exp_collection); } @@ -127,7 +132,8 @@ public class kelondroRowCollection { trim(); kelondroRow row = exportRow(chunkcache.length); kelondroRow.Entry entry = row.newEntry(); - entry.setCol(exp_chunkcount, size()); + assert (sortBound <= chunkcount) : "sortBound = " + sortBound + ", chunkcount = " + chunkcount; + entry.setCol(exp_chunkcount, this.chunkcount); entry.setCol(exp_last_read, daysSince2000(this.lastTimeRead)); entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote)); entry.setCol(exp_order_type, (this.sortOrder == null) ? "__".getBytes() : this.sortOrder.signature().getBytes()); @@ -161,17 +167,7 @@ public class kelondroRowCollection { newChunkcache = null; } } - /* - public void implantRows(byte[] b) { - assert (b.length % rowdef.objectsize() == 0); - synchronized (chunkcache) { - chunkcache = b; - chunkcount = b.length / rowdef.objectsize(); - sortBound = 0; - lastTimeWrote = System.currentTimeMillis(); - } - } - */ + public final long lastRead() { return lastTimeRead; } @@ -182,7 +178,7 @@ public class kelondroRowCollection { public final kelondroRow.Entry get(int index) { assert (index >= 0) : "get: access with index " + index + " is below zero"; - assert (index < chunkcount) : "get: access with index " + index + " is above chunkcount " + chunkcount; + assert (index < chunkcount) : "get: access with index " + index + " is above chunkcount " + chunkcount + "; sortBound = " + sortBound; byte[] a = new byte[rowdef.objectsize()]; synchronized (chunkcache) { System.arraycopy(chunkcache, index * rowdef.objectsize(), a, 0, rowdef.objectsize()); @@ -305,9 +301,7 @@ public class kelondroRowCollection { public void remove() { p--; - 
System.arraycopy(chunkcache, (p + 1) * rowdef.objectsize(), chunkcache, p * rowdef.objectsize(), (chunkcount - p - 1) * rowdef.objectsize()); - if (chunkcount == sortBound) sortBound--; - chunkcount--; + removeShift(p); } } diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index bd8e9c74a..4015013d4 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -30,8 +30,6 @@ import java.util.Iterator; import java.util.Random; import java.util.TreeSet; -import de.anomic.server.logging.serverLog; - public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex { private static final int collectionReSortLimit = 90; @@ -92,7 +90,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd public kelondroRow.Entry put(kelondroRow.Entry entry) { assert (entry != null); assert (entry.getColBytes(super.sortColumn) != null); - assert (!(serverLog.allZero(entry.getColBytes(super.sortColumn)))); + //assert (!(serverLog.allZero(entry.getColBytes(super.sortColumn)))); long handle = profile.startWrite(); int index = -1; kelondroRow.Entry oldentry = null; @@ -132,28 +130,23 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd // the entry is not there profile.stopDelete(handle); return null; - } - - // there is an entry - entry = get(p); - if (p < sortBound) { - removeMarker.add(new Integer(p)); } else { - super.swap(p, --chunkcount, 0); - } - - // check case when complete chunkcache is marked as deleted - if (removeMarker.size() == chunkcount) { - this.clear(); - removeMarker.clear(); + // there is an entry + entry = get(p); + if (p < sortBound) { + // mark entry as to-be-deleted + removeMarker.add(new Integer(p)); + if (removeMarker.size() > removeMaxSize) resolveMarkedRemoved(); + } else { + // remove directly by swap + if (chunkcount == sortBound) sortBound--; + super.swap(p, --chunkcount, 0); 
+ } + + profile.stopDelete(handle); + return entry; } } - - // check if removeMarker is full - if (removeMarker.size() >= removeMaxSize) resolveMarkedRemoved(); - - profile.stopDelete(handle); - return entry; } private boolean isMarkedRemoved(int index) { @@ -172,6 +165,14 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd private void resolveMarkedRemoved() { if (removeMarker.size() == 0) return; + + // check case when complete chunkcache is marked as deleted + if (removeMarker.size() == chunkcount) { + this.clear(); + removeMarker.clear(); + return; + } + Integer nxt = (Integer) removeMarker.first(); removeMarker.remove(nxt); int idx = nxt.intValue(); @@ -387,6 +388,10 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd return new int[]{0,0,0,0,0,0,0,0,0,0}; } + public static kelondroIndex getRAMIndex(kelondroRow rowdef, int initSize) { + return new kelondroRowSet(rowdef, kelondroNaturalOrder.naturalOrder, 0, initSize); + } + public static void main(String[] args) { /* String[] test = { "eins", "zwei", "drei", "vier", "fuenf", "sechs", "sieben", "acht", "neun", "zehn" }; diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 7eb6b043e..3842979a4 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -518,11 +518,11 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { } public synchronized void addUnique(kelondroRow.Entry row) throws IOException { - throw new UnsupportedOperationException(); + this.put(row); } public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { - throw new UnsupportedOperationException(); + this.put(row, entryDate); } private void assignChild(Node parentNode, Node childNode, int childType) throws IOException { diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java 
b/source/de/anomic/plasma/plasmaCrawlLURL.java index 2666af033..077059773 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -66,8 +66,7 @@ import de.anomic.http.httpc; import de.anomic.http.httpc.response; import de.anomic.index.indexEntry; import de.anomic.index.indexURL; -import de.anomic.kelondro.kelondroBufferedIndex; -import de.anomic.kelondro.kelondroCachedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroFlexSplitTable; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroRow; @@ -102,11 +101,11 @@ public final class plasmaCrawlLURL extends indexURL { try { if (newdb) { - urlIndexFile = new kelondroBufferedIndex(new kelondroCachedIndex(new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLNewEntry.rowdef, kelondroBase64Order.enhancedCoder), bufferkb / 2 * 0x400)); + urlIndexFile = new kelondroFlexSplitTable(new File(indexPath, "PUBLIC/TEXT"), "urls", bufferkb * 0x400, preloadTime, plasmaCrawlLURLNewEntry.rowdef, kelondroBase64Order.enhancedCoder); } else { File oldLURLDB = new File(plasmaPath, "urlHash.db"); oldLURLDB.getParentFile().mkdirs(); - urlIndexFile = new kelondroBufferedIndex(new kelondroCachedIndex(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLOldEntry.rowdef), bufferkb / 2 * 0x400)); + urlIndexFile = new kelondroCache(new kelondroTree(oldLURLDB, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlLURLOldEntry.rowdef), bufferkb / 2 * 0x400, true, true); } } catch (IOException e) { e.printStackTrace(); @@ -149,12 +148,15 @@ public final class plasmaCrawlLURL extends indexURL { public synchronized void flushCacheSome() { try { - ((kelondroBufferedIndex) urlIndexFile).flushSome(); + if (urlIndexFile instanceof kelondroFlexSplitTable) ((kelondroFlexSplitTable) urlIndexFile).flushSome(); + if (urlIndexFile instanceof kelondroCache) 
((kelondroCache) urlIndexFile).flushSome(); } catch (IOException e) {} } public synchronized int writeCacheSize() { - return ((kelondroBufferedIndex) urlIndexFile).writeBufferSize(); + if (urlIndexFile instanceof kelondroFlexSplitTable) return ((kelondroFlexSplitTable) urlIndexFile).writeBufferSize(); + if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).writeBufferSize(); + return 0; } public synchronized plasmaCrawlLURLEntry load(String urlHash, indexEntry searchedWord) { diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index f8bc9d5bf..2af48b698 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -52,7 +52,7 @@ import java.util.Iterator; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroBufferedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroRecords; @@ -157,7 +157,7 @@ public class plasmaCrawlNURL extends indexURL { String newCacheName = "urlNotice4.table"; cacheStacksPath.mkdirs(); try { - urlIndexFile = new kelondroBufferedIndex(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder)); + urlIndexFile = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb / 2 * 0x400, preloadTime, rowdef, kelondroBase64Order.enhancedCoder), bufferkb / 2 * 0x400, true, true); } catch (IOException e) { e.printStackTrace(); System.exit(-1); @@ -165,7 +165,12 @@ public class plasmaCrawlNURL extends indexURL { } else { File oldCacheFile = new File(cacheStacksPath, "urlNotice1.db"); oldCacheFile.getParentFile().mkdirs(); - urlIndexFile = new kelondroBufferedIndex(kelondroTree.open(oldCacheFile, bufferkb * 0x400, preloadTime, rowdef)); + try { + urlIndexFile 
= new kelondroCache(kelondroTree.open(oldCacheFile, bufferkb / 2 * 0x400, preloadTime, rowdef), bufferkb / 2 * 0x400, true, true); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } } } diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 382551f21..282777de8 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -61,13 +61,12 @@ import de.anomic.data.robotsParser; import de.anomic.http.httpc; import de.anomic.index.indexURL; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroBufferedIndex; -import de.anomic.kelondro.kelondroCachedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; -import de.anomic.kelondro.kelondroRAMIndex; import de.anomic.kelondro.kelondroRow; +import de.anomic.kelondro.kelondroRowSet; import de.anomic.kelondro.kelondroTree; import de.anomic.net.URL; import de.anomic.plasma.urlPattern.plasmaURLPattern; @@ -677,13 +676,13 @@ public final class plasmaCrawlStacker { if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path if (this.dbtype == QUEUE_DB_TYPE_RAM) { - this.urlEntryCache = new kelondroRAMIndex(kelondroBase64Order.enhancedCoder, plasmaCrawlNURL.rowdef); + this.urlEntryCache = new kelondroRowSet(plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder, 0, 0); } if (this.dbtype == QUEUE_DB_TYPE_FLEX) { String newCacheName = "urlPreNotice1.table"; cacheStacksPath.mkdirs(); try { - this.urlEntryCache = new kelondroBufferedIndex(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder)); + this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlNURL.rowdef, 
kelondroBase64Order.enhancedCoder), bufferkb / 2 * 0x400, true, true); } catch (IOException e) { e.printStackTrace(); System.exit(-1); @@ -692,28 +691,29 @@ public final class plasmaCrawlStacker { if (this.dbtype == QUEUE_DB_TYPE_TREE) { File cacheFile = new File(cacheStacksPath, "urlPreNotice.db"); cacheFile.getParentFile().mkdirs(); - this.urlEntryCache = new kelondroBufferedIndex(kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef)); + try { + this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, bufferkb / 2 * 0x400, preloadTime, plasmaCrawlNURL.rowdef), bufferkb / 2 * 0x400, true, true); + } catch (IOException e) { + e.printStackTrace(); + System.exit(-1); + } } } public int cacheNodeChunkSize() { - if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeChunkSize(); - return 0; + return urlEntryCache.cacheNodeChunkSize(); } public int[] cacheNodeStatus() { - if (urlEntryCache instanceof kelondroTree) return ((kelondroTree) urlEntryCache).cacheNodeStatus(); - return new int[]{0,0,0,0,0,0,0,0,0,0}; + return urlEntryCache.cacheNodeStatus(); } public int cacheObjectChunkSize() { - if (urlEntryCache instanceof kelondroCachedIndex) return ((kelondroCachedIndex) urlEntryCache).cacheObjectChunkSize(); - return 0; + return urlEntryCache.cacheObjectChunkSize(); } public long[] cacheObjectStatus() { - if (urlEntryCache instanceof kelondroCachedIndex) return ((kelondroCachedIndex) urlEntryCache).cacheObjectStatus(); - return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + return urlEntryCache.cacheObjectStatus(); } public void close() throws IOException { diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index 0f530c452..e44aff388 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -60,7 +60,7 @@ import de.anomic.index.indexContainer; 
import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexURLEntry; -import de.anomic.kelondro.kelondroCachedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroRow; @@ -82,7 +82,7 @@ public final class plasmaWordIndexAssortment { private File assortmentFile; private int assortmentLength; private serverLog log; - private kelondroCachedIndex assortments; + private kelondroCache assortments; private long bufferSize; private long preloadTime; @@ -118,7 +118,7 @@ public final class plasmaWordIndexAssortment { this.log = log; // open assortment tree file long start = System.currentTimeMillis(); - assortments = new kelondroCachedIndex(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2); + assortments = new kelondroCache(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2, true, true); long stop = System.currentTimeMillis(); if (log != null) log.logConfig("Opened Assortment, " + assortments.size() + " entries, width " + @@ -248,7 +248,7 @@ public final class plasmaWordIndexAssortment { assortmentFile.renameTo(backupFile); log.logInfo("a back-up of the deleted assortment file is in " + backupFile.toString()); if (assortmentFile.exists()) assortmentFile.delete(); - assortments = new kelondroCachedIndex(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2); + assortments = new kelondroCache(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2, true, true); } catch (Exception e) { // if this fails, delete the file if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database"); diff --git 
a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java index 4ba46e98c..8aa6ab3e0 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortmentCluster.java @@ -57,9 +57,9 @@ import de.anomic.index.indexContainer; import de.anomic.index.indexContainerOrder; import de.anomic.index.indexEntry; import de.anomic.index.indexRI; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroMergeIterator; import de.anomic.kelondro.kelondroNaturalOrder; -import de.anomic.kelondro.kelondroObjectCache; import de.anomic.kelondro.kelondroRecords; import de.anomic.server.logging.serverLog; @@ -382,7 +382,7 @@ public final class plasmaWordIndexAssortmentCluster extends indexAbstractRI impl public long[] cacheObjectStatus() { long[][] a = new long[assortments.length][]; for (int i = assortments.length - 1; i >= 0; i--) a[i] = assortments[i].cacheObjectStatus(); - return kelondroObjectCache.combinedStatus(a, a.length); + return kelondroCache.combinedStatus(a, a.length); } public void close(int waitingSeconds) { diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index 25c6a2264..d41ae2824 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -50,7 +50,7 @@ import java.io.UnsupportedEncodingException; import java.util.Iterator; import de.anomic.kelondro.kelondroBase64Order; -import de.anomic.kelondro.kelondroCachedIndex; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; @@ -69,13 +69,13 @@ public class yacyNewsDB { this.path = path; this.bufferkb = bufferkb; this.preloadTime = preloadTime; - this.news = new kelondroCachedIndex(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400); + 
this.news = new kelondroCache(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400, true, true); } private void resetDB() throws IOException { try {close();} catch (Exception e) {} if (path.exists()) path.delete(); - this.news = new kelondroCachedIndex(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400); + this.news = new kelondroCache(kelondroTree.open(path, bufferkb / 2 * 0x400, preloadTime, yacyNewsRecord.rowdef), bufferkb / 2 * 0x400, true, true); } public int cacheNodeChunkSize() { diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 821c86d40..44ea3fa31 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -60,11 +60,11 @@ import java.util.Map; import de.anomic.http.httpHeader; import de.anomic.http.httpc; import de.anomic.http.httpd; +import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroDyn; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.kelondro.kelondroMap; -import de.anomic.kelondro.kelondroObjectCache; import de.anomic.kelondro.kelondroRecords; import de.anomic.net.URL; import de.anomic.plasma.plasmaSwitchboard; @@ -197,7 +197,7 @@ public final class yacySeedDB { } public long[] cacheObjectStatus() { - return kelondroObjectCache.combinedStatus(new long[][] { + return kelondroCache.combinedStatus(new long[][] { seedActiveDB.cacheObjectStatus(), seedPassiveDB.cacheObjectStatus(), seedPotentialDB.cacheObjectStatus() }, 3);