From c079b18ee70a052ce48f24d423e88eb63375192e Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 7 Jun 2009 21:48:01 +0000 Subject: [PATCH] - refactoring of IntegerHandleIndex and LongHandleIndex: both classes had been merged into the new HandleMap class, which handles (key,n-byte-long) pairs with arbitrary key and value length. This will be useful to get a memory-enhanced/minimized database table indexing. - added an analysis method that counts bytes that could be saved in case the new HandleMap can be applied in the most efficient way. Look for the log messages beginning with "HeapReader saturation": in most cases we could save about 30% RAM! - removed the old FlexTable database structure. It was not used any more. - removed memory statistics in PerformanceMemory about flex tables and node caches (node caches were used by Tree Tables, which are also not used any more) - add a stub for a steering of navigation functions. That should help to switch off navigation computation in cases where it is not demanded by a client git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6034 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/PerformanceMemory_p.html | 54 --- htroot/PerformanceMemory_p.java | 50 +- htroot/PerformanceMemory_p.xml | 32 -- htroot/yacy/search.java | 4 +- htroot/yacysearch.java | 5 + source/de/anomic/data/URLAnalysis.java | 6 +- source/de/anomic/data/blogBoard.java | 2 +- source/de/anomic/data/blogBoardComments.java | 2 +- source/de/anomic/data/bookmarksDB.java | 6 +- source/de/anomic/data/messageBoard.java | 2 +- source/de/anomic/data/userDB.java | 2 +- source/de/anomic/data/wiki/wikiBoard.java | 4 +- source/de/anomic/kelondro/blob/BLOBTree.java | 47 +- .../de/anomic/kelondro/blob/HeapReader.java | 17 +- .../de/anomic/kelondro/blob/HeapWriter.java | 6 +- source/de/anomic/kelondro/index/Column.java | 3 +- ...IntegerHandleIndex.java => HandleMap.java} | 212 +++++---- .../de/anomic/kelondro/index/IndexTest.java | 2 +- 
.../kelondro/index/LongHandleIndex.java | 337 -------------- .../anomic/kelondro/table/CachedRecords.java | 64 +-- source/de/anomic/kelondro/table/EcoTable.java | 46 +- .../de/anomic/kelondro/table/FlexTable.java | 431 ------------------ .../de/anomic/kelondro/table/SplitTable.java | 11 +- .../text/ReferenceContainerArray.java | 8 +- .../de/anomic/plasma/plasmaSearchQuery.java | 4 + 25 files changed, 219 insertions(+), 1138 deletions(-) rename source/de/anomic/kelondro/index/{IntegerHandleIndex.java => HandleMap.java} (67%) delete mode 100644 source/de/anomic/kelondro/index/LongHandleIndex.java delete mode 100644 source/de/anomic/kelondro/table/FlexTable.java diff --git a/htroot/PerformanceMemory_p.html b/htroot/PerformanceMemory_p.html index 717a4f836..d1fac1b0e 100644 --- a/htroot/PerformanceMemory_p.html +++ b/htroot/PerformanceMemory_p.html @@ -99,60 +99,6 @@ -

FlexTable RAM Index:

- - - - - - - - #{TableList}# - - - - - - - #{/TableList}# - - - -
TableCountChunk SizeUsed Memory
#[tableIndexPath]##[tableIndexCount]##[tableIndexChunkSize]##[tableIndexMem]#
Total Mem = #[TableIndexTotalMem]# MB
- -

Node Caches:

- - - - - - - - - - - - - - #{NodeList}# - - - - - - - - - - - - - #{/NodeList}# - - - -
TableSizeChunk SizeUsed MemoryRead HitRead MissWrite UniqueWrite DoubleDeletesFlushes
#[nodeCachePath]##[nodeCacheCount]##[nodeChunkSize]##[nodeCacheMem]##[nodeCacheReadHit]##[nodeCacheReadMiss]##[nodeCacheWriteUnique]##[nodeCacheWriteDouble]##[nodeCacheDeletes]##[nodeCacheFlushes]#
Total Mem = #[nodeCacheTotalMem]# MB; Stop Grow when less than #[nodeCacheStopGrow]# MB available left; Start Shrink when less than #[nodeCacheStartShrink]# MB availabe left
-

Object Read Caches:

diff --git a/htroot/PerformanceMemory_p.java b/htroot/PerformanceMemory_p.java index 9bf425949..9e491ad5a 100644 --- a/htroot/PerformanceMemory_p.java +++ b/htroot/PerformanceMemory_p.java @@ -31,9 +31,7 @@ import java.util.Map; import de.anomic.http.httpRequestHeader; import de.anomic.kelondro.blob.Cache; -import de.anomic.kelondro.table.CachedRecords; import de.anomic.kelondro.table.EcoTable; -import de.anomic.kelondro.table.FlexTable; import de.anomic.kelondro.util.MemoryControl; import de.anomic.kelondro.util.FileUtils; import de.anomic.plasma.plasmaSwitchboard; @@ -90,30 +88,12 @@ public class PerformanceMemory_p { prop.putNum("memoryUsedAfterInitAGC", (memoryTotalAfterInitAGC - memoryFreeAfterInitAGC) / KB); prop.putNum("memoryUsedNow", (memoryTotalNow - memoryFreeNow) / MB); - // write table for FlexTable index sizes - Iterator i = FlexTable.filenames(); + // write table for EcoTable index sizes + Iterator i = EcoTable.filenames(); String filename; Map map; int p, c = 0; long mem, totalmem = 0; - while (i.hasNext()) { - filename = i.next(); - map = FlexTable.memoryStats(filename); - mem = Long.parseLong(map.get("tableIndexMem")); - totalmem += mem; - prop.put("TableList_" + c + "_tableIndexPath", ((p = filename.indexOf("DATA")) < 0) ? 
filename : filename.substring(p)); - prop.put("TableList_" + c + "_tableIndexChunkSize", map.get("tableIndexChunkSize")); - prop.putNum("TableList_" + c + "_tableIndexCount", map.get("tableIndexCount")); - prop.put("TableList_" + c + "_tableIndexMem", Formatter.bytesToString(mem)); - c++; - } - prop.put("TableList", c); - prop.putNum("TableIndexTotalMem", totalmem / (1024 * 1024d)); - - // write table for EcoTable index sizes - i = EcoTable.filenames(); - c = 0; - totalmem = 0; while (i.hasNext()) { filename = i.next(); map = EcoTable.memoryStats(filename); @@ -135,32 +115,6 @@ public class PerformanceMemory_p { prop.put("EcoList", c); prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d)); - // write node cache table - i = CachedRecords.filenames(); - c = 0; - totalmem = 0; - while (i.hasNext()) { - filename = i.next(); - map = CachedRecords.memoryStats(filename); - mem = Long.parseLong(map.get("nodeCacheMem")); - totalmem += mem; - prop.put("NodeList_" + c + "_nodeCachePath", ((p = filename.indexOf("DATA")) < 0) ? 
filename : filename.substring(p)); - prop.put("NodeList_" + c + "_nodeChunkSize", map.get("nodeChunkSize")); - prop.putNum("NodeList_" + c + "_nodeCacheCount", map.get("nodeCacheCount")); - prop.put("NodeList_" + c + "_nodeCacheMem", Formatter.bytesToString(mem)); - prop.putNum("NodeList_" + c + "_nodeCacheReadHit", map.get("nodeCacheReadHit")); - prop.putNum("NodeList_" + c + "_nodeCacheReadMiss", map.get("nodeCacheReadMiss")); - prop.putNum("NodeList_" + c + "_nodeCacheWriteUnique", map.get("nodeCacheWriteUnique")); - prop.putNum("NodeList_" + c + "_nodeCacheWriteDouble", map.get("nodeCacheWriteDouble")); - prop.putNum("NodeList_" + c + "_nodeCacheDeletes", map.get("nodeCacheDeletes")); - prop.putNum("NodeList_" + c + "_nodeCacheFlushes", map.get("nodeCacheFlushes")); - c++; - } - prop.put("NodeList", c); - prop.putNum("nodeCacheStopGrow", CachedRecords.getMemStopGrow() / (1024 * 1024d)); - prop.putNum("nodeCacheStartShrink", CachedRecords.getMemStartShrink() / (1024 * 1024d)); - prop.putNum("nodeCacheTotalMem", totalmem / (1024 * 1024d)); - // write object cache table i = Cache.filenames(); c = 0; diff --git a/htroot/PerformanceMemory_p.xml b/htroot/PerformanceMemory_p.xml index fcc61c1de..061dd5419 100644 --- a/htroot/PerformanceMemory_p.xml +++ b/htroot/PerformanceMemory_p.xml @@ -37,38 +37,6 @@ #[EcoIndexTotalMem]# - - #{TableList}# - - #[tableIndexPath]# - #[tableIndexChunkSize]# - #[tableIndexCount]# - #[tableIndexMem]# - - #{/TableList}# - #[TableIndexTotalMem]# - - - - #{NodeList}# - - #[nodeCachePath]# - #[nodeChunkSize]# - #[nodeCacheCount]# - #[nodeCacheMem]# - #[nodeCacheReadHit]# - #[nodeCacheReadMiss]# - #[nodeCacheWriteUnique]# - #[nodeCacheWriteDouble]# - #[nodeCacheDeletes]# - #[nodeCacheFlushes]# - - #{/NodeList}# - #[nodeCacheTotalMem]# - #[nodeCacheStopGrow]# - #[nodeCacheStartShrink]# - - #{ObjectList}# diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 37e6e6f21..c8d9b72b3 100644 --- a/htroot/yacy/search.java +++ 
b/htroot/yacy/search.java @@ -193,6 +193,7 @@ public final class search { prefer, plasmaSearchQuery.contentdomParser(contentdom), language, + "", // no navigation false, count, 0, @@ -243,7 +244,8 @@ public final class search { prefer, plasmaSearchQuery. contentdomParser(contentdom), - language, + language, + "", // no navigation false, count, 0, diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 16988ad58..d4c9f9ef6 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -312,6 +312,10 @@ public class yacysearch { if (language == null) language = "en"; } + // navigation + String navigation = (post == null) ? "" : post.get("nav", ""); + + // the query final TreeSet[] query = plasmaSearchQuery.cleanQuery(querystring.trim()); // converts also umlaute int maxDistance = (querystring.indexOf('"') >= 0) ? maxDistance = query.length - 1 : Integer.MAX_VALUE; @@ -385,6 +389,7 @@ public class yacysearch { prefermask, contentdomCode, language, + navigation, fetchSnippets, itemsPerPage, offset, diff --git a/source/de/anomic/data/URLAnalysis.java b/source/de/anomic/data/URLAnalysis.java index e91db4291..9e923cbcc 100644 --- a/source/de/anomic/data/URLAnalysis.java +++ b/source/de/anomic/data/URLAnalysis.java @@ -51,7 +51,7 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import de.anomic.kelondro.index.HandleSet; -import de.anomic.kelondro.index.IntegerHandleIndex; +import de.anomic.kelondro.index.HandleMap; import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.text.MetadataRepository; import de.anomic.kelondro.text.ReferenceContainerArray; @@ -396,7 +396,7 @@ public class URLAnalysis { public static void incell(File cellPath, String statisticPath) { try { - IntegerHandleIndex idx = ReferenceContainerArray.referenceHashes( + HandleMap idx = ReferenceContainerArray.referenceHashes( cellPath, Segment.wordReferenceFactory, Base64Order.enhancedCoder, @@ -411,7 +411,7 @@ public class URLAnalysis { public 
static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException { System.out.println("INDEX DIFF URL-COL startup"); - IntegerHandleIndex idx = new IntegerHandleIndex(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, new File(statisticFile), 0); + HandleMap idx = new HandleMap(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, 4, new File(statisticFile), 0); MetadataRepository mr = new MetadataRepository(new File(metadataPath)); HandleSet hs = new HandleSet(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, 0, 1000000); System.out.println("INDEX DIFF URL-COL loaded dump, starting diff"); diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index bf079340a..c36d08e96 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -67,7 +67,7 @@ public class blogBoard { new File(actpath.getParent()).mkdir(); new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, newFile), 500, '_'); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_'); } } diff --git a/source/de/anomic/data/blogBoardComments.java b/source/de/anomic/data/blogBoardComments.java index 83f373b27..69f070c42 100644 --- a/source/de/anomic/data/blogBoardComments.java +++ b/source/de/anomic/data/blogBoardComments.java @@ -70,7 +70,7 @@ public class blogBoardComments { new File(actpath.getParent()).mkdir(); new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false, newFile), 500, '_'); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, 
recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_'); } } diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index 0eb596184..9d373f486 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -116,17 +116,17 @@ public class bookmarksDB { tagCache=new TreeMap(); bookmarksFile.getParentFile().mkdirs(); //this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false)); - this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, bookmarksFileNew), 1000, '_'); + this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, bookmarksFileNew), 1000, '_'); // tags tagsFile.getParentFile().mkdirs(); final boolean tagsFileExisted = tagsFile.exists(); - this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, tagsFileNew), 500, '_'); + this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, tagsFileNew), 500, '_'); if (!tagsFileExisted) rebuildTags(); // dates final boolean datesExisted = datesFile.exists(); - this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false, datesFileNew), 500, '_'); + this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, false, false, datesFileNew), 500, '_'); if (!datesExisted) rebuildDates(); // autoReCrawl diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java index dbf9d13cd..7f4b86f91 100644 --- a/source/de/anomic/data/messageBoard.java +++ b/source/de/anomic/data/messageBoard.java @@ -55,7 +55,7 @@ public class messageBoard { 
new File(path.getParent()).mkdir(); new File(pathNew.getParent()).mkdir(); if (database == null) { - database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, pathNew), 500, '_'); + database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, false, false, pathNew), 500, '_'); } sn = 0; } diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index d3e764070..ba48bdf9a 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -60,7 +60,7 @@ public final class userDB { this.userTableFile = userTableFileNew; userTableFile.getParentFile().mkdirs(); userTableFileNew.getParentFile().mkdirs(); - this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false, userTableFile), 10, '_'); + this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, false, false, userTableFile), 10, '_'); } void resetDatabase() { diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java index 12f5802fc..f472339e5 100644 --- a/source/de/anomic/data/wiki/wikiBoard.java +++ b/source/de/anomic/data/wiki/wikiBoard.java @@ -57,11 +57,11 @@ public class wikiBoard { final File bkppath, final File bkppathNew) throws IOException { new File(actpath.getParent()).mkdirs(); if (datbase == null) { - datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, actpathNew), 500, '_'); + datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, actpathNew), 500, '_'); } new File(bkppath.getParent()).mkdirs(); if (bkpbase == null) { - bkpbase = new 
MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false, bkppathNew), 500, '_'); + bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, false, false, bkppathNew), 500, '_'); } } diff --git a/source/de/anomic/kelondro/blob/BLOBTree.java b/source/de/anomic/kelondro/blob/BLOBTree.java index 2aeb2f325..2e38d1741 100644 --- a/source/de/anomic/kelondro/blob/BLOBTree.java +++ b/source/de/anomic/kelondro/blob/BLOBTree.java @@ -50,7 +50,6 @@ import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.CloneableIterator; import de.anomic.kelondro.order.RotateIterator; import de.anomic.kelondro.table.EcoTable; -import de.anomic.kelondro.table.FlexTable; import de.anomic.kelondro.table.Tree; import de.anomic.kelondro.util.FileUtils; import de.anomic.kelondro.util.kelondroException; @@ -58,7 +57,6 @@ import de.anomic.kelondro.util.kelondroException; public class BLOBTree { private static final int counterlen = 8; - private static final int EcoFSBufferSize = 20; protected int keylen; private final int reclen; @@ -72,39 +70,26 @@ public class BLOBTree { * Deprecated Class. 
Please use kelondroBLOBHeap instead */ private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, - final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail) { + final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail) { // creates or opens a dynamic tree rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder); ObjectIndex fbi; - if (usetree) { - try { - fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8); - } catch (final IOException e) { - e.printStackTrace(); - if (resetOnFail) { - FileUtils.deletedelete(file); - try { - fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8); - } catch (final IOException e1) { - e1.printStackTrace(); - throw new kelondroException(e.getMessage()); - } - } else { + try { + fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8); + } catch (final IOException e) { + e.printStackTrace(); + if (resetOnFail) { + FileUtils.deletedelete(file); + try { + fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8); + } catch (final IOException e1) { + e1.printStackTrace(); throw new kelondroException(e.getMessage()); } + } else { + throw new kelondroException(e.getMessage()); } - - } else { - if (file.exists()) { - if (file.isDirectory()) { - fbi = new FlexTable(file.getParentFile(), file.getName(), rowdef, 0, resetOnFail); - } else { - fbi = new EcoTable(file, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); - } - } else { - fbi = new EcoTable(file, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); - } - } + } this.index = ((useObjectCache) && (!(fbi instanceof EcoTable))) ? 
(ObjectIndex) new Cache(fbi) : fbi; this.keylen = key; this.reclen = nodesize; @@ -115,13 +100,13 @@ public class BLOBTree { } public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, - final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException { + final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException { if (blob.exists() || !file.exists()) { // open the blob file and ignore the tree return new BLOBHeap(blob, key, objectOrder, 1024 * 64); } // open a Tree and migrate everything to a Heap - BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, usetree, writebuffer, resetOnFail); + BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, writebuffer, resetOnFail); BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64); Iterator i = tree.keys(true, false); byte[] k, kk = new byte[key], v; diff --git a/source/de/anomic/kelondro/blob/HeapReader.java b/source/de/anomic/kelondro/blob/HeapReader.java index 007e2857a..09bf2d9dc 100644 --- a/source/de/anomic/kelondro/blob/HeapReader.java +++ b/source/de/anomic/kelondro/blob/HeapReader.java @@ -35,7 +35,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.ExecutionException; -import de.anomic.kelondro.index.LongHandleIndex; +import de.anomic.kelondro.index.HandleMap; import de.anomic.kelondro.io.CachedRandomAccess; import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.CloneableIterator; @@ -49,7 +49,7 @@ public class HeapReader { public final static long keepFreeMem = 20 * 1024 * 1024; protected int keylength; // the length of the primary key - protected LongHandleIndex index; // 
key/seek relation for used records + protected HandleMap index; // key/seek relation for used records protected Gap free; // set of {seek, size} pairs denoting space and position of free records protected File heapFile; // the file of the heap protected final ByteOrder ordering; // the ordering on keys @@ -117,11 +117,16 @@ public class HeapReader { // there is an index and a gap file: // read the index file: try { - this.index = new LongHandleIndex(this.keylength, this.ordering, fif, 1000000); + this.index = new HandleMap(this.keylength, this.ordering, 8, fif, 1000000); } catch (IOException e) { e.printStackTrace(); return false; } + + // check saturation + int[] saturation = this.index.saturation(); + Log.logInfo("HeapReader", "saturation of " + fif.getName() + ": keylength = " + saturation[0] + ", vallength = " + saturation[1] + ", possible saving: " + ((this.keylength - saturation[0] + 8 - saturation[1]) * index.size() / 1024 / 1024) + " MB"); + // an index file is a one-time throw-away object, so just delete it now FileUtils.deletedelete(fif); @@ -141,10 +146,10 @@ public class HeapReader { private void initIndexReadFromHeap() throws IOException { // this initializes the this.index object by reading positions from the heap file - Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024) + " kbytes. Please wait."); + Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. 
Please wait."); this.free = new Gap(); - LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000); + HandleMap.initDataConsumer indexready = HandleMap.asynchronusInitializer(keylength, this.ordering, 8, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000); byte[] key = new byte[keylength]; int reclen; long seek = 0; @@ -187,7 +192,7 @@ public class HeapReader { // new seek position seek += 4L + reclen; } - indexready.finish(); + indexready.finish(true); // finish the index generation try { diff --git a/source/de/anomic/kelondro/blob/HeapWriter.java b/source/de/anomic/kelondro/blob/HeapWriter.java index 756b66b72..d43b15bf0 100644 --- a/source/de/anomic/kelondro/blob/HeapWriter.java +++ b/source/de/anomic/kelondro/blob/HeapWriter.java @@ -30,7 +30,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import de.anomic.kelondro.index.LongHandleIndex; +import de.anomic.kelondro.index.HandleMap; import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.Digest; import de.anomic.kelondro.util.FileUtils; @@ -39,7 +39,7 @@ import de.anomic.kelondro.util.Log; public final class HeapWriter { private int keylength; // the length of the primary key - private LongHandleIndex index; // key/seek relation for used records + private HandleMap index; // key/seek relation for used records private final File heapFileTMP; // the temporary file of the heap during writing private final File heapFileREADY; // the final file of the heap when the file is closed private DataOutputStream os; // the output stream where the BLOB is written @@ -76,7 +76,7 @@ public final class HeapWriter { this.heapFileTMP = temporaryHeapFile; this.heapFileREADY = readyHeapFile; this.keylength = keylength; - this.index = new LongHandleIndex(keylength, ordering, 10, 100000); + this.index 
= new HandleMap(keylength, ordering, 8, 10, 100000); this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(temporaryHeapFile), outBuffer)); //this.doublecheck = new HashSet(); this.seek = 0; diff --git a/source/de/anomic/kelondro/index/Column.java b/source/de/anomic/kelondro/index/Column.java index 16031fbdd..46d743cde 100644 --- a/source/de/anomic/kelondro/index/Column.java +++ b/source/de/anomic/kelondro/index/Column.java @@ -154,11 +154,12 @@ public class Column { ((typename.equals("long")) && (this.cellwidth > 8)) || ((typename.equals("char")) && (this.cellwidth > 1)) ) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " too wide for type " + typename); + /* if (((typename.equals("short")) && (this.cellwidth <= 1)) || ((typename.equals("int")) && (this.cellwidth <= 2)) || ((typename.equals("long")) && (this.cellwidth <= 4)) ) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " not appropriate for type " + typename); - + */ // parse/check encoder type if ((celldef.length() > 0) && (celldef.charAt(0) == '{')) { p = celldef.indexOf('}'); diff --git a/source/de/anomic/kelondro/index/IntegerHandleIndex.java b/source/de/anomic/kelondro/index/HandleMap.java similarity index 67% rename from source/de/anomic/kelondro/index/IntegerHandleIndex.java rename to source/de/anomic/kelondro/index/HandleMap.java index 6f3cbd666..bb5a00f65 100644 --- a/source/de/anomic/kelondro/index/IntegerHandleIndex.java +++ b/source/de/anomic/kelondro/index/HandleMap.java @@ -1,6 +1,6 @@ -// kelondroBytesIntMap.java -// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 18.06.2006 on http://www.anomic.de +// HandleMap.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 08.04.2008 on http://yacy.net // // $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ // $LastChangedRevision: 1986 $ @@ -43,6 +43,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.ByteOrder; @@ -50,28 +52,37 @@ import de.anomic.kelondro.order.CloneableIterator; import de.anomic.kelondro.util.MemoryControl; import de.anomic.yacy.dht.FlatWordPartitionScheme; -public class IntegerHandleIndex { +public class HandleMap implements Iterable { private final Row rowdef; private ObjectIndexCache index; - public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) { - this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder); + /** + * initialize a HandleMap + * This may store a key and a long value for each key. 
+ * The class is used as index for database files + * @param keylength + * @param objectOrder + * @param space + */ + public HandleMap(final int keylength, final ByteOrder objectOrder, int idxbytes, final int initialspace, final int expectedspace) { + this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder); this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace); } /** - * initialize a BytesLongMap with the content of a dumped index + * initialize a HandleMap with the content of a dumped index * @param keylength * @param objectOrder * @param file * @throws IOException */ - public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException { - this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); + public HandleMap(final int keylength, final ByteOrder objectOrder, int idxbytes, final File file, final int expectedspace) throws IOException { + this(keylength, objectOrder, idxbytes, (int) (file.length() / (keylength + idxbytes)), expectedspace); // read the index dump and fill the index InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); - byte[] a = new byte[keylength + 4]; + if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is); + byte[] a = new byte[keylength + idxbytes]; int c; Row.Entry entry; while (true) { @@ -81,9 +92,41 @@ public class IntegerHandleIndex { if (entry != null) this.index.addUnique(entry); } is.close(); - assert this.index.size() == file.length() / (keylength + 4); + is = null; + assert this.index.size() == file.length() / (keylength + idxbytes); } + + public int[] saturation() { + int keym = 0; + int valm = this.rowdef.width(1); + int valc; + byte[] lastk = null, thisk; + for (Row.Entry row: this) { + // check length of key + if (lastk == null) { + lastk = row.bytes(); + 
} else { + thisk = row.bytes(); + keym = Math.max(keym, eq(lastk, thisk)); + lastk = thisk; + } + // check length of value + for (valc = this.rowdef.primaryKeyLength; valc < this.rowdef.objectsize; valc++) { + if (lastk[valc] != 0) break; + } // valc is the number of leading zeros plus primaryKeyLength + valm = Math.min(valm, valc - this.rowdef.primaryKeyLength); // valm is the number of leading zeros + } + return new int[]{keym, this.rowdef.width(1) - valm}; + } + + private int eq(byte[] a, byte[] b) { + for (int i = 0; i < a.length; i++) { + if (a[i] != b[i]) return i; + } + return a.length; + } + /** * write a dump of the index to a file. All entries are written in order * which makes it possible to read them again in a fast way @@ -95,8 +138,10 @@ public class IntegerHandleIndex { // we must use an iterator from the combined index, because we need the entries sorted // otherwise we could just write the byte[] from the in kelondroRowSet which would make // everything much faster, but this is not an option here. 
+ File tmp = new File(file.getParentFile(), file.getName() + ".prt"); Iterator i = this.index.rows(true, null); - OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024); + OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024); + if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os); int c = 0; while (i.hasNext()) { os.write(i.next().bytes()); @@ -104,15 +149,18 @@ public class IntegerHandleIndex { } os.flush(); os.close(); + tmp.renameTo(file); + assert file.exists() : file.toString(); + assert !tmp.exists() : tmp.toString(); return c; } - + public Row row() { return index.row(); } public void clear() { - this.index.clear(); + index.clear(); } public synchronized boolean has(final byte[] key) { @@ -120,36 +168,34 @@ public class IntegerHandleIndex { return index.has(key); } - public synchronized int get(final byte[] key) { + public synchronized long get(final byte[] key) { assert (key != null); final Row.Entry indexentry = index.get(key); if (indexentry == null) return -1; - return (int) indexentry.getColLong(1); + return indexentry.getColLong(1); } - public synchronized int put(final byte[] key, final int i) { - assert i >= 0 : "i = " + i; + public synchronized long put(final byte[] key, final long l) { + assert l >= 0 : "l = " + l; assert (key != null); final Row.Entry newentry = index.row().newEntry(); newentry.setCol(0, key); - newentry.setCol(1, i); + newentry.setCol(1, l); final Row.Entry oldentry = index.replace(newentry); if (oldentry == null) return -1; - return (int) oldentry.getColLong(1); + return oldentry.getColLong(1); } - - public synchronized int inc(final byte[] key, int a) { - assert key != null; - assert a > 0; // it does not make sense to add 0. 
If this occurres, it is a performance issue - + + public synchronized void putUnique(final byte[] key, final long l) { + assert l >= 0 : "l = " + l; + assert (key != null); final Row.Entry newentry = this.rowdef.newEntry(); newentry.setCol(0, key); - newentry.setCol(1, a); - long l = index.inc(key, 1, a, newentry); - return (int) l; + newentry.setCol(1, l); + index.addUnique(newentry); } - /* - public synchronized int inc(final byte[] key, int a) throws IOException { + + public synchronized long add(final byte[] key, long a) { assert key != null; assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue @@ -161,51 +207,51 @@ public class IntegerHandleIndex { index.addUnique(newentry); return 1; } else { - long l = indexentry.incCol(1, a); + long i = indexentry.getColLong(1) + a; + indexentry.setCol(1, i); index.put(indexentry); - return (int) l; + return i; } } - */ - public synchronized void putUnique(final byte[] key, final int i) { - assert i >= 0 : "i = " + i; - assert (key != null); - final Row.Entry newentry = this.rowdef.newEntry(); - newentry.setCol(0, key); - newentry.setCol(1, i); - index.addUnique(newentry); + public synchronized long inc(final byte[] key) { + return add(key, 1); + } + + public synchronized long dec(final byte[] key) { + return add(key, -1); } - public synchronized ArrayList removeDoubles() { - final ArrayList report = new ArrayList(); - Integer[] is; - int c, i; + public synchronized ArrayList removeDoubles() { + final ArrayList report = new ArrayList(); + Long[] is; + int c; + long l; final int initialSize = this.size(); - for (final RowCollection delset: index.removeDoubles()) { - is = new Integer[delset.size()]; + for (final RowCollection rowset: index.removeDoubles()) { + is = new Long[rowset.size()]; c = 0; - for (Row.Entry e : delset) { - i = (int) e.getColLong(1); - assert i < initialSize : "i = " + i + ", initialSize = " + initialSize; - is[c++] = Integer.valueOf(i); + for (Row.Entry e: 
rowset) { + l = e.getColLong(1); + assert l < initialSize : "l = " + l + ", initialSize = " + initialSize; + is[c++] = Long.valueOf(l); } report.add(is); } return report; } - public synchronized int remove(final byte[] key) { + public synchronized long remove(final byte[] key) { assert (key != null); final Row.Entry indexentry = index.remove(key); if (indexentry == null) return -1; - return (int) indexentry.getColLong(1); + return indexentry.getColLong(1); } - public synchronized int removeone() { + public synchronized long removeone() { final Row.Entry indexentry = index.removeOne(); if (indexentry == null) return -1; - return (int) indexentry.getColLong(1); + return indexentry.getColLong(1); } public synchronized int size() { @@ -225,19 +271,9 @@ public class IntegerHandleIndex { index = null; } - private static class entry { - public byte[] key; - public int l; - public entry(final byte[] key, final int l) { - this.key = key; - this.l = l; - } - } - private static final entry poisonEntry = new entry(new byte[0], 0); - /** * this method creates a concurrent thread that can take entries that are used to initialize the map - * it should be used when a bytesLongMap is initialized when a file is read. Concurrency of FileIO and + * it should be used when a HandleMap is initialized when a file is read. Concurrency of FileIO and * map creation will speed up the initialization process. 
* @param keylength * @param objectOrder @@ -245,37 +281,47 @@ public class IntegerHandleIndex { * @param bufferSize * @return */ - public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) { - initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize); + public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, int idxbytes, final int space, final int expectedspace, int bufferSize) { + initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, space, expectedspace), bufferSize); ExecutorService service = Executors.newSingleThreadExecutor(); initializer.setResult(service.submit(initializer)); service.shutdown(); return initializer; } + + private static class entry { + public byte[] key; + public long l; + public entry(final byte[] key, final long l) { + this.key = key; + this.l = l; + } + } + private static final entry poisonEntry = new entry(new byte[0], 0); - public static class initDataConsumer implements Callable { + public static class initDataConsumer implements Callable { private BlockingQueue cache; - private IntegerHandleIndex map; - private Future result; + private HandleMap map; + private Future result; private boolean sortAtEnd; - public initDataConsumer(IntegerHandleIndex map, int bufferCount) { + public initDataConsumer(HandleMap map, int bufferCount) { this.map = map; cache = new ArrayBlockingQueue(bufferCount); sortAtEnd = false; } - protected void setResult(Future result) { + protected void setResult(Future result) { this.result = result; } /** - * hand over another entry that shall be inserted into the BytesLongMap with an addl method + * hand over another entry that shall be inserted into the HandleMap with an addl method * @param key * @param l */ - public void consume(final byte[] 
key, final int l) { + public void consume(final byte[] key, final long l) { try { cache.put(new entry(key, l)); } catch (InterruptedException e) { @@ -298,17 +344,17 @@ public class IntegerHandleIndex { /** * this must be called after a finish() was called. this method blocks until all entries - * had been processed, and the content was sorted. It returns the kelondroBytesLongMap + * had been processed, and the content was sorted. It returns the HandleMap * that the user wanted to initialize * @return * @throws InterruptedException * @throws ExecutionException */ - public IntegerHandleIndex result() throws InterruptedException, ExecutionException { + public HandleMap result() throws InterruptedException, ExecutionException { return this.result.get(); } - public IntegerHandleIndex call() throws IOException { + public HandleMap call() throws IOException { try { entry c; while ((c = cache.take()) != poisonEntry) { @@ -326,19 +372,19 @@ public class IntegerHandleIndex { } public static void main(String[] args) { - int count = (args.length == 0) ? 1000000 : Integer.parseInt(args[0]); + int count = (args.length == 0) ? 
1000000 : Integer.parseInt(args[0]); System.out.println("Starting test with " + count + " objects, minimum memory: " + (count * 16) + " bytes; " + MemoryControl.available() + " available"); Random r = new Random(0); long start = System.currentTimeMillis(); System.gc(); // for resource measurement long a = MemoryControl.available(); - IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0, 150000); + HandleMap idx = new HandleMap(12, Base64Order.enhancedCoder, 8, 0, 150000); for (int i = 0; i < count; i++) { - idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)), 1); + idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count))); } long timek = ((long) count) * 1000L / (System.currentTimeMillis() - start); - System.out.println("Result IntegerHandleIndex: " + timek + " inc per second " + count + " loops."); + System.out.println("Result LongHandleIndex: " + timek + " inc per second " + count + " loops."); System.gc(); long memk = a - MemoryControl.available(); System.out.println("Used Memory: " + memk + " bytes"); @@ -366,7 +412,9 @@ public class IntegerHandleIndex { System.out.println("Geschwindigkeitsfaktor j/k: " + (timej / timek)); System.out.println("Speicherplatzfaktor j/k: " + (memj / memk)); System.exit(0); - } + public Iterator iterator() { + return this.rows(true, null); + } } diff --git a/source/de/anomic/kelondro/index/IndexTest.java b/source/de/anomic/kelondro/index/IndexTest.java index cf223311a..8921c461d 100644 --- a/source/de/anomic/kelondro/index/IndexTest.java +++ b/source/de/anomic/kelondro/index/IndexTest.java @@ -116,7 +116,7 @@ public class IndexTest { System.out.println("sorted map"); Runtime.getRuntime().gc(); long freeStartKelondro = MemoryControl.available(); - IntegerHandleIndex ii = new IntegerHandleIndex(12, Base64Order.enhancedCoder, count, count); + HandleMap ii = new HandleMap(12, Base64Order.enhancedCoder, 4, count, count); for (int i = 0; i < count; i++) ii.putUnique(tests[i], 1); 
ii.get(randomHash(r)); // trigger sort long t6 = System.currentTimeMillis(); diff --git a/source/de/anomic/kelondro/index/LongHandleIndex.java b/source/de/anomic/kelondro/index/LongHandleIndex.java deleted file mode 100644 index 03eb839c9..000000000 --- a/source/de/anomic/kelondro/index/LongHandleIndex.java +++ /dev/null @@ -1,337 +0,0 @@ -// kelondroBytesLongMap.java -// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 08.04.2008 on http://yacy.net -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro.index; - -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; - -import de.anomic.kelondro.order.Base64Order; -import de.anomic.kelondro.order.ByteOrder; -import de.anomic.kelondro.order.CloneableIterator; - -public class LongHandleIndex { - - private final Row rowdef; - private ObjectIndexCache index; - - /** - * initialize a BytesLongMap - * This may store a key and a long value for each key. 
- * The class is used as index for database files - * @param keylength - * @param objectOrder - * @param space - */ - public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) { - this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-8 {b256}")}, objectOrder); - this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace); - } - - /** - * initialize a BytesLongMap with the content of a dumped index - * @param keylength - * @param objectOrder - * @param file - * @throws IOException - */ - public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException { - this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); - // read the index dump and fill the index - InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); - if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is); - byte[] a = new byte[keylength + 8]; - int c; - while (true) { - c = is.read(a); - if (c <= 0) break; - this.index.addUnique(this.rowdef.newEntry(a)); - } - is.close(); - is = null; - assert this.index.size() == file.length() / (keylength + 8); - } - - /** - * write a dump of the index to a file. All entries are written in order - * which makes it possible to read them again in a fast way - * @param file - * @return the number of written entries - * @throws IOException - */ - public int dump(File file) throws IOException { - // we must use an iterator from the combined index, because we need the entries sorted - // otherwise we could just write the byte[] from the in kelondroRowSet which would make - // everything much faster, but this is not an option here. 
- File tmp = new File(file.getParentFile(), file.getName() + ".prt"); - Iterator i = this.index.rows(true, null); - OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024); - if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os); - int c = 0; - while (i.hasNext()) { - os.write(i.next().bytes()); - c++; - } - os.flush(); - os.close(); - tmp.renameTo(file); - assert file.exists() : file.toString(); - assert !tmp.exists() : tmp.toString(); - return c; - } - - public Row row() { - return index.row(); - } - - public void clear() { - index.clear(); - } - - public synchronized long get(final byte[] key) { - assert (key != null); - final Row.Entry indexentry = index.get(key); - if (indexentry == null) return -1; - return indexentry.getColLong(1); - } - - public synchronized long put(final byte[] key, final long l) { - assert l >= 0 : "l = " + l; - assert (key != null); - final Row.Entry newentry = index.row().newEntry(); - newentry.setCol(0, key); - newentry.setCol(1, l); - final Row.Entry oldentry = index.replace(newentry); - if (oldentry == null) return -1; - return oldentry.getColLong(1); - } - - public synchronized void putUnique(final byte[] key, final long l) { - assert l >= 0 : "l = " + l; - assert (key != null); - final Row.Entry newentry = this.rowdef.newEntry(); - newentry.setCol(0, key); - newentry.setCol(1, l); - index.addUnique(newentry); - } - - public synchronized long add(final byte[] key, long a) { - assert key != null; - assert a > 0; // it does not make sense to add 0. 
If this occurres, it is a performance issue - - final Row.Entry indexentry = index.get(key); - if (indexentry == null) { - final Row.Entry newentry = this.rowdef.newEntry(); - newentry.setCol(0, key); - newentry.setCol(1, a); - index.addUnique(newentry); - return 1; - } else { - long i = indexentry.getColLong(1) + a; - indexentry.setCol(1, i); - index.put(indexentry); - return i; - } - } - - public synchronized long inc(final byte[] key) { - return add(key, 1); - } - - public synchronized long dec(final byte[] key) { - return add(key, -1); - } - - public synchronized ArrayList removeDoubles() { - final ArrayList indexreport = index.removeDoubles(); - final ArrayList report = new ArrayList(); - Long[] is; - int c; - for (final RowCollection rowset: indexreport) { - is = new Long[rowset.size()]; - c = 0; - for (Row.Entry e: rowset) { - is[c++] = Long.valueOf(e.getColLong(1)); - } - report.add(is); - } - return report; - } - - public synchronized long remove(final byte[] key) { - assert (key != null); - final Row.Entry indexentry = index.remove(key); - if (indexentry == null) return -1; - return indexentry.getColLong(1); - } - - public synchronized long removeone() { - final Row.Entry indexentry = index.removeOne(); - if (indexentry == null) return -1; - return indexentry.getColLong(1); - } - - public synchronized int size() { - return index.size(); - } - - public synchronized CloneableIterator keys(final boolean up, final byte[] firstKey) { - return index.keys(up, firstKey); - } - - public synchronized CloneableIterator rows(final boolean up, final byte[] firstKey) { - return index.rows(up, firstKey); - } - - public synchronized void close() { - index.close(); - index = null; - } - - /** - * this method creates a concurrent thread that can take entries that are used to initialize the map - * it should be used when a bytesLongMap is initialized when a file is read. Concurrency of FileIO and - * map creation will speed up the initialization process. 
- * @param keylength - * @param objectOrder - * @param space - * @param bufferSize - * @return - */ - public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) { - initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize); - ExecutorService service = Executors.newSingleThreadExecutor(); - initializer.setResult(service.submit(initializer)); - service.shutdown(); - return initializer; - } - - private static class entry { - public byte[] key; - public long l; - public entry(final byte[] key, final long l) { - this.key = key; - this.l = l; - } - } - private static final entry poisonEntry = new entry(new byte[0], 0); - - public static class initDataConsumer implements Callable { - - private BlockingQueue cache; - private LongHandleIndex map; - private Future result; - - public initDataConsumer(LongHandleIndex map, int bufferCount) { - this.map = map; - cache = new ArrayBlockingQueue(bufferCount); - } - - protected void setResult(Future result) { - this.result = result; - } - - /** - * hand over another entry that shall be inserted into the BytesLongMap with an addl method - * @param key - * @param l - */ - public void consume(final byte[] key, final long l) { - try { - cache.put(new entry(key, l)); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - /** - * to signal the initialization thread that no more entries will be sublitted with consumer() - * this method must be called. The process will not terminate if this is not called before. - */ - public void finish() { - try { - cache.put(poisonEntry); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - - /** - * this must be called after a finish() was called. this method blocks until all entries - * had been processed, and the content was sorted. 
It returns the kelondroBytesLongMap - * that the user wanted to initialize - * @return - * @throws InterruptedException - * @throws ExecutionException - */ - public LongHandleIndex result() throws InterruptedException, ExecutionException { - return this.result.get(); - } - - public LongHandleIndex call() throws IOException { - try { - entry c; - while ((c = cache.take()) != poisonEntry) { - map.putUnique(c.key, c.l); - } - } catch (InterruptedException e) { - e.printStackTrace(); - } - map.index.finishInitialization(); - return map; - } - - } - - public static void main(String[] args) { - LongHandleIndex idx = new LongHandleIndex(12, Base64Order.enhancedCoder, 10000, 10000000); - byte[] s; - //long l; - for (int i = 0; i < 10000000; i = i + 8) { - s = Base64Order.enhancedCoder.uncardinal(Long.MAX_VALUE - i); - //l = Base64Order.enhancedCoder.cardinal(s); - //if (i != l) System.out.println("encoding bug for " + new String(s) + ", v = " + (Long.MAX_VALUE - i) + ", l = " + l); - //System.out.println(s); - if (idx.get(s) >= 0) System.out.println("search bug for " + new String(s) + ": " + idx.get(s)); - idx.putUnique(s, 1); - } - } -} diff --git a/source/de/anomic/kelondro/table/CachedRecords.java b/source/de/anomic/kelondro/table/CachedRecords.java index 34c14301f..36461aaef 100644 --- a/source/de/anomic/kelondro/table/CachedRecords.java +++ b/source/de/anomic/kelondro/table/CachedRecords.java @@ -28,23 +28,17 @@ package de.anomic.kelondro.table; import java.io.File; import java.io.IOException; -import java.util.HashMap; import java.util.Iterator; -import java.util.Map; import java.util.TreeMap; import de.anomic.kelondro.index.Row; import de.anomic.kelondro.index.ObjectArrayCache; -import de.anomic.kelondro.io.RandomAccessInterface; import de.anomic.kelondro.io.RandomAccessRecords; import de.anomic.kelondro.util.MemoryControl; import de.anomic.kelondro.util.kelondroException; public class CachedRecords extends AbstractRecords implements RandomAccessRecords { - - // 
memory calculation - private static final int element_in_cache = 4; // for kelondroCollectionObjectMap: 4; for HashMap: 52 - + // static supervision objects: recognize and coordinate all activites private static final TreeMap recordTracker = new TreeMap(); private static final long memStopGrow = 40 * 1024 * 1024; // a limit for the node cache to stop growing if less than this memory amount is available @@ -69,23 +63,6 @@ public class CachedRecords extends AbstractRecords implements RandomAccessRecord if (useNodeCache) recordTracker.put(this.filename, this); } - public CachedRecords( - final RandomAccessInterface ra, final String filename, final boolean useNodeCache, final long preloadTime, - final short ohbytec, final short ohhandlec, - final Row rowdef, final int FHandles, final int txtProps, final int txtPropWidth, - final boolean exitOnFail) { - super(ra, filename, useNodeCache, ohbytec, ohhandlec, rowdef, FHandles, txtProps, txtPropWidth, exitOnFail); - initCache(useNodeCache, preloadTime); - if (useNodeCache) recordTracker.put(this.filename, this); - } - - public CachedRecords( - final RandomAccessInterface ra, final String filename, final boolean useNodeCache, final long preloadTime) throws IOException{ - super(ra, filename, useNodeCache); - initCache(useNodeCache, preloadTime); - if (useNodeCache) recordTracker.put(this.filename, this); - } - private void initCache(final boolean useNodeCache, final long preloadTime) { if (useNodeCache) { this.cacheHeaders = new ObjectArrayCache(this.headchunksize, 0); @@ -120,7 +97,7 @@ public class CachedRecords extends AbstractRecords implements RandomAccessRecord } } - public int cacheGrowStatus() { + private int cacheGrowStatus() { final long available = MemoryControl.available(); if ((cacheHeaders != null) && (available - 2 * 1024 * 1024 < cacheHeaders.memoryNeededForGrow())) return 0; return cacheGrowStatus(available, memStopGrow, memStartShrink); @@ -140,43 +117,6 @@ public class CachedRecords extends AbstractRecords 
implements RandomAccessRecord return 0; } - public static long getMemStopGrow() { - return memStopGrow ; - } - - public static long getMemStartShrink() { - return memStartShrink ; - } - - public static final Iterator filenames() { - // iterates string objects; all file names from record tracker - return recordTracker.keySet().iterator(); - } - - public static final Map memoryStats(final String filename) { - // returns a map for each file in the tracker; - // the map represents properties for each record oobjects, - // i.e. for cache memory allocation - final CachedRecords theRecord = recordTracker.get(filename); - return theRecord.memoryStats(); - } - - private final Map memoryStats() { - // returns statistical data about this object - if (cacheHeaders == null) return null; - final HashMap map = new HashMap(); - map.put("nodeChunkSize", Integer.toString(this.headchunksize + element_in_cache)); - map.put("nodeCacheCount", Integer.toString(cacheHeaders.size())); - map.put("nodeCacheMem", Integer.toString(cacheHeaders.size() * (this.headchunksize + element_in_cache))); - map.put("nodeCacheReadHit", Integer.toString(readHit)); - map.put("nodeCacheReadMiss", Integer.toString(readMiss)); - map.put("nodeCacheWriteUnique", Integer.toString(writeUnique)); - map.put("nodeCacheWriteDouble", Integer.toString(writeDouble)); - map.put("nodeCacheDeletes", Integer.toString(cacheDelete)); - map.put("nodeCacheFlushes", Integer.toString(cacheFlush)); - return map; - } - protected synchronized void deleteNode(final RecordHandle handle) throws IOException { if (cacheHeaders == null) { super.deleteNode(handle); diff --git a/source/de/anomic/kelondro/table/EcoTable.java b/source/de/anomic/kelondro/table/EcoTable.java index 4b54ffcf6..136a8a3e3 100644 --- a/source/de/anomic/kelondro/table/EcoTable.java +++ b/source/de/anomic/kelondro/table/EcoTable.java @@ -38,8 +38,8 @@ import java.util.Map; import java.util.TreeMap; import java.util.TreeSet; -import 
de.anomic.kelondro.index.IntegerHandleIndex; import de.anomic.kelondro.index.Column; +import de.anomic.kelondro.index.HandleMap; import de.anomic.kelondro.index.Row; import de.anomic.kelondro.index.RowCollection; import de.anomic.kelondro.index.RowSet; @@ -76,7 +76,7 @@ public class EcoTable implements ObjectIndex { public static final long maxarraylength = 134217727L; // that may be the maxmimum size of array length in some JVMs private static final long minmemremaining = 20 * 1024 * 1024; // if less than this memory is remaininig, the memory copy of a table is abandoned private RowSet table; - private IntegerHandleIndex index; + private HandleMap index; private BufferedEcoFS file; private Row rowdef; private int fail; @@ -132,7 +132,7 @@ public class EcoTable implements ObjectIndex { table = null; System.gc(); Log.logSevere("ECOTABLE", tablefile + ": RAM after releasing the table: " + (MemoryControl.available() / 1024 / 1024) + "MB"); } - index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records, 100000); + index = new HandleMap(rowdef.primaryKeyLength, rowdef.objectOrder, 4, records, 100000); Log.logInfo("ECOTABLE", tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? 
"DISABLED" : "ENABLED")); // read all elements from the file into the copy table @@ -175,7 +175,7 @@ public class EcoTable implements ObjectIndex { // remove doubles if (!freshFile) { - final ArrayList doubles = index.removeDoubles(); + final ArrayList doubles = index.removeDoubles(); //assert index.size() + doubles.size() + fail == i; //System.out.println(" -removed " + doubles.size() + " doubles- done."); if (doubles.size() > 0) { @@ -184,19 +184,19 @@ public class EcoTable implements ObjectIndex { // first put back one element each final byte[] record = new byte[rowdef.objectsize]; key = new byte[rowdef.primaryKeyLength]; - for (final Integer[] ds: doubles) { + for (final Long[] ds: doubles) { file.get(ds[0].intValue(), record, 0); System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); index.putUnique(key, ds[0].intValue()); } // then remove the other doubles by removing them from the table, but do a re-indexing while doing that // first aggregate all the delete positions because the elements from the top positions must be removed first - final TreeSet delpos = new TreeSet(); - for (final Integer[] ds: doubles) { + final TreeSet delpos = new TreeSet(); + for (final Long[] ds: doubles) { for (int j = 1; j < ds.length; j++) delpos.add(ds[j]); } // now remove the entries in a sorted way (top-down) - Integer top; + Long top; while (delpos.size() > 0) { top = delpos.last(); delpos.remove(top); @@ -292,13 +292,13 @@ public class EcoTable implements ObjectIndex { assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); final ArrayList report = new ArrayList(); RowSet rows; - final TreeSet d = new TreeSet(); + final TreeSet d = new TreeSet(); final byte[] b = new byte[rowdef.objectsize]; - Integer L; + Long L; Row.Entry inconsistentEntry; // iterate over all entries that have inconsistent index references long lastlog = System.currentTimeMillis(); - for (final Integer[] is: index.removeDoubles()) { + for 
(final Long[] is: index.removeDoubles()) { // 'is' is the set of all indexes, that have the same reference // we collect that entries now here rows = new RowSet(this.rowdef, is.length); @@ -314,7 +314,7 @@ public class EcoTable implements ObjectIndex { report.add(rows); } // finally delete the affected rows, but start with largest id first, otherwise we overwrite wrong entries - Integer s; + Long s; while (d.size() > 0) { s = d.last(); d.remove(s); @@ -347,7 +347,7 @@ public class EcoTable implements ObjectIndex { if ((file == null) || (index == null)) return null; assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail; assert ((table == null) || (table.size() == index.size())); - final int i = index.get(key); + final int i = (int) index.get(key); if (i == -1) return null; final byte[] b = new byte[rowdef.objectsize]; if (table == null) { @@ -388,7 +388,7 @@ public class EcoTable implements ObjectIndex { assert row != null; assert row.bytes() != null; if ((row == null) || (row.bytes() == null)) return null; - final int i = index.get(row.getPrimaryKeyBytes()); + final int i = (int) index.get(row.getPrimaryKeyBytes()); if (i == -1) { addUnique(row); return null; @@ -422,7 +422,7 @@ public class EcoTable implements ObjectIndex { assert row != null; assert row.bytes() != null; if (file == null || row == null || row.bytes() == null) return; - final int i = index.get(row.getPrimaryKeyBytes()); + final int i = (int) index.get(row.getPrimaryKeyBytes()); if (i == -1) { addUnique(row); return; @@ -480,7 +480,7 @@ public class EcoTable implements ObjectIndex { assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); assert key.length == rowdef.primaryKeyLength; - final int i = index.get(key); + final int i = (int) index.get(key); if (i == -1) return null; // nothing to do // prepare 
result @@ -491,12 +491,12 @@ public class EcoTable implements ObjectIndex { assert i < index.size(); if (table == null) { if (i == index.size() - 1) { - ix = index.remove(key); + ix = (int) index.remove(key); assert ix == i; file.cleanLast(b, 0); } else { assert i < index.size() - 1; - ix = index.remove(key); + ix = (int) index.remove(key); assert ix == i; file.get(i, b, 0); file.cleanLast(p, 0); @@ -514,13 +514,13 @@ public class EcoTable implements ObjectIndex { if (i == index.size() - 1) { // special handling if the entry is the last entry in the file - ix = index.remove(key); + ix = (int) index.remove(key); assert ix == i; table.removeRow(i, false); file.cleanLast(); } else { // switch values - ix = index.remove(key); + ix = (int) index.remove(key); assert ix == i; final Row.Entry te = table.removeOne(); @@ -546,7 +546,7 @@ public class EcoTable implements ObjectIndex { final byte[] le = new byte[rowdef.objectsize]; file.cleanLast(le, 0); final Row.Entry lr = rowdef.newEntry(le); - final int i = index.remove(lr.getPrimaryKeyBytes()); + final int i = (int) index.remove(lr.getPrimaryKeyBytes()); assert i >= 0; if (table != null) table.remove(lr.getPrimaryKeyBytes()); assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); @@ -579,7 +579,7 @@ public class EcoTable implements ObjectIndex { // initialize index and copy table table = (table == null) ? 
null : new RowSet(taildef, 1); - index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1, 100000); + index = new HandleMap(rowdef.primaryKeyLength, rowdef.objectOrder, 4, 1, 100000); } public Row row() { @@ -659,7 +659,7 @@ public class EcoTable implements ObjectIndex { final byte[] k = i.next(); assert k != null; if (k == null) return null; - this.c = index.get(k); + this.c = (int) index.get(k); if (this.c < 0) throw new ConcurrentModificationException(); // this should only happen if the table was modified during the iteration final byte[] b = new byte[rowdef.objectsize]; if (table == null) { diff --git a/source/de/anomic/kelondro/table/FlexTable.java b/source/de/anomic/kelondro/table/FlexTable.java deleted file mode 100644 index 35638a289..000000000 --- a/source/de/anomic/kelondro/table/FlexTable.java +++ /dev/null @@ -1,431 +0,0 @@ -// kelondroFlexTable.java -// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany -// first published 01.06.2006 on http://www.anomic.de -// -// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ -// $LastChangedRevision: 1986 $ -// $LastChangedBy: orbiter $ -// -// LICENSE -// -// This program is free software; you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation; either version 2 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. 
-// -// You should have received a copy of the GNU General Public License -// along with this program; if not, write to the Free Software -// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.kelondro.table; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; -import java.util.TreeMap; -import java.util.TreeSet; - -import de.anomic.kelondro.index.IntegerHandleIndex; -import de.anomic.kelondro.index.Row; -import de.anomic.kelondro.index.RowCollection; -import de.anomic.kelondro.index.RowSet; -import de.anomic.kelondro.index.ObjectIndex; -import de.anomic.kelondro.order.CloneableIterator; -import de.anomic.kelondro.order.NaturalOrder; -import de.anomic.kelondro.util.FileUtils; -import de.anomic.kelondro.util.MemoryControl; -import de.anomic.kelondro.util.kelondroException; -import de.anomic.kelondro.util.Log; - -public class FlexTable extends FlexWidthArray implements ObjectIndex { - - // static tracker objects - private static TreeMap tableTracker = new TreeMap(); - - // class objects - protected IntegerHandleIndex index; - private boolean RAMIndex; - - /** - * Deprecated Class. 
Please use kelondroEcoTable instead - */ - @Deprecated - public FlexTable(final File path, final String tablename, final Row rowdef, int minimumSpace, final boolean resetOnFail) { - // the buffersize applies to a possible load of the ram-index - // the minimumSpace is a initial allocation space for the index; names the number of index slots - // if the ram is not sufficient, a tree file is generated - // if, and only if a tree file exists, the preload time is applied - super(path, tablename, rowdef, resetOnFail); - if ((super.col[0].size() < 0) && (resetOnFail)) try { - super.reset(); - } catch (final IOException e2) { - e2.printStackTrace(); - throw new kelondroException(e2.getMessage()); - } - minimumSpace = Math.max(minimumSpace, super.size()); - try { - final long neededRAM = 10 * 1024 * 104 + (long) ((super.row().primaryKeyLength + 4) * minimumSpace * RowCollection.growfactor); - - final File newpath = new File(path, tablename); - final File indexfile = new File(newpath, "col.000.index"); - String description = ""; - description = new String(this.col[0].getDescription()); - final int p = description.indexOf(';', 4); - final long stt = (p > 0) ? 
Long.parseLong(description.substring(4, p)) : 0; - System.out.println("*** Last Startup time: " + stt + " milliseconds"); - final long start = System.currentTimeMillis(); - - // we use a RAM index - if (indexfile.exists()) { - // delete existing index file - System.out.println("*** Delete File index " + indexfile); - FileUtils.deletedelete(indexfile); - } - - // fill the index - System.out.print("*** Loading RAM index for " + size() + " entries from " + newpath + "; available RAM = " + (MemoryControl.available() >> 20) + " MB, allocating " + (neededRAM >> 20) + " MB for index."); - index = initializeRamIndex(minimumSpace); - - System.out.println(" -done-"); - System.out.println(index.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys."); - RAMIndex = true; - tableTracker.put(this.filename(), this); - - // check consistency - final ArrayList doubles = index.removeDoubles(); - if (doubles.size() > 0) { - System.out.println("DEBUG: WARNING - FlexTable " + newpath.toString() + " has " + doubles.size() + " doubles"); - } - - // assign index to wrapper - description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";"; - super.col[0].setDescription(description.getBytes()); - } catch (final IOException e) { - if (resetOnFail) { - RAMIndex = true; - index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0); - } else { - throw new kelondroException(e.getMessage()); - } - } - } - - public void clear() throws IOException { - super.reset(); - RAMIndex = true; - index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0); - } - - public static int staticSize(final File path, final String tablename) { - return FlexWidthArray.staticsize(path, tablename); - } - - public static int staticRAMIndexNeed(final File path, final String tablename, final Row rowdef) { - return (int) ((rowdef.primaryKeyLength + 4) * staticSize(path, tablename) * RowCollection.growfactor); - } 
- - public boolean hasRAMIndex() { - return RAMIndex; - } - - public synchronized boolean has(final byte[] key) { - // it is not recommended to implement or use a has predicate unless - // it can be ensured that it causes no IO - if ((AbstractRecords.debugmode) && (RAMIndex != true)) Log.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return index.has(key); - } - - private IntegerHandleIndex initializeRamIndex(final int initialSpace) { - final int space = Math.max(super.col[0].size(), initialSpace) + 1; - if (space < 0) throw new kelondroException("wrong space: " + space); - final IntegerHandleIndex ri = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space, 0); - final Iterator content = super.col[0].contentNodes(-1); - Node node; - int i; - byte[] key; - while (content.hasNext()) { - node = content.next(); - i = node.handle().hashCode(); - try { - key = node.getKey(); - } catch (IOException e1) { - e1.printStackTrace(); - break; - } - assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator - //System.out.println("ENTRY: " + serverLog.arrayList(indexentry.bytes(), 0, indexentry.objectsize())); - ri.putUnique(key, i); - if ((i % 10000) == 0) { - System.out.print('.'); - System.out.flush(); - } - } - System.out.print(" -ordering- "); - System.out.flush(); - return ri; - } - - public synchronized Row.Entry get(final byte[] key) throws IOException { - if (index == null) return null; // case may happen during shutdown - final int pos = index.get(key); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - if (pos < 0) return null; - // pos may be greater than this.size(), because this table may have deleted entries - // the deleted entries are 
subtracted from the 'real' tablesize, - // so the size may be smaller than an index to a row entry - /*if (kelondroAbstractRecords.debugmode) { - kelondroRow.Entry result = super.get(pos); - assert result != null; - assert rowdef.objectOrder.compare(result.getPrimaryKeyBytes(), key) == 0 : "key and row does not match; key = " + serverLog.arrayList(key, 0, key.length) + " row.key = " + serverLog.arrayList(result.getPrimaryKeyBytes(), 0, rowdef.primaryKeyLength); - return result; - } else {*/ - // assume that the column for the primary key is 0, - // and the column 0 is stored in a file only for that column - // then we don't need to lookup from that file, because we already know the value (it's the key) - final Row.Entry result = super.getOmitCol0(pos, key); - assert result != null; - return result; - //} - } - - public synchronized Row.Entry put(final Row.Entry row, final Date entryDate) throws IOException { - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return replace(row); - } - - public synchronized Row.Entry replace(final Row.Entry row) throws IOException { - assert (row != null); - assert (!(Log.allZero(row.getColBytes(0)))); - assert row.objectsize() <= this.rowdef.objectsize; - final byte[] key = row.getColBytes(0); - if (index == null) return null; // case may appear during shutdown - int pos = index.get(key); - if (pos < 0) { - pos = super.add(row); - index.put(key, pos); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return null; - } - //System.out.println("row.key=" + serverLog.arrayList(row.bytes(), 0, row.objectsize())); - final Row.Entry oldentry = super.get(pos); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - if (oldentry == null) { - Log.logSevere("kelondroFlexTable", "put(): index failure; the index pointed to a cell which is empty. 
content.size() = " + this.size() + ", index.size() = " + index.size()); - // patch bug ***** FIND CAUSE! (see also: remove) - final int oldindex = index.remove(key); - assert oldindex >= 0; - assert index.get(key) == -1; - // here is this.size() > index.size() because of remove operation above - index.put(key, super.add(row)); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return null; - } - assert oldentry != null : "overwrite of empty position " + pos + ", index management must have failed before"; - assert rowdef.objectOrder.equal(oldentry.getPrimaryKeyBytes(), key) : "key and row does not match; key = " + NaturalOrder.arrayList(key, 0, key.length) + " row.key = " + NaturalOrder.arrayList(oldentry.getPrimaryKeyBytes(), 0, rowdef.primaryKeyLength); - super.set(pos, row); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return oldentry; - } - - public synchronized void put(final Row.Entry row) throws IOException { - assert (row != null); - assert (!(Log.allZero(row.getColBytes(0)))); - assert row.objectsize() <= this.rowdef.objectsize; - final byte[] key = row.getColBytes(0); - if (index == null) return; // case may appear during shutdown - int pos = index.get(key); - if (pos < 0) { - pos = super.add(row); - index.put(key, pos); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return; - } - //System.out.println("row.key=" + serverLog.arrayList(row.bytes(), 0, row.objectsize())); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - super.set(pos, row); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - } - - public synchronized void addUnique(final Row.Entry row) throws IOException { - assert row.objectsize() == this.rowdef.objectsize; - assert 
this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - index.putUnique(row.getColBytes(0), super.add(row)); - } - - public synchronized ArrayList removeDoubles() throws IOException { - final ArrayList report = new ArrayList(); - RowSet rows; - final TreeSet d = new TreeSet(); - for (final Integer[] is: index.removeDoubles()) { - rows = new RowSet(this.rowdef, is.length); - for (int j = 0; j < is.length; j++) { - d.add(is[j]); - rows.addUnique(this.get(is[j].intValue())); - } - report.add(rows); - } - // finally delete the affected rows, but start with largest id first, otherwise we overwrite wrong entries - Integer s; - while (d.size() > 0) { - s = d.last(); - d.remove(s); - this.remove(s.intValue()); - } - return report; - } - - public synchronized Row.Entry remove(final byte[] key) throws IOException { - // the underlying data structure is a file, where the order cannot be maintained. Gaps are filled with new values. - final int i = index.remove(key); - assert (index.get(key) < 0); // must be deleted - if (i < 0) { - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return null; - } - final Row.Entry r = super.getOmitCol0(i, key); - if (r == null) { - Log.logSevere("kelondroFlexTable", "remove(): index failure; the index pointed to a cell which is empty. content.size() = " + this.size() + ", index.size() = " + ((index == null) ? 0 : index.size())); - // patch bug ***** FIND CAUSE! 
(see also: put) - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return null; - } - assert r != null : "r == null"; // should be avoided with path above - assert rowdef.objectOrder.equal(r.getPrimaryKeyBytes(), key) : "key and row does not match; key = " + NaturalOrder.arrayList(key, 0, key.length) + " row.key = " + NaturalOrder.arrayList(r.getPrimaryKeyBytes(), 0, rowdef.primaryKeyLength); - super.remove(i); - assert super.get(i) == null : "i = " + i + ", get(i) = " + NaturalOrder.arrayList(super.get(i).bytes(), 0, 12); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return r; - } - - public synchronized Row.Entry removeOne() throws IOException { - final int i = index.removeone(); - if (i < 0) return null; - Row.Entry r; - r = super.get(i); - super.remove(i); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return r; - } - - public synchronized CloneableIterator keys(final boolean up, final byte[] firstKey) throws IOException { - return index.keys(up, firstKey); - } - - public synchronized CloneableIterator rows() throws IOException { - return new rowIterator(true, null); - } - - public synchronized CloneableIterator rows(final boolean up, final byte[] firstKey) throws IOException { - if (index == null) return new rowIterator(up, firstKey); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - return new rowIterator(up, firstKey); - } - - public class rowIterator implements CloneableIterator { - - CloneableIterator indexIterator; - boolean up; - - public rowIterator(final boolean up, final byte[] firstKey) throws IOException { - this.up = up; - indexIterator = index.rows(up, firstKey); - } - - public rowIterator clone(final Object modifier) { - try { - return new rowIterator(up, (byte[]) modifier); - } catch (final 
IOException e) { - return null; - } - } - - public boolean hasNext() { - return indexIterator.hasNext(); - } - - public Row.Entry next() { - Row.Entry idxEntry = null; - while ((indexIterator.hasNext()) && (idxEntry == null)) { - idxEntry = indexIterator.next(); - } - if (idxEntry == null) { - Log.logSevere("kelondroFlexTable.rowIterator: " + tablename, "indexIterator returned null"); - return null; - } - final int idx = (int) idxEntry.getColLong(1); - try { - return get(idx); - } catch (final IOException e) { - e.printStackTrace(); - return null; - } - } - - public void remove() { - indexIterator.remove(); - } - - } - - public static final Iterator filenames() { - // iterates string objects; all file names from record tracker - return tableTracker.keySet().iterator(); - } - - public static final Map memoryStats(final String filename) { - // returns a map for each file in the tracker; - // the map represents properties for each record objects, - // i.e. for cache memory allocation - final FlexTable theFlexTable = tableTracker.get(filename); - return theFlexTable.memoryStats(); - } - - private final Map memoryStats() { - // returns statistical data about this object - final HashMap map = new HashMap(); - map.put("tableIndexChunkSize", (!RAMIndex) ? "0" : Integer.toString(index.row().objectsize)); - map.put("tableIndexCount", (!RAMIndex) ? "0" : Integer.toString(index.size())); - map.put("tableIndexMem", (!RAMIndex) ? "0" : Integer.toString((int) (index.row().objectsize * index.size() * RowCollection.growfactor))); - return map; - } - - public synchronized void close() { - if (tableTracker.remove(this.filename) == null) { - Log.logWarning("kelondroFlexTable", "close(): file '" + this.filename + "' was not tracked with record tracker."); - } - if ((index != null) && (this.size() != ((index == null) ? 0 : index.size()))) { - Log.logSevere("kelondroFlexTable", this.filename + " close(): inconsistent content/index size. 
content.size() = " + this.size() + ", index.size() = " + ((index == null) ? 0 : index.size())); - } - - if (index != null) {index.close(); index = null;} - super.close(); - } - - public static void main(final String[] args) { - // open a file, add one entry and exit - final File f = new File(args[0]); - final String name = args[1]; - final Row row = new Row("Cardinal key-4 {b256}, byte[] x-64", NaturalOrder.naturalOrder); - try { - final FlexTable t = new FlexTable(f, name, row, 0, true); - final Row.Entry entry = row.newEntry(); - entry.setCol(0, System.currentTimeMillis()); - entry.setCol(1, "dummy".getBytes()); - t.put(entry); - t.close(); - } catch (final IOException e) { - e.printStackTrace(); - } - } - -} diff --git a/source/de/anomic/kelondro/table/SplitTable.java b/source/de/anomic/kelondro/table/SplitTable.java index 991d38d31..8bb3078ee 100644 --- a/source/de/anomic/kelondro/table/SplitTable.java +++ b/source/de/anomic/kelondro/table/SplitTable.java @@ -164,11 +164,7 @@ public class SplitTable implements ObjectIndex { maxtime = time; } - if (f.isDirectory()) { - ram = FlexTable.staticRAMIndexNeed(path, tablefile[i], rowdef); - } else { - ram = EcoTable.staticRAMIndexNeed(f, rowdef); - } + ram = EcoTable.staticRAMIndexNeed(f, rowdef); if (ram > 0) { t.put(tablefile[i], Long.valueOf(ram)); sum += ram; @@ -200,11 +196,6 @@ public class SplitTable implements ObjectIndex { t.remove(maxf); if (maxf != null) { f = new File(path, maxf); - if (f.isDirectory()) { - // this is a kelonodroFlex table - FlexTable.delete(path, maxf); - Log.logInfo("kelondroSplitTable", "replaced partial flex table " + f + " by new eco table"); - } Log.logInfo("kelondroSplitTable", "opening partial eco table " + f); table = new EcoTable(f, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0); tables.put(maxf, table); diff --git a/source/de/anomic/kelondro/text/ReferenceContainerArray.java b/source/de/anomic/kelondro/text/ReferenceContainerArray.java index 56a7799c2..4a5177d76 100644 
--- a/source/de/anomic/kelondro/text/ReferenceContainerArray.java +++ b/source/de/anomic/kelondro/text/ReferenceContainerArray.java @@ -31,7 +31,7 @@ import java.util.Iterator; import de.anomic.kelondro.blob.BLOB; import de.anomic.kelondro.blob.BLOBArray; -import de.anomic.kelondro.index.IntegerHandleIndex; +import de.anomic.kelondro.index.HandleMap; import de.anomic.kelondro.index.Row; import de.anomic.kelondro.index.RowSet; import de.anomic.kelondro.order.ByteOrder; @@ -294,14 +294,14 @@ public final class ReferenceContainerArray { return donesomething; } - public static IntegerHandleIndex referenceHashes( + public static HandleMap referenceHashes( final File heapLocation, final ReferenceFactory factory, final ByteOrder termOrder, final Row payloadrow) throws IOException { System.out.println("CELL REFERENCE COLLECTION startup"); - IntegerHandleIndex references = new IntegerHandleIndex(payloadrow.primaryKeyLength, termOrder, 0, 1000000); + HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 0, 1000000); String[] files = heapLocation.list(); for (String f: files) { if (f.length() < 22 || !f.startsWith("index") || !f.endsWith(".blob")) continue; @@ -324,7 +324,7 @@ public final class ReferenceContainerArray { if (reference == null) continue; mh = reference.metadataHash(); if (mh == null) continue; - references.inc(mh.getBytes(), 1); + references.inc(mh.getBytes()); } count++; // write a log diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 55343d2e4..401c23c84 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -63,6 +63,7 @@ public final class plasmaSearchQuery { public int contentdom; public String urlMask; public String targetlang; + public String navigators; public int domType; public int zonecode; public int domMaxTargets; @@ -115,6 +116,7 @@ public final class plasmaSearchQuery { this.remotepeer = null; 
this.handle = Long.valueOf(System.currentTimeMillis()); this.specialRights = false; + this.navigators = "all"; } public plasmaSearchQuery( @@ -124,6 +126,7 @@ public final class plasmaSearchQuery { final plasmaSearchRankingProfile ranking, final int maxDistance, final String prefer, final int contentdom, final String language, + final String navigators, final boolean onlineSnippetFetch, final int lines, final int offset, final String urlMask, final int domType, final String domGroupName, final int domMaxTargets, @@ -145,6 +148,7 @@ public final class plasmaSearchQuery { this.urlMask = urlMask; assert language != null; this.targetlang = language; + this.navigators = navigators; this.domType = domType; this.zonecode = domainzone; this.domMaxTargets = domMaxTargets;