- refactoring of IntegerHandleIndex and LongHandleIndex: both classes have been merged into the new HandleMap class, which handles (key<byte[]>, n-byte-long) pairs with arbitrary key and value length. This will be useful to get a memory-enhanced/minimized database table indexing.

- added an analysis method that counts the bytes that could be saved if the new HandleMap is applied in the most efficient way. Look for the log messages beginning with "HeapReader saturation": in most cases we could save about 30% RAM!
- removed the old FlexTable database structure. It was not used any more.
- removed memory statistics in PerformanceMemory about flex tables and node caches (node caches were used by Tree Tables, which are also not used any more)
- added a stub for steering of navigation functions. That should help to switch off navigation computation in cases where it is not demanded by a client

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6034 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent bead0006da
commit c079b18ee7

@ -99,60 +99,6 @@
</tr>
</table>
<p><strong>FlexTable RAM Index:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Table</td>
<td>Count</td>
<td>Chunk Size</td>
<td>Used Memory</td>
</tr>
#{TableList}#
<tr class="TableCellLight">
<td align="left" class="TableCellDark">#[tableIndexPath]#</td>
<td align="right">#[tableIndexCount]#</td>
<td align="right">#[tableIndexChunkSize]#</td>
<td align="right">#[tableIndexMem]#</td>
</tr>
#{/TableList}#
<tr class="TableCellDark">
<td colspan="6">Total Mem = #[TableIndexTotalMem]# MB</td>
</tr>
</table>
<p><strong>Node Caches:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td>Table</td>
<td>Size</td>
<td>Chunk Size</td>
<td>Used Memory</td>
<td>Read Hit</td>
<td>Read Miss</td>
<td>Write Unique</td>
<td>Write Double</td>
<td>Deletes</td>
<td>Flushes</td>
</tr>
#{NodeList}#
<tr class="TableCellLight">
<td align="left" class="TableCellDark">#[nodeCachePath]#</td>
<td align="right">#[nodeCacheCount]#</td>
<td align="right">#[nodeChunkSize]#</td>
<td align="right">#[nodeCacheMem]#</td>
<td align="right">#[nodeCacheReadHit]#</td>
<td align="right">#[nodeCacheReadMiss]#</td>
<td align="right">#[nodeCacheWriteUnique]#</td>
<td align="right">#[nodeCacheWriteDouble]#</td>
<td align="right">#[nodeCacheDeletes]#</td>
<td align="right">#[nodeCacheFlushes]#</td>
</tr>
#{/NodeList}#
<tr class="TableCellDark">
<td colspan="10">Total Mem = #[nodeCacheTotalMem]# MB; Stop Grow when less than #[nodeCacheStopGrow]# MB available left; Start Shrink when less than #[nodeCacheStartShrink]# MB availabe left</td>
</tr>
</table>
<p><strong>Object Read Caches:</strong></p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">

@ -31,9 +31,7 @@ import java.util.Map;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.blob.Cache;
import de.anomic.kelondro.table.CachedRecords;
import de.anomic.kelondro.table.EcoTable;
import de.anomic.kelondro.table.FlexTable;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaSwitchboard;
@ -90,30 +88,12 @@ public class PerformanceMemory_p {
prop.putNum("memoryUsedAfterInitAGC", (memoryTotalAfterInitAGC - memoryFreeAfterInitAGC) / KB);
prop.putNum("memoryUsedNow", (memoryTotalNow - memoryFreeNow) / MB);
// write table for FlexTable index sizes
Iterator<String> i = FlexTable.filenames();
// write table for EcoTable index sizes
Iterator<String> i = EcoTable.filenames();
String filename;
Map<String, String> map;
int p, c = 0;
long mem, totalmem = 0;
while (i.hasNext()) {
filename = i.next();
map = FlexTable.memoryStats(filename);
mem = Long.parseLong(map.get("tableIndexMem"));
totalmem += mem;
prop.put("TableList_" + c + "_tableIndexPath", ((p = filename.indexOf("DATA")) < 0) ? filename : filename.substring(p));
prop.put("TableList_" + c + "_tableIndexChunkSize", map.get("tableIndexChunkSize"));
prop.putNum("TableList_" + c + "_tableIndexCount", map.get("tableIndexCount"));
prop.put("TableList_" + c + "_tableIndexMem", Formatter.bytesToString(mem));
c++;
}
prop.put("TableList", c);
prop.putNum("TableIndexTotalMem", totalmem / (1024 * 1024d));
// write table for EcoTable index sizes
i = EcoTable.filenames();
c = 0;
totalmem = 0;
while (i.hasNext()) {
filename = i.next();
map = EcoTable.memoryStats(filename);
@ -135,32 +115,6 @@ public class PerformanceMemory_p {
prop.put("EcoList", c);
prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d));
// write node cache table
i = CachedRecords.filenames();
c = 0;
totalmem = 0;
while (i.hasNext()) {
filename = i.next();
map = CachedRecords.memoryStats(filename);
mem = Long.parseLong(map.get("nodeCacheMem"));
totalmem += mem;
prop.put("NodeList_" + c + "_nodeCachePath", ((p = filename.indexOf("DATA")) < 0) ? filename : filename.substring(p));
prop.put("NodeList_" + c + "_nodeChunkSize", map.get("nodeChunkSize"));
prop.putNum("NodeList_" + c + "_nodeCacheCount", map.get("nodeCacheCount"));
prop.put("NodeList_" + c + "_nodeCacheMem", Formatter.bytesToString(mem));
prop.putNum("NodeList_" + c + "_nodeCacheReadHit", map.get("nodeCacheReadHit"));
prop.putNum("NodeList_" + c + "_nodeCacheReadMiss", map.get("nodeCacheReadMiss"));
prop.putNum("NodeList_" + c + "_nodeCacheWriteUnique", map.get("nodeCacheWriteUnique"));
prop.putNum("NodeList_" + c + "_nodeCacheWriteDouble", map.get("nodeCacheWriteDouble"));
prop.putNum("NodeList_" + c + "_nodeCacheDeletes", map.get("nodeCacheDeletes"));
prop.putNum("NodeList_" + c + "_nodeCacheFlushes", map.get("nodeCacheFlushes"));
c++;
}
prop.put("NodeList", c);
prop.putNum("nodeCacheStopGrow", CachedRecords.getMemStopGrow() / (1024 * 1024d));
prop.putNum("nodeCacheStartShrink", CachedRecords.getMemStartShrink() / (1024 * 1024d));
prop.putNum("nodeCacheTotalMem", totalmem / (1024 * 1024d));
// write object cache table
i = Cache.filenames();
c = 0;

@ -37,38 +37,6 @@
<EcoIndexTotalMem>#[EcoIndexTotalMem]#</EcoIndexTotalMem>
</EcoTable>
<FlexTable>
#{TableList}#
<entry>
<tableIndexPath>#[tableIndexPath]#</tableIndexPath>
<tableIndexChunkSize>#[tableIndexChunkSize]#</tableIndexChunkSize>
<tableIndexCount>#[tableIndexCount]#</tableIndexCount>
<tableIndexMem>#[tableIndexMem]#</tableIndexMem>
</entry>
#{/TableList}#
<TableIndexTotalMem>#[TableIndexTotalMem]#</TableIndexTotalMem>
</FlexTable>
<NodeCaches>
#{NodeList}#
<entry>
<nodeCachePath>#[nodeCachePath]#</nodeCachePath>
<nodeChunkSize>#[nodeChunkSize]#</nodeChunkSize>
<nodeCacheCount>#[nodeCacheCount]#</nodeCacheCount>
<nodeCacheMem>#[nodeCacheMem]#</nodeCacheMem>
<nodeCacheReadHit>#[nodeCacheReadHit]#</nodeCacheReadHit>
<nodeCacheReadMiss>#[nodeCacheReadMiss]#</nodeCacheReadMiss>
<nodeCacheWriteUnique>#[nodeCacheWriteUnique]#</nodeCacheWriteUnique>
<nodeCacheWriteDouble>#[nodeCacheWriteDouble]#</nodeCacheWriteDouble>
<nodeCacheDeletes>#[nodeCacheDeletes]#</nodeCacheDeletes>
<nodeCacheFlushes>#[nodeCacheFlushes]#</nodeCacheFlushes>
</entry>
#{/NodeList}#
<nodeCacheTotalMem>#[nodeCacheTotalMem]#</nodeCacheTotalMem>
<nodeCacheStopGrow>#[nodeCacheStopGrow]#</nodeCacheStopGrow>
<nodeCacheStartShrink>#[nodeCacheStartShrink]#</nodeCacheStartShrink>
</NodeCaches>
<ObjectReadCaches>
#{ObjectList}#
<entry>

@ -193,6 +193,7 @@ public final class search {
prefer,
plasmaSearchQuery.contentdomParser(contentdom),
language,
"", // no navigation
false,
count,
0,
@ -243,7 +244,8 @@ public final class search {
prefer,
plasmaSearchQuery.
contentdomParser(contentdom),
language,
language,
"", // no navigation
false,
count,
0,

@ -312,6 +312,10 @@ public class yacysearch {
if (language == null) language = "en";
}
// navigation
String navigation = (post == null) ? "" : post.get("nav", "");
// the query
final TreeSet<String>[] query = plasmaSearchQuery.cleanQuery(querystring.trim()); // converts also umlaute
int maxDistance = (querystring.indexOf('"') >= 0) ? maxDistance = query.length - 1 : Integer.MAX_VALUE;
@ -385,6 +389,7 @@ public class yacysearch {
prefermask,
contentdomCode,
language,
navigation,
fetchSnippets,
itemsPerPage,
offset,

@ -51,7 +51,7 @@ import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import de.anomic.kelondro.index.HandleSet;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.HandleMap;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.text.MetadataRepository;
import de.anomic.kelondro.text.ReferenceContainerArray;
@ -396,7 +396,7 @@ public class URLAnalysis {
public static void incell(File cellPath, String statisticPath) {
try {
IntegerHandleIndex idx = ReferenceContainerArray.referenceHashes(
HandleMap idx = ReferenceContainerArray.referenceHashes(
cellPath,
Segment.wordReferenceFactory,
Base64Order.enhancedCoder,
@ -411,7 +411,7 @@ public class URLAnalysis {
public static int diffurlcol(String metadataPath, String statisticFile, String diffFile) throws IOException {
System.out.println("INDEX DIFF URL-COL startup");
IntegerHandleIndex idx = new IntegerHandleIndex(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, new File(statisticFile), 0);
HandleMap idx = new HandleMap(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, 4, new File(statisticFile), 0);
MetadataRepository mr = new MetadataRepository(new File(metadataPath));
HandleSet hs = new HandleSet(URLMetadataRow.rowdef.primaryKeyLength, URLMetadataRow.rowdef.objectOrder, 0, 1000000);
System.out.println("INDEX DIFF URL-COL loaded dump, starting diff");

@ -67,7 +67,7 @@ public class blogBoard {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, newFile), 500, '_');
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
}
}

@ -70,7 +70,7 @@ public class blogBoardComments {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false, newFile), 500, '_');
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, newFile), 500, '_');
}
}

@ -116,17 +116,17 @@ public class bookmarksDB {
tagCache=new TreeMap<String, Tag>();
bookmarksFile.getParentFile().mkdirs();
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, bookmarksFileNew), 1000, '_');
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, bookmarksFileNew), 1000, '_');
// tags
tagsFile.getParentFile().mkdirs();
final boolean tagsFileExisted = tagsFile.exists();
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, tagsFileNew), 500, '_');
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, false, false, tagsFileNew), 500, '_');
if (!tagsFileExisted) rebuildTags();
// dates
final boolean datesExisted = datesFile.exists();
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false, datesFileNew), 500, '_');
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, false, false, datesFileNew), 500, '_');
if (!datesExisted) rebuildDates();
// autoReCrawl

@ -55,7 +55,7 @@ public class messageBoard {
new File(path.getParent()).mkdir();
new File(pathNew.getParent()).mkdir();
if (database == null) {
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, pathNew), 500, '_');
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, false, false, pathNew), 500, '_');
}
sn = 0;
}

@ -60,7 +60,7 @@ public final class userDB {
this.userTableFile = userTableFileNew;
userTableFile.getParentFile().mkdirs();
userTableFileNew.getParentFile().mkdirs();
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false, userTableFile), 10, '_');
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, false, false, userTableFile), 10, '_');
}
void resetDatabase() {

@ -57,11 +57,11 @@ public class wikiBoard {
final File bkppath, final File bkppathNew) throws IOException {
new File(actpath.getParent()).mkdirs();
if (datbase == null) {
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, actpathNew), 500, '_');
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, actpathNew), 500, '_');
}
new File(bkppath.getParent()).mkdirs();
if (bkpbase == null) {
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false, bkppathNew), 500, '_');
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, false, false, bkppathNew), 500, '_');
}
}

@ -50,7 +50,6 @@ import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.table.EcoTable;
import de.anomic.kelondro.table.FlexTable;
import de.anomic.kelondro.table.Tree;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.kelondroException;
@ -58,7 +57,6 @@ import de.anomic.kelondro.util.kelondroException;
public class BLOBTree {
private static final int counterlen = 8;
private static final int EcoFSBufferSize = 20;
protected int keylen;
private final int reclen;
@ -72,39 +70,26 @@ public class BLOBTree {
* Deprecated Class. Please use kelondroBLOBHeap instead
*/
private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail) {
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail) {
// creates or opens a dynamic tree
rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder);
ObjectIndex fbi;
if (usetree) {
try {
fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8);
} catch (final IOException e) {
e.printStackTrace();
if (resetOnFail) {
FileUtils.deletedelete(file);
try {
fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8);
} catch (final IOException e1) {
e1.printStackTrace();
throw new kelondroException(e.getMessage());
}
} else {
try {
fbi = new Tree(file, useNodeCache, 0, rowdef, 1, 8);
} catch (final IOException e) {
e.printStackTrace();
if (resetOnFail) {
FileUtils.deletedelete(file);
try {
fbi = new Tree(file, useNodeCache, -1, rowdef, 1, 8);
} catch (final IOException e1) {
e1.printStackTrace();
throw new kelondroException(e.getMessage());
}
} else {
throw new kelondroException(e.getMessage());
}
} else {
if (file.exists()) {
if (file.isDirectory()) {
fbi = new FlexTable(file.getParentFile(), file.getName(), rowdef, 0, resetOnFail);
} else {
fbi = new EcoTable(file, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
}
} else {
fbi = new EcoTable(file, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
}
}
}
this.index = ((useObjectCache) && (!(fbi instanceof EcoTable))) ? (ObjectIndex) new Cache(fbi) : fbi;
this.keylen = key;
this.reclen = nodesize;
@ -115,13 +100,13 @@ public class BLOBTree {
}
public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException {
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException {
if (blob.exists() || !file.exists()) {
// open the blob file and ignore the tree
return new BLOBHeap(blob, key, objectOrder, 1024 * 64);
}
// open a Tree and migrate everything to a Heap
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, usetree, writebuffer, resetOnFail);
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, writebuffer, resetOnFail);
BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64);
Iterator<byte[]> i = tree.keys(true, false);
byte[] k, kk = new byte[key], v;

@ -35,7 +35,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ExecutionException;
import de.anomic.kelondro.index.LongHandleIndex;
import de.anomic.kelondro.index.HandleMap;
import de.anomic.kelondro.io.CachedRandomAccess;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
@ -49,7 +49,7 @@ public class HeapReader {
public final static long keepFreeMem = 20 * 1024 * 1024;
protected int keylength; // the length of the primary key
protected LongHandleIndex index; // key/seek relation for used records
protected HandleMap index; // key/seek relation for used records
protected Gap free; // set of {seek, size} pairs denoting space and position of free records
protected File heapFile; // the file of the heap
protected final ByteOrder ordering; // the ordering on keys
@ -117,11 +117,16 @@ public class HeapReader {
// there is an index and a gap file:
// read the index file:
try {
this.index = new LongHandleIndex(this.keylength, this.ordering, fif, 1000000);
this.index = new HandleMap(this.keylength, this.ordering, 8, fif, 1000000);
} catch (IOException e) {
e.printStackTrace();
return false;
}
// check saturation
int[] saturation = this.index.saturation();
Log.logInfo("HeapReader", "saturation of " + fif.getName() + ": keylength = " + saturation[0] + ", vallength = " + saturation[1] + ", possible saving: " + ((this.keylength - saturation[0] + 8 - saturation[1]) * index.size() / 1024 / 1024) + " MB");
// an index file is a one-time throw-away object, so just delete it now
FileUtils.deletedelete(fif);
@ -141,10 +146,10 @@ public class HeapReader {
private void initIndexReadFromHeap() throws IOException {
// this initializes the this.index object by reading positions from the heap file
Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024) + " kbytes. Please wait.");
Log.logInfo("HeapReader", "generating index for " + heapFile.toString() + ", " + (file.length() / 1024 / 1024) + " MB. Please wait.");
this.free = new Gap();
LongHandleIndex.initDataConsumer indexready = LongHandleIndex.asynchronusInitializer(keylength, this.ordering, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000);
HandleMap.initDataConsumer indexready = HandleMap.asynchronusInitializer(keylength, this.ordering, 8, 0, Math.max(10, (int) (Runtime.getRuntime().freeMemory() / (10 * 1024 * 1024))), 100000);
byte[] key = new byte[keylength];
int reclen;
long seek = 0;
@ -187,7 +192,7 @@ public class HeapReader {
// new seek position
seek += 4L + reclen;
}
indexready.finish();
indexready.finish(true);
// finish the index generation
try {

@ -30,7 +30,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import de.anomic.kelondro.index.LongHandleIndex;
import de.anomic.kelondro.index.HandleMap;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.Digest;
import de.anomic.kelondro.util.FileUtils;
@ -39,7 +39,7 @@ import de.anomic.kelondro.util.Log;
public final class HeapWriter {
private int keylength; // the length of the primary key
private LongHandleIndex index; // key/seek relation for used records
private HandleMap index; // key/seek relation for used records
private final File heapFileTMP; // the temporary file of the heap during writing
private final File heapFileREADY; // the final file of the heap when the file is closed
private DataOutputStream os; // the output stream where the BLOB is written
@ -76,7 +76,7 @@ public final class HeapWriter {
this.heapFileTMP = temporaryHeapFile;
this.heapFileREADY = readyHeapFile;
this.keylength = keylength;
this.index = new LongHandleIndex(keylength, ordering, 10, 100000);
this.index = new HandleMap(keylength, ordering, 8, 10, 100000);
this.os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(temporaryHeapFile), outBuffer));
//this.doublecheck = new HashSet<String>();
this.seek = 0;

@ -154,11 +154,12 @@ public class Column {
((typename.equals("long")) && (this.cellwidth > 8)) ||
((typename.equals("char")) && (this.cellwidth > 1))
) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " too wide for type " + typename);
/*
if (((typename.equals("short")) && (this.cellwidth <= 1)) ||
((typename.equals("int")) && (this.cellwidth <= 2)) ||
((typename.equals("long")) && (this.cellwidth <= 4))
) throw new kelondroException("kelondroColumn - cell width " + this.cellwidth + " not appropriate for type " + typename);
*/
// parse/check encoder type
if ((celldef.length() > 0) && (celldef.charAt(0) == '{')) {
p = celldef.indexOf('}');

@ -1,6 +1,6 @@
// kelondroBytesIntMap.java
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 18.06.2006 on http://www.anomic.de
// HandleMap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 08.04.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
@ -43,6 +43,8 @@ import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
@ -50,28 +52,37 @@ import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.yacy.dht.FlatWordPartitionScheme;
public class IntegerHandleIndex {
public class HandleMap implements Iterable<Row.Entry> {
private final Row rowdef;
private ObjectIndexCache index;
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("int c-4 {b256}")}, objectOrder);
/**
* initialize a HandleMap
* This may store a key and a long value for each key.
* The class is used as index for database files
* @param keylength
* @param objectOrder
* @param space
*/
public HandleMap(final int keylength, final ByteOrder objectOrder, int idxbytes, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder);
this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace);
}
/**
* initialize a BytesLongMap with the content of a dumped index
* initialize a HandleMap with the content of a dumped index
* @param keylength
* @param objectOrder
* @param file
* @throws IOException
*/
public IntegerHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
public HandleMap(final int keylength, final ByteOrder objectOrder, int idxbytes, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, idxbytes, (int) (file.length() / (keylength + idxbytes)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
byte[] a = new byte[keylength + 4];
if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
byte[] a = new byte[keylength + idxbytes];
int c;
Row.Entry entry;
while (true) {
@ -81,9 +92,41 @@ public class IntegerHandleIndex {
if (entry != null) this.index.addUnique(entry);
}
is.close();
assert this.index.size() == file.length() / (keylength + 4);
is = null;
assert this.index.size() == file.length() / (keylength + idxbytes);
}
public int[] saturation() {
int keym = 0;
int valm = this.rowdef.width(1);
int valc;
byte[] lastk = null, thisk;
for (Row.Entry row: this) {
// check length of key
if (lastk == null) {
lastk = row.bytes();
} else {
thisk = row.bytes();
keym = Math.max(keym, eq(lastk, thisk));
lastk = thisk;
}
// check length of value
for (valc = this.rowdef.primaryKeyLength; valc < this.rowdef.objectsize; valc++) {
if (lastk[valc] != 0) break;
} // valc is the number of leading zeros plus primaryKeyLength
valm = Math.min(valm, valc - this.rowdef.primaryKeyLength); // valm is the number of leading zeros
}
return new int[]{keym, this.rowdef.width(1) - valm};
}
private int eq(byte[] a, byte[] b) {
for (int i = 0; i < a.length; i++) {
if (a[i] != b[i]) return i;
}
return a.length;
}
/**
* write a dump of the index to a file. All entries are written in order
* which makes it possible to read them again in a fast way
@ -95,8 +138,10 @@ public class IntegerHandleIndex {
// we must use an iterator from the combined index, because we need the entries sorted
// otherwise we could just write the byte[] from the in kelondroRowSet which would make
// everything much faster, but this is not an option here.
File tmp = new File(file.getParentFile(), file.getName() + ".prt");
Iterator<Row.Entry> i = this.index.rows(true, null);
OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 1024 * 1024);
OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024);
if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
int c = 0;
while (i.hasNext()) {
os.write(i.next().bytes());
@ -104,15 +149,18 @@ public class IntegerHandleIndex {
}
os.flush();
os.close();
tmp.renameTo(file);
assert file.exists() : file.toString();
assert !tmp.exists() : tmp.toString();
return c;
}
public Row row() {
return index.row();
}
public void clear() {
this.index.clear();
index.clear();
}
public synchronized boolean has(final byte[] key) {
@ -120,36 +168,34 @@ public class IntegerHandleIndex {
return index.has(key);
}
public synchronized int get(final byte[] key) {
public synchronized long get(final byte[] key) {
assert (key != null);
final Row.Entry indexentry = index.get(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
return indexentry.getColLong(1);
}
public synchronized int put(final byte[] key, final int i) {
assert i >= 0 : "i = " + i;
public synchronized long put(final byte[] key, final long l) {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = index.row().newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
newentry.setCol(1, l);
final Row.Entry oldentry = index.replace(newentry);
if (oldentry == null) return -1;
return (int) oldentry.getColLong(1);
return oldentry.getColLong(1);
}
public synchronized int inc(final byte[] key, int a) {
assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
public synchronized void putUnique(final byte[] key, final long l) {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key);
newentry.setCol(1, a);
long l = index.inc(key, 1, a, newentry);
return (int) l;
newentry.setCol(1, l);
index.addUnique(newentry);
}
/*
public synchronized int inc(final byte[] key, int a) throws IOException {
public synchronized long add(final byte[] key, long a) {
assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
@ -161,51 +207,51 @@ public class IntegerHandleIndex {
index.addUnique(newentry);
return 1;
} else {
long l = indexentry.incCol(1, a);
long i = indexentry.getColLong(1) + a;
indexentry.setCol(1, i);
index.put(indexentry);
return (int) l;
return i;
}
}
*/
public synchronized void putUnique(final byte[] key, final int i) {
assert i >= 0 : "i = " + i;
assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
index.addUnique(newentry);
public synchronized long inc(final byte[] key) {
return add(key, 1);
}
public synchronized long dec(final byte[] key) {
return add(key, -1);
}
public synchronized ArrayList<Integer[]> removeDoubles() {
final ArrayList<Integer[]> report = new ArrayList<Integer[]>();
Integer[] is;
int c, i;
public synchronized ArrayList<Long[]> removeDoubles() {
final ArrayList<Long[]> report = new ArrayList<Long[]>();
Long[] is;
int c;
long l;
final int initialSize = this.size();
for (final RowCollection delset: index.removeDoubles()) {
is = new Integer[delset.size()];
for (final RowCollection rowset: index.removeDoubles()) {
is = new Long[rowset.size()];
c = 0;
for (Row.Entry e : delset) {
i = (int) e.getColLong(1);
assert i < initialSize : "i = " + i + ", initialSize = " + initialSize;
is[c++] = Integer.valueOf(i);
for (Row.Entry e: rowset) {
l = e.getColLong(1);
assert l < initialSize : "l = " + l + ", initialSize = " + initialSize;
is[c++] = Long.valueOf(l);
}
report.add(is);
}
return report;
}
public synchronized int remove(final byte[] key) {
public synchronized long remove(final byte[] key) {
assert (key != null);
final Row.Entry indexentry = index.remove(key);
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
return indexentry.getColLong(1);
}
public synchronized int removeone() {
public synchronized long removeone() {
final Row.Entry indexentry = index.removeOne();
if (indexentry == null) return -1;
return (int) indexentry.getColLong(1);
return indexentry.getColLong(1);
}
public synchronized int size() {
@ -225,19 +271,9 @@ public class IntegerHandleIndex {
index = null;
}
private static class entry {
public byte[] key;
public int l;
public entry(final byte[] key, final int l) {
this.key = key;
this.l = l;
}
}
private static final entry poisonEntry = new entry(new byte[0], 0);
/**
* this method creates a concurrent thread that can take entries that are used to initialize the map
* it should be used when a bytesLongMap is initialized when a file is read. Concurrency of FileIO and
* it should be used when a HandleMap is initialized when a file is read. Concurrency of FileIO and
* map creation will speed up the initialization process.
* @param keylength
* @param objectOrder
@ -245,37 +281,47 @@ public class IntegerHandleIndex {
* @param bufferSize
* @return
*/
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new IntegerHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize);
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, int idxbytes, final int space, final int expectedspace, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new HandleMap(keylength, objectOrder, idxbytes, space, expectedspace), bufferSize);
ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer));
service.shutdown();
return initializer;
}
private static class entry {
public byte[] key;
public long l;
public entry(final byte[] key, final long l) {
this.key = key;
this.l = l;
}
}
private static final entry poisonEntry = new entry(new byte[0], 0);
public static class initDataConsumer implements Callable<IntegerHandleIndex> {
public static class initDataConsumer implements Callable<HandleMap> {
private BlockingQueue<entry> cache;
private IntegerHandleIndex map;
private Future<IntegerHandleIndex> result;
private HandleMap map;
private Future<HandleMap> result;
private boolean sortAtEnd;
public initDataConsumer(IntegerHandleIndex map, int bufferCount) {
public initDataConsumer(HandleMap map, int bufferCount) {
this.map = map;
cache = new ArrayBlockingQueue<entry>(bufferCount);
sortAtEnd = false;
}
protected void setResult(Future<IntegerHandleIndex> result) {
protected void setResult(Future<HandleMap> result) {
this.result = result;
}
/**
* hand over another entry that shall be inserted into the BytesLongMap with an addl method
* hand over another entry that shall be inserted into the HandleMap with an addl method
* @param key
* @param l
*/
public void consume(final byte[] key, final int l) {
public void consume(final byte[] key, final long l) {
try {
cache.put(new entry(key, l));
} catch (InterruptedException e) {
@ -298,17 +344,17 @@ public class IntegerHandleIndex {
/**
* this must be called after a finish() was called. this method blocks until all entries
* had been processed, and the content was sorted. It returns the kelondroBytesLongMap
* had been processed, and the content was sorted. It returns the HandleMap
* that the user wanted to initialize
* @return
* @throws InterruptedException
* @throws ExecutionException
*/
public IntegerHandleIndex result() throws InterruptedException, ExecutionException {
public HandleMap result() throws InterruptedException, ExecutionException {
return this.result.get();
}
public IntegerHandleIndex call() throws IOException {
public HandleMap call() throws IOException {
try {
entry c;
while ((c = cache.take()) != poisonEntry) {
@ -326,19 +372,19 @@ public class IntegerHandleIndex {
}
public static void main(String[] args) {
int count = (args.length == 0) ? 1000000 : Integer.parseInt(args[0]);
int count = (args.length == 0) ? 1000000 : Integer.parseInt(args[0]);
System.out.println("Starting test with " + count + " objects, minimum memory: " + (count * 16) + " bytes; " + MemoryControl.available() + " available");
Random r = new Random(0);
long start = System.currentTimeMillis();
System.gc(); // for resource measurement
long a = MemoryControl.available();
IntegerHandleIndex idx = new IntegerHandleIndex(12, Base64Order.enhancedCoder, 0, 150000);
HandleMap idx = new HandleMap(12, Base64Order.enhancedCoder, 8, 0, 150000);
for (int i = 0; i < count; i++) {
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)), 1);
idx.inc(FlatWordPartitionScheme.positionToHash(r.nextInt(count)));
}
long timek = ((long) count) * 1000L / (System.currentTimeMillis() - start);
System.out.println("Result IntegerHandleIndex: " + timek + " inc per second " + count + " loops.");
System.out.println("Result LongHandleIndex: " + timek + " inc per second " + count + " loops.");
System.gc();
long memk = a - MemoryControl.available();
System.out.println("Used Memory: " + memk + " bytes");
@ -366,7 +412,9 @@ public class IntegerHandleIndex {
System.out.println("Geschwindigkeitsfaktor j/k: " + (timej / timek));
System.out.println("Speicherplatzfaktor j/k: " + (memj / memk));
System.exit(0);
}
public Iterator<Row.Entry> iterator() {
return this.rows(true, null);
}
}

@ -116,7 +116,7 @@ public class IndexTest {
System.out.println("sorted map");
Runtime.getRuntime().gc();
long freeStartKelondro = MemoryControl.available();
IntegerHandleIndex ii = new IntegerHandleIndex(12, Base64Order.enhancedCoder, count, count);
HandleMap ii = new HandleMap(12, Base64Order.enhancedCoder, 4, count, count);
for (int i = 0; i < count; i++) ii.putUnique(tests[i], 1);
ii.get(randomHash(r)); // trigger sort
long t6 = System.currentTimeMillis();

@ -1,337 +0,0 @@
// kelondroBytesLongMap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 08.04.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.index;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
public class LongHandleIndex {
private final Row rowdef;
private ObjectIndexCache index;
/**
* initialize a BytesLongMap
* This may store a key and a long value for each key.
* The class is used as index for database files
* @param keylength
* @param objectOrder
* @param space
*/
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final int initialspace, final int expectedspace) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-8 {b256}")}, objectOrder);
this.index = new ObjectIndexCache(rowdef, initialspace, expectedspace);
}
/**
* initialize a BytesLongMap with the content of a dumped index
* @param keylength
* @param objectOrder
* @param file
* @throws IOException
*/
public LongHandleIndex(final int keylength, final ByteOrder objectOrder, final File file, final int expectedspace) throws IOException {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
byte[] a = new byte[keylength + 8];
int c;
while (true) {
c = is.read(a);
if (c <= 0) break;
this.index.addUnique(this.rowdef.newEntry(a));
}
is.close();
is = null;
assert this.index.size() == file.length() / (keylength + 8);
}
/**
* write a dump of the index to a file. All entries are written in order
* which makes it possible to read them again in a fast way
* @param file
* @return the number of written entries
* @throws IOException
*/
public int dump(File file) throws IOException {
// we must use an iterator from the combined index, because we need the entries sorted
// otherwise we could just write the byte[] from the in kelondroRowSet which would make
// everything much faster, but this is not an option here.
File tmp = new File(file.getParentFile(), file.getName() + ".prt");
Iterator<Row.Entry> i = this.index.rows(true, null);
OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024);
if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
int c = 0;
while (i.hasNext()) {
os.write(i.next().bytes());
c++;
}
os.flush();
os.close();
tmp.renameTo(file);
assert file.exists() : file.toString();
assert !tmp.exists() : tmp.toString();
return c;
}
public Row row() {
return index.row();
}
public void clear() {
index.clear();
}
public synchronized long get(final byte[] key) {
assert (key != null);
final Row.Entry indexentry = index.get(key);
if (indexentry == null) return -1;
return indexentry.getColLong(1);
}
public synchronized long put(final byte[] key, final long l) {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = index.row().newEntry();
newentry.setCol(0, key);
newentry.setCol(1, l);
final Row.Entry oldentry = index.replace(newentry);
if (oldentry == null) return -1;
return oldentry.getColLong(1);
}
public synchronized void putUnique(final byte[] key, final long l) {
assert l >= 0 : "l = " + l;
assert (key != null);
final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key);
newentry.setCol(1, l);
index.addUnique(newentry);
}
public synchronized long add(final byte[] key, long a) {
assert key != null;
assert a > 0; // it does not make sense to add 0. If this occurres, it is a performance issue
final Row.Entry indexentry = index.get(key);
if (indexentry == null) {
final Row.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key);
newentry.setCol(1, a);
index.addUnique(newentry);
return 1;
} else {
long i = indexentry.getColLong(1) + a;
indexentry.setCol(1, i);
index.put(indexentry);
return i;
}
}
public synchronized long inc(final byte[] key) {
return add(key, 1);
}
public synchronized long dec(final byte[] key) {
return add(key, -1);
}
public synchronized ArrayList<Long[]> removeDoubles() {
final ArrayList<RowCollection> indexreport = index.removeDoubles();
final ArrayList<Long[]> report = new ArrayList<Long[]>();
Long[] is;
int c;
for (final RowCollection rowset: indexreport) {
is = new Long[rowset.size()];
c = 0;
for (Row.Entry e: rowset) {
is[c++] = Long.valueOf(e.getColLong(1));
}
report.add(is);
}
return report;
}
public synchronized long remove(final byte[] key) {
assert (key != null);
final Row.Entry indexentry = index.remove(key);
if (indexentry == null) return -1;
return indexentry.getColLong(1);
}
public synchronized long removeone() {
final Row.Entry indexentry = index.removeOne();
if (indexentry == null) return -1;
return indexentry.getColLong(1);
}
public synchronized int size() {
return index.size();
}
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
return index.keys(up, firstKey);
}
public synchronized CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) {
return index.rows(up, firstKey);
}
public synchronized void close() {
index.close();
index = null;
}
/**
* this method creates a concurrent thread that can take entries that are used to initialize the map
* it should be used when a bytesLongMap is initialized when a file is read. Concurrency of FileIO and
* map creation will speed up the initialization process.
* @param keylength
* @param objectOrder
* @param space
* @param bufferSize
* @return
*/
public static initDataConsumer asynchronusInitializer(final int keylength, final ByteOrder objectOrder, final int space, final int expectedspace, int bufferSize) {
initDataConsumer initializer = new initDataConsumer(new LongHandleIndex(keylength, objectOrder, space, expectedspace), bufferSize);
ExecutorService service = Executors.newSingleThreadExecutor();
initializer.setResult(service.submit(initializer));
service.shutdown();
return initializer;
}
private static class entry {
public byte[] key;
public long l;
public entry(final byte[] key, final long l) {
this.key = key;
this.l = l;
}
}
private static final entry poisonEntry = new entry(new byte[0], 0);
public static class initDataConsumer implements Callable<LongHandleIndex> {
private BlockingQueue<entry> cache;
private LongHandleIndex map;
private Future<LongHandleIndex> result;
public initDataConsumer(LongHandleIndex map, int bufferCount) {
this.map = map;
cache = new ArrayBlockingQueue<entry>(bufferCount);
}
protected void setResult(Future<LongHandleIndex> result) {
this.result = result;
}
/**
* hand over another entry that shall be inserted into the BytesLongMap with an addl method
* @param key
* @param l
*/
public void consume(final byte[] key, final long l) {
try {
cache.put(new entry(key, l));
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/**
* to signal the initialization thread that no more entries will be sublitted with consumer()
* this method must be called. The process will not terminate if this is not called before.
*/
public void finish() {
try {
cache.put(poisonEntry);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
/**
* this must be called after a finish() was called. this method blocks until all entries
* had been processed, and the content was sorted. It returns the kelondroBytesLongMap
* that the user wanted to initialize
* @return
* @throws InterruptedException
* @throws ExecutionException
*/
public LongHandleIndex result() throws InterruptedException, ExecutionException {
return this.result.get();
}
public LongHandleIndex call() throws IOException {
try {
entry c;
while ((c = cache.take()) != poisonEntry) {
map.putUnique(c.key, c.l);
}
} catch (InterruptedException e) {
e.printStackTrace();
}
map.index.finishInitialization();
return map;
}
}
public static void main(String[] args) {
LongHandleIndex idx = new LongHandleIndex(12, Base64Order.enhancedCoder, 10000, 10000000);
byte[] s;
//long l;
for (int i = 0; i < 10000000; i = i + 8) {
s = Base64Order.enhancedCoder.uncardinal(Long.MAX_VALUE - i);
//l = Base64Order.enhancedCoder.cardinal(s);
//if (i != l) System.out.println("encoding bug for " + new String(s) + ", v = " + (Long.MAX_VALUE - i) + ", l = " + l);
//System.out.println(s);
if (idx.get(s) >= 0) System.out.println("search bug for " + new String(s) + ": " + idx.get(s));
idx.putUnique(s, 1);
}
}
}

@ -28,23 +28,17 @@ package de.anomic.kelondro.table;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.ObjectArrayCache;
import de.anomic.kelondro.io.RandomAccessInterface;
import de.anomic.kelondro.io.RandomAccessRecords;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.kelondroException;
public class CachedRecords extends AbstractRecords implements RandomAccessRecords {
// memory calculation
private static final int element_in_cache = 4; // for kelondroCollectionObjectMap: 4; for HashMap: 52
// static supervision objects: recognize and coordinate all activites
private static final TreeMap<String, CachedRecords> recordTracker = new TreeMap<String, CachedRecords>();
private static final long memStopGrow = 40 * 1024 * 1024; // a limit for the node cache to stop growing if less than this memory amount is available
@ -69,23 +63,6 @@ public class CachedRecords extends AbstractRecords implements RandomAccessRecord
if (useNodeCache) recordTracker.put(this.filename, this);
}
public CachedRecords(
final RandomAccessInterface ra, final String filename, final boolean useNodeCache, final long preloadTime,
final short ohbytec, final short ohhandlec,
final Row rowdef, final int FHandles, final int txtProps, final int txtPropWidth,
final boolean exitOnFail) {
super(ra, filename, useNodeCache, ohbytec, ohhandlec, rowdef, FHandles, txtProps, txtPropWidth, exitOnFail);
initCache(useNodeCache, preloadTime);
if (useNodeCache) recordTracker.put(this.filename, this);
}
public CachedRecords(
final RandomAccessInterface ra, final String filename, final boolean useNodeCache, final long preloadTime) throws IOException{
super(ra, filename, useNodeCache);
initCache(useNodeCache, preloadTime);
if (useNodeCache) recordTracker.put(this.filename, this);
}
private void initCache(final boolean useNodeCache, final long preloadTime) {
if (useNodeCache) {
this.cacheHeaders = new ObjectArrayCache(this.headchunksize, 0);
@ -120,7 +97,7 @@ public class CachedRecords extends AbstractRecords implements RandomAccessRecord
}
}
public int cacheGrowStatus() {
private int cacheGrowStatus() {
final long available = MemoryControl.available();
if ((cacheHeaders != null) && (available - 2 * 1024 * 1024 < cacheHeaders.memoryNeededForGrow())) return 0;
return cacheGrowStatus(available, memStopGrow, memStartShrink);
@ -140,43 +117,6 @@ public class CachedRecords extends AbstractRecords implements RandomAccessRecord
return 0;
}
public static long getMemStopGrow() {
return memStopGrow ;
}
public static long getMemStartShrink() {
return memStartShrink ;
}
public static final Iterator<String> filenames() {
// iterates string objects; all file names from record tracker
return recordTracker.keySet().iterator();
}
public static final Map<String, String> memoryStats(final String filename) {
// returns a map for each file in the tracker;
// the map represents properties for each record oobjects,
// i.e. for cache memory allocation
final CachedRecords theRecord = recordTracker.get(filename);
return theRecord.memoryStats();
}
private final Map<String, String> memoryStats() {
// returns statistical data about this object
if (cacheHeaders == null) return null;
final HashMap<String, String> map = new HashMap<String, String>();
map.put("nodeChunkSize", Integer.toString(this.headchunksize + element_in_cache));
map.put("nodeCacheCount", Integer.toString(cacheHeaders.size()));
map.put("nodeCacheMem", Integer.toString(cacheHeaders.size() * (this.headchunksize + element_in_cache)));
map.put("nodeCacheReadHit", Integer.toString(readHit));
map.put("nodeCacheReadMiss", Integer.toString(readMiss));
map.put("nodeCacheWriteUnique", Integer.toString(writeUnique));
map.put("nodeCacheWriteDouble", Integer.toString(writeDouble));
map.put("nodeCacheDeletes", Integer.toString(cacheDelete));
map.put("nodeCacheFlushes", Integer.toString(cacheFlush));
return map;
}
protected synchronized void deleteNode(final RecordHandle handle) throws IOException {
if (cacheHeaders == null) {
super.deleteNode(handle);

@ -38,8 +38,8 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.Column;
import de.anomic.kelondro.index.HandleMap;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowCollection;
import de.anomic.kelondro.index.RowSet;
@ -76,7 +76,7 @@ public class EcoTable implements ObjectIndex {
public static final long maxarraylength = 134217727L; // that may be the maxmimum size of array length in some JVMs
private static final long minmemremaining = 20 * 1024 * 1024; // if less than this memory is remaininig, the memory copy of a table is abandoned
private RowSet table;
private IntegerHandleIndex index;
private HandleMap index;
private BufferedEcoFS file;
private Row rowdef;
private int fail;
@ -132,7 +132,7 @@ public class EcoTable implements ObjectIndex {
table = null; System.gc();
Log.logSevere("ECOTABLE", tablefile + ": RAM after releasing the table: " + (MemoryControl.available() / 1024 / 1024) + "MB");
}
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, records, 100000);
index = new HandleMap(rowdef.primaryKeyLength, rowdef.objectOrder, 4, records, 100000);
Log.logInfo("ECOTABLE", tablefile + ": EcoTable " + tablefile.toString() + " has table copy " + ((table == null) ? "DISABLED" : "ENABLED"));
// read all elements from the file into the copy table
@ -175,7 +175,7 @@ public class EcoTable implements ObjectIndex {
// remove doubles
if (!freshFile) {
final ArrayList<Integer[]> doubles = index.removeDoubles();
final ArrayList<Long[]> doubles = index.removeDoubles();
//assert index.size() + doubles.size() + fail == i;
//System.out.println(" -removed " + doubles.size() + " doubles- done.");
if (doubles.size() > 0) {
@ -184,19 +184,19 @@ public class EcoTable implements ObjectIndex {
// first put back one element each
final byte[] record = new byte[rowdef.objectsize];
key = new byte[rowdef.primaryKeyLength];
for (final Integer[] ds: doubles) {
for (final Long[] ds: doubles) {
file.get(ds[0].intValue(), record, 0);
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
index.putUnique(key, ds[0].intValue());
}
// then remove the other doubles by removing them from the table, but do a re-indexing while doing that
// first aggregate all the delete positions because the elements from the top positions must be removed first
final TreeSet<Integer> delpos = new TreeSet<Integer>();
for (final Integer[] ds: doubles) {
final TreeSet<Long> delpos = new TreeSet<Long>();
for (final Long[] ds: doubles) {
for (int j = 1; j < ds.length; j++) delpos.add(ds[j]);
}
// now remove the entries in a sorted way (top-down)
Integer top;
Long top;
while (delpos.size() > 0) {
top = delpos.last();
delpos.remove(top);
@ -292,13 +292,13 @@ public class EcoTable implements ObjectIndex {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
final ArrayList<RowCollection> report = new ArrayList<RowCollection>();
RowSet rows;
final TreeSet<Integer> d = new TreeSet<Integer>();
final TreeSet<Long> d = new TreeSet<Long>();
final byte[] b = new byte[rowdef.objectsize];
Integer L;
Long L;
Row.Entry inconsistentEntry;
// iterate over all entries that have inconsistent index references
long lastlog = System.currentTimeMillis();
for (final Integer[] is: index.removeDoubles()) {
for (final Long[] is: index.removeDoubles()) {
// 'is' is the set of all indexes, that have the same reference
// we collect that entries now here
rows = new RowSet(this.rowdef, is.length);
@ -314,7 +314,7 @@ public class EcoTable implements ObjectIndex {
report.add(rows);
}
// finally delete the affected rows, but start with largest id first, otherwise we overwrite wrong entries
Integer s;
Long s;
while (d.size() > 0) {
s = d.last();
d.remove(s);
@ -347,7 +347,7 @@ public class EcoTable implements ObjectIndex {
if ((file == null) || (index == null)) return null;
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", fail = " + fail;
assert ((table == null) || (table.size() == index.size()));
final int i = index.get(key);
final int i = (int) index.get(key);
if (i == -1) return null;
final byte[] b = new byte[rowdef.objectsize];
if (table == null) {
@ -388,7 +388,7 @@ public class EcoTable implements ObjectIndex {
assert row != null;
assert row.bytes() != null;
if ((row == null) || (row.bytes() == null)) return null;
final int i = index.get(row.getPrimaryKeyBytes());
final int i = (int) index.get(row.getPrimaryKeyBytes());
if (i == -1) {
addUnique(row);
return null;
@ -422,7 +422,7 @@ public class EcoTable implements ObjectIndex {
assert row != null;
assert row.bytes() != null;
if (file == null || row == null || row.bytes() == null) return;
final int i = index.get(row.getPrimaryKeyBytes());
final int i = (int) index.get(row.getPrimaryKeyBytes());
if (i == -1) {
addUnique(row);
return;
@ -480,7 +480,7 @@ public class EcoTable implements ObjectIndex {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
assert key.length == rowdef.primaryKeyLength;
final int i = index.get(key);
final int i = (int) index.get(key);
if (i == -1) return null; // nothing to do
// prepare result
@ -491,12 +491,12 @@ public class EcoTable implements ObjectIndex {
assert i < index.size();
if (table == null) {
if (i == index.size() - 1) {
ix = index.remove(key);
ix = (int) index.remove(key);
assert ix == i;
file.cleanLast(b, 0);
} else {
assert i < index.size() - 1;
ix = index.remove(key);
ix = (int) index.remove(key);
assert ix == i;
file.get(i, b, 0);
file.cleanLast(p, 0);
@ -514,13 +514,13 @@ public class EcoTable implements ObjectIndex {
if (i == index.size() - 1) {
// special handling if the entry is the last entry in the file
ix = index.remove(key);
ix = (int) index.remove(key);
assert ix == i;
table.removeRow(i, false);
file.cleanLast();
} else {
// switch values
ix = index.remove(key);
ix = (int) index.remove(key);
assert ix == i;
final Row.Entry te = table.removeOne();
@ -546,7 +546,7 @@ public class EcoTable implements ObjectIndex {
final byte[] le = new byte[rowdef.objectsize];
file.cleanLast(le, 0);
final Row.Entry lr = rowdef.newEntry(le);
final int i = index.remove(lr.getPrimaryKeyBytes());
final int i = (int) index.remove(lr.getPrimaryKeyBytes());
assert i >= 0;
if (table != null) table.remove(lr.getPrimaryKeyBytes());
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
@ -579,7 +579,7 @@ public class EcoTable implements ObjectIndex {
// initialize index and copy table
table = (table == null) ? null : new RowSet(taildef, 1);
index = new IntegerHandleIndex(rowdef.primaryKeyLength, rowdef.objectOrder, 1, 100000);
index = new HandleMap(rowdef.primaryKeyLength, rowdef.objectOrder, 4, 1, 100000);
}
public Row row() {
@ -659,7 +659,7 @@ public class EcoTable implements ObjectIndex {
final byte[] k = i.next();
assert k != null;
if (k == null) return null;
this.c = index.get(k);
this.c = (int) index.get(k);
if (this.c < 0) throw new ConcurrentModificationException(); // this should only happen if the table was modified during the iteration
final byte[] b = new byte[rowdef.objectsize];
if (table == null) {

@ -1,431 +0,0 @@
// kelondroFlexTable.java
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 01.06.2006 on http://www.anomic.de
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.table;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowCollection;
import de.anomic.kelondro.index.RowSet;
import de.anomic.kelondro.index.ObjectIndex;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.kelondro.util.kelondroException;
import de.anomic.kelondro.util.Log;
public class FlexTable extends FlexWidthArray implements ObjectIndex {
// static tracker objects
private static TreeMap<String, FlexTable> tableTracker = new TreeMap<String, FlexTable>();
// class objects
protected IntegerHandleIndex index;
private boolean RAMIndex;
/**
* Deprecated Class. Please use kelondroEcoTable instead
*/
@Deprecated
public FlexTable(final File path, final String tablename, final Row rowdef, int minimumSpace, final boolean resetOnFail) {
// the buffersize applies to a possible load of the ram-index
// the minimumSpace is a initial allocation space for the index; names the number of index slots
// if the ram is not sufficient, a tree file is generated
// if, and only if a tree file exists, the preload time is applied
super(path, tablename, rowdef, resetOnFail);
if ((super.col[0].size() < 0) && (resetOnFail)) try {
super.reset();
} catch (final IOException e2) {
e2.printStackTrace();
throw new kelondroException(e2.getMessage());
}
minimumSpace = Math.max(minimumSpace, super.size());
try {
final long neededRAM = 10 * 1024 * 104 + (long) ((super.row().primaryKeyLength + 4) * minimumSpace * RowCollection.growfactor);
final File newpath = new File(path, tablename);
final File indexfile = new File(newpath, "col.000.index");
String description = "";
description = new String(this.col[0].getDescription());
final int p = description.indexOf(';', 4);
final long stt = (p > 0) ? Long.parseLong(description.substring(4, p)) : 0;
System.out.println("*** Last Startup time: " + stt + " milliseconds");
final long start = System.currentTimeMillis();
// we use a RAM index
if (indexfile.exists()) {
// delete existing index file
System.out.println("*** Delete File index " + indexfile);
FileUtils.deletedelete(indexfile);
}
// fill the index
System.out.print("*** Loading RAM index for " + size() + " entries from " + newpath + "; available RAM = " + (MemoryControl.available() >> 20) + " MB, allocating " + (neededRAM >> 20) + " MB for index.");
index = initializeRamIndex(minimumSpace);
System.out.println(" -done-");
System.out.println(index.size() + " index entries initialized and sorted from " + super.col[0].size() + " keys.");
RAMIndex = true;
tableTracker.put(this.filename(), this);
// check consistency
final ArrayList<Integer[]> doubles = index.removeDoubles();
if (doubles.size() > 0) {
System.out.println("DEBUG: WARNING - FlexTable " + newpath.toString() + " has " + doubles.size() + " doubles");
}
// assign index to wrapper
description = "stt=" + Long.toString(System.currentTimeMillis() - start) + ";";
super.col[0].setDescription(description.getBytes());
} catch (final IOException e) {
if (resetOnFail) {
RAMIndex = true;
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0);
} else {
throw new kelondroException(e.getMessage());
}
}
}
public void clear() throws IOException {
super.reset();
RAMIndex = true;
index = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, 0, 0);
}
public static int staticSize(final File path, final String tablename) {
return FlexWidthArray.staticsize(path, tablename);
}
public static int staticRAMIndexNeed(final File path, final String tablename, final Row rowdef) {
return (int) ((rowdef.primaryKeyLength + 4) * staticSize(path, tablename) * RowCollection.growfactor);
}
public boolean hasRAMIndex() {
return RAMIndex;
}
public synchronized boolean has(final byte[] key) {
// it is not recommended to implement or use a has predicate unless
// it can be ensured that it causes no IO
if ((AbstractRecords.debugmode) && (RAMIndex != true)) Log.logWarning("kelondroFlexTable", "RAM index warning in file " + super.tablename);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return index.has(key);
}
private IntegerHandleIndex initializeRamIndex(final int initialSpace) {
    // build a fresh RAM index over column 0 (the primary key column);
    // space is sized to the current content plus the requested head-room
    final int space = Math.max(super.col[0].size(), initialSpace) + 1;
    if (space < 0) throw new kelondroException("wrong space: " + space);
    final IntegerHandleIndex ramIndex = new IntegerHandleIndex(super.row().primaryKeyLength, super.rowdef.objectOrder, space, 0);
    final Iterator<Node> nodes = super.col[0].contentNodes(-1);
    while (nodes.hasNext()) {
        final Node node = nodes.next();
        final int handle = node.handle().hashCode();
        final byte[] key;
        try {
            key = node.getKey();
        } catch (IOException e1) {
            e1.printStackTrace();
            break;
        }
        assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the contentNodes iterator
        ramIndex.putUnique(key, handle);
        if (handle % 10000 == 0) {
            // progress indicator while scanning large tables
            System.out.print('.');
            System.out.flush();
        }
    }
    System.out.print(" -ordering- ");
    System.out.flush();
    return ramIndex;
}
public synchronized Row.Entry get(final byte[] key) throws IOException {
    // fetch the row for the given primary key, or null if unknown
    if (index == null) return null; // case may happen during shutdown
    final int pos = index.get(key);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    if (pos < 0) return null;
    // pos may be greater than this.size(): deleted entries are subtracted from
    // the 'real' table size, so the size may be smaller than a row position.
    // Column 0 holds the primary key in its own file, so its value is already
    // known (it is the key) and the column-0 file lookup can be omitted.
    final Row.Entry result = super.getOmitCol0(pos, key);
    assert result != null;
    return result;
}
public synchronized Row.Entry put(final Row.Entry row, final Date entryDate) throws IOException {
    // the entry date is not stored by this table type; delegate to replace()
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    return replace(row);
}
public synchronized Row.Entry replace(final Row.Entry row) throws IOException {
    // insert the row, or overwrite an existing row with the same primary key;
    // returns the previous entry for that key, or null if the key was new
    assert (row != null);
    assert (!(Log.allZero(row.getColBytes(0))));
    assert row.objectsize() <= this.rowdef.objectsize;
    final byte[] key = row.getColBytes(0);
    if (index == null) return null; // case may appear during shutdown
    int pos = index.get(key);
    if (pos < 0) {
        // new key: append the row and remember its file position in the index
        pos = super.add(row);
        index.put(key, pos);
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        return null;
    }
    //System.out.println("row.key=" + serverLog.arrayList(row.bytes(), 0, row.objectsize()));
    final Row.Entry oldentry = super.get(pos);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    if (oldentry == null) {
        // repair path: the index pointed to an empty cell; drop the stale
        // index entry and re-insert the row at a freshly allocated position
        Log.logSevere("kelondroFlexTable", "put(): index failure; the index pointed to a cell which is empty. content.size() = " + this.size() + ", index.size() = " + index.size());
        // patch bug ***** FIND CAUSE! (see also: remove)
        final int oldindex = index.remove(key);
        assert oldindex >= 0;
        assert index.get(key) == -1;
        // here is this.size() > index.size() because of remove operation above
        index.put(key, super.add(row));
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        return null;
    }
    assert oldentry != null : "overwrite of empty position " + pos + ", index management must have failed before";
    assert rowdef.objectOrder.equal(oldentry.getPrimaryKeyBytes(), key) : "key and row does not match; key = " + NaturalOrder.arrayList(key, 0, key.length) + " row.key = " + NaturalOrder.arrayList(oldentry.getPrimaryKeyBytes(), 0, rowdef.primaryKeyLength);
    // existing key: overwrite the row in place and hand back the old content
    super.set(pos, row);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    return oldentry;
}
public synchronized void put(final Row.Entry row) throws IOException {
    // insert the row, or overwrite an existing row with the same primary key
    assert (row != null);
    assert (!(Log.allZero(row.getColBytes(0))));
    assert row.objectsize() <= this.rowdef.objectsize;
    final byte[] key = row.getColBytes(0);
    if (index == null) return; // case may appear during shutdown
    final int pos = index.get(key);
    if (pos >= 0) {
        // existing key: overwrite the row in place
        //System.out.println("row.key=" + serverLog.arrayList(row.bytes(), 0, row.objectsize()));
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        super.set(pos, row);
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        return;
    }
    // new key: append the row and register its file position in the index
    final int newpos = super.add(row);
    index.put(key, newpos);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
}
public synchronized void addUnique(final Row.Entry row) throws IOException {
    // caller guarantees the key is not yet present: append without a prior lookup
    assert row.objectsize() == this.rowdef.objectsize;
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    index.putUnique(row.getColBytes(0), super.add(row));
}
public synchronized ArrayList<RowCollection> removeDoubles() throws IOException {
    // collect all rows whose keys occur more than once, report them grouped
    // by key, and delete the affected rows from the table
    final ArrayList<RowCollection> report = new ArrayList<RowCollection>();
    final TreeSet<Integer> affected = new TreeSet<Integer>();
    for (final Integer[] positions : index.removeDoubles()) {
        final RowSet rows = new RowSet(this.rowdef, positions.length);
        for (final Integer p : positions) {
            affected.add(p);
            rows.addUnique(this.get(p.intValue()));
        }
        report.add(rows);
    }
    // delete the affected rows, largest position first, because removing a row
    // fills its gap with the last row and would otherwise shift later positions
    while (!affected.isEmpty()) {
        this.remove(affected.pollLast().intValue());
    }
    return report;
}
public synchronized Row.Entry remove(final byte[] key) throws IOException {
    // the underlying data structure is a file, where the order cannot be maintained. Gaps are filled with new values.
    // returns the removed entry, or null if the key was not present
    final int i = index.remove(key);
    assert (index.get(key) < 0); // must be deleted
    if (i < 0) {
        // key was not in the index; nothing to remove
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        return null;
    }
    final Row.Entry r = super.getOmitCol0(i, key);
    if (r == null) {
        // repair path: the index pointed to an empty cell; the stale index
        // entry was already removed above, so just report and bail out
        Log.logSevere("kelondroFlexTable", "remove(): index failure; the index pointed to a cell which is empty. content.size() = " + this.size() + ", index.size() = " + ((index == null) ? 0 : index.size()));
        // patch bug ***** FIND CAUSE! (see also: put)
        assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
        return null;
    }
    assert r != null : "r == null"; // should be avoided with path above
    assert rowdef.objectOrder.equal(r.getPrimaryKeyBytes(), key) : "key and row does not match; key = " + NaturalOrder.arrayList(key, 0, key.length) + " row.key = " + NaturalOrder.arrayList(r.getPrimaryKeyBytes(), 0, rowdef.primaryKeyLength);
    super.remove(i);
    // NOTE(review): the assert message below calls super.get(i).bytes(); the message
    // is only evaluated when the condition fails, i.e. when get(i) != null, so no NPE
    assert super.get(i) == null : "i = " + i + ", get(i) = " + NaturalOrder.arrayList(super.get(i).bytes(), 0, 12);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    return r;
}
public synchronized Row.Entry removeOne() throws IOException {
    // remove an arbitrary entry: take one position from the index,
    // fetch the row stored there and delete it from the table
    final int pos = index.removeone();
    if (pos < 0) return null; // table is empty
    final Row.Entry removed = super.get(pos);
    super.remove(pos);
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    return removed;
}
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
    // key iteration is answered directly from the RAM index, no file access needed
    return index.keys(up, firstKey);
}
public synchronized CloneableIterator<Row.Entry> rows() throws IOException {
    // iterate over all rows, ascending from the first key
    return new rowIterator(true, null);
}
public synchronized CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) throws IOException {
    // Fix: the original returned "new rowIterator(up, firstKey)" on BOTH branches
    // of the index==null check; the rowIterator constructor dereferences index and
    // would throw a NullPointerException during shutdown. Return null instead,
    // consistent with get()/put()/replace(), which return early when index == null.
    if (index == null) return null; // case may happen during shutdown
    assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
    return new rowIterator(up, firstKey);
}
public class rowIterator implements CloneableIterator<Row.Entry> {
    CloneableIterator<Row.Entry> indexIterator;
    boolean up;
    public rowIterator(final boolean up, final byte[] firstKey) throws IOException {
        // iterate table rows in index order, starting at firstKey (null = from the begin/end)
        this.up = up;
        this.indexIterator = index.rows(up, firstKey);
    }
    public rowIterator clone(final Object modifier) {
        try {
            return new rowIterator(this.up, (byte[]) modifier);
        } catch (final IOException e) {
            return null;
        }
    }
    public boolean hasNext() {
        return this.indexIterator.hasNext();
    }
    public Row.Entry next() {
        // skip possible null entries delivered by the index iterator
        Row.Entry indexEntry = null;
        while (indexEntry == null && this.indexIterator.hasNext()) {
            indexEntry = this.indexIterator.next();
        }
        if (indexEntry == null) {
            Log.logSevere("kelondroFlexTable.rowIterator: " + tablename, "indexIterator returned null");
            return null;
        }
        // column 1 of the index entry holds the row position in the table file
        final int pos = (int) indexEntry.getColLong(1);
        try {
            return get(pos);
        } catch (final IOException e) {
            e.printStackTrace();
            return null;
        }
    }
    public void remove() {
        this.indexIterator.remove();
    }
}
public static final Iterator<String> filenames() {
    // all file names currently registered in the record tracker
    return tableTracker.keySet().iterator();
}
public static final Map<String, String> memoryStats(final String filename) {
    // returns a property map for the tracked table with the given file name,
    // describing its record objects, i.e. for cache memory allocation
    final FlexTable trackedTable = tableTracker.get(filename);
    return trackedTable.memoryStats();
}
private final Map<String, String> memoryStats() {
    // statistical data about this object; all values are "0" when no RAM index is present
    final HashMap<String, String> map = new HashMap<String, String>();
    if (RAMIndex) {
        map.put("tableIndexChunkSize", Integer.toString(index.row().objectsize));
        map.put("tableIndexCount", Integer.toString(index.size()));
        map.put("tableIndexMem", Integer.toString((int) (index.row().objectsize * index.size() * RowCollection.growfactor)));
    } else {
        map.put("tableIndexChunkSize", "0");
        map.put("tableIndexCount", "0");
        map.put("tableIndexMem", "0");
    }
    return map;
}
public synchronized void close() {
    // unregister from the tracker, report any content/index inconsistency,
    // then shut down the index and the underlying file structure
    if (tableTracker.remove(this.filename) == null) {
        Log.logWarning("kelondroFlexTable", "close(): file '" + this.filename + "' was not tracked with record tracker.");
    }
    if (index != null) {
        if (this.size() != index.size()) {
            Log.logSevere("kelondroFlexTable", this.filename + " close(): inconsistent content/index size. content.size() = " + this.size() + ", index.size() = " + index.size());
        }
        index.close();
        index = null;
    }
    super.close();
}
public static void main(final String[] args) {
    // open a table file, add one entry and exit.
    // Fix: the original accessed args[0]/args[1] unconditionally and crashed with
    // an ArrayIndexOutOfBoundsException when called without arguments.
    if (args.length < 2) {
        System.out.println("usage: FlexTable <path> <tablename>");
        return;
    }
    final File f = new File(args[0]);
    final String name = args[1];
    final Row row = new Row("Cardinal key-4 {b256}, byte[] x-64", NaturalOrder.naturalOrder);
    try {
        final FlexTable t = new FlexTable(f, name, row, 0, true);
        final Row.Entry entry = row.newEntry();
        entry.setCol(0, System.currentTimeMillis());
        entry.setCol(1, "dummy".getBytes());
        t.put(entry);
        t.close();
    } catch (final IOException e) {
        e.printStackTrace();
    }
}
}

@ -164,11 +164,7 @@ public class SplitTable implements ObjectIndex {
maxtime = time;
}
if (f.isDirectory()) {
ram = FlexTable.staticRAMIndexNeed(path, tablefile[i], rowdef);
} else {
ram = EcoTable.staticRAMIndexNeed(f, rowdef);
}
ram = EcoTable.staticRAMIndexNeed(f, rowdef);
if (ram > 0) {
t.put(tablefile[i], Long.valueOf(ram));
sum += ram;
@ -200,11 +196,6 @@ public class SplitTable implements ObjectIndex {
t.remove(maxf);
if (maxf != null) {
f = new File(path, maxf);
if (f.isDirectory()) {
// this is a kelonodroFlex table
FlexTable.delete(path, maxf);
Log.logInfo("kelondroSplitTable", "replaced partial flex table " + f + " by new eco table");
}
Log.logInfo("kelondroSplitTable", "opening partial eco table " + f);
table = new EcoTable(f, rowdef, EcoTable.tailCacheUsageAuto, EcoFSBufferSize, 0);
tables.put(maxf, table);

@ -31,7 +31,7 @@ import java.util.Iterator;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBArray;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.HandleMap;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowSet;
import de.anomic.kelondro.order.ByteOrder;
@ -294,14 +294,14 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
return donesomething;
}
public static <ReferenceType extends Reference> IntegerHandleIndex referenceHashes(
public static <ReferenceType extends Reference> HandleMap referenceHashes(
final File heapLocation,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder termOrder,
final Row payloadrow) throws IOException {
System.out.println("CELL REFERENCE COLLECTION startup");
IntegerHandleIndex references = new IntegerHandleIndex(payloadrow.primaryKeyLength, termOrder, 0, 1000000);
HandleMap references = new HandleMap(payloadrow.primaryKeyLength, termOrder, 4, 0, 1000000);
String[] files = heapLocation.list();
for (String f: files) {
if (f.length() < 22 || !f.startsWith("index") || !f.endsWith(".blob")) continue;
@ -324,7 +324,7 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
if (reference == null) continue;
mh = reference.metadataHash();
if (mh == null) continue;
references.inc(mh.getBytes(), 1);
references.inc(mh.getBytes());
}
count++;
// write a log

@ -63,6 +63,7 @@ public final class plasmaSearchQuery {
public int contentdom;
public String urlMask;
public String targetlang;
public String navigators;
public int domType;
public int zonecode;
public int domMaxTargets;
@ -115,6 +116,7 @@ public final class plasmaSearchQuery {
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.specialRights = false;
this.navigators = "all";
}
public plasmaSearchQuery(
@ -124,6 +126,7 @@ public final class plasmaSearchQuery {
final plasmaSearchRankingProfile ranking,
final int maxDistance, final String prefer, final int contentdom,
final String language,
final String navigators,
final boolean onlineSnippetFetch,
final int lines, final int offset, final String urlMask,
final int domType, final String domGroupName, final int domMaxTargets,
@ -145,6 +148,7 @@ public final class plasmaSearchQuery {
this.urlMask = urlMask;
assert language != null;
this.targetlang = language;
this.navigators = navigators;
this.domType = domType;
this.zonecode = domainzone;
this.domMaxTargets = domMaxTargets;

Loading…
Cancel
Save