- removed the write buffer from kelondroCache (it was never used because it was buggy; it will now be replaced by the new EcoBuffer)

- added a new data structure 'eco' for an index file that should need only 50% of the write IO of kelondroFlex.
The new eco index is not used yet, but it has already been tested successfully with the collectionIndex.
The main purpose is to replace kelondroFlex wherever enough RAM is available.
Otherwise, kelondroFlex stays as an option for the low-memory case (where it can even use a file-based index).
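A sketch of the intended selection logic (illustration only, not part of this commit; 'path', 'filenameStub', 'preloadTime' and 'expectedRecords' are placeholders, the constructor signatures follow the diff below):

kelondroRow rowdef = new kelondroRow("byte[] key-12, byte[] value-4", kelondroNaturalOrder.naturalOrder, 0);
File f = new File(path, filenameStub + ".index");
kelondroIndex index;
if (serverMemory.request((long) expectedRecords * rowdef.objectsize, true)) {
    // enough RAM available: use the new eco table (100 = write buffer size in entries)
    index = new kelondroEcoTable(f, rowdef, 100);
} else {
    // low memory: keep the flex table, which can even use a file-based index
    index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, rowdef, expectedRecords, true);
}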


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4337 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent dbdec0f4d3
commit dc26d6262b

@ -14,6 +14,7 @@ import javax.imageio.ImageIO;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroEcoTable;
import de.anomic.kelondro.kelondroFlexSplitTable;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex;
@ -165,7 +166,7 @@ public class dbtest {
System.out.println("INVALID: " + dbEntry);
} /* else {
System.out.println("_VALID_: " + dbEntry);
getTable().remove(entry.getKey());
getTable().remove(entry.getKey(), true);
} */
}
} catch (IOException e) {
@ -197,7 +198,7 @@ public class dbtest {
}
if (dbe.equals("kelondroTree")) {
File tablefile = new File(tablename + ".kelondro.db");
table = new kelondroCache(new kelondroTree(tablefile, true, preload, testRow), true, false);
table = new kelondroCache(new kelondroTree(tablefile, true, preload, testRow));
}
if (dbe.equals("kelondroSplittedTree")) {
File tablepath = new File(tablename).getParentFile();
@ -213,6 +214,9 @@ public class dbtest {
File tablepath = new File(tablename).getParentFile();
table = new kelondroFlexSplitTable(tablepath, new File(tablename).getName(), preload, testRow, true);
}
if (dbe.equals("kelondroEcoTable")) {
table = new kelondroEcoTable(new File(tablename), testRow, 100);
}
if (dbe.equals("mysql")) {
table = new kelondroSQLTable("mysql", testRow);
}

@ -0,0 +1,149 @@
// kelondroBufferedEcoFS.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
public class kelondroBufferedEcoFS {
private kelondroEcoFS efs;
private int maxEntries;
private TreeMap<Integer, byte[]> buffer;
/*
* The kelondroBufferedEcoFS extends the IO reduction of the EcoFS by providing a
* write buffer for records that lie inside the already-filled part of the file.
* That means an entry that is written to the end of the file is not buffered here;
* appends are passed directly to the EcoFS, which has its own append buffer.
*/
public kelondroBufferedEcoFS(kelondroEcoFS efs, int maxEntries) throws IOException {
this.efs = efs;
this.maxEntries = maxEntries;
this.buffer = new TreeMap<Integer, byte[]>();
}
private void flushBuffer() throws IOException {
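// note: the TreeMap iterates its entries in ascending index order, so the
// flushed records reach the file with sequential, forward-moving seeks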
Iterator<Map.Entry<Integer, byte[]>> i = buffer.entrySet().iterator();
Map.Entry<Integer, byte[]> entry;
while (i.hasNext()) {
entry = i.next();
efs.put(entry.getKey().intValue(), entry.getValue(), 0);
}
buffer.clear();
}
public synchronized int size() throws IOException {
return efs.size();
}
public File filename() {
return efs.filename();
}
public synchronized void close() {
try {
flushBuffer();
} catch (IOException e) {
e.printStackTrace();
}
efs.close();
efs = null;
}
public synchronized void finalize() {
if (this.efs != null) this.close();
}
public synchronized void get(int index, byte[] b, int start) throws IOException {
assert b.length - start >= efs.recordsize;
if (index >= size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.get(" + index + ") outside bounds (" + this.size() + ")");
byte[] bb = buffer.get(new Integer(index));
if (bb == null) {
efs.get(index, b, start);
} else {
System.arraycopy(bb, 0, b, start, efs.recordsize);
}
}
public synchronized void put(int index, byte[] b, int start) throws IOException {
assert b.length - start >= efs.recordsize;
if (index > size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
if (index == efs.size()) {
efs.put(index, b, start);
} else {
byte[] bb = new byte[efs.recordsize];
System.arraycopy(b, start, bb, 0, efs.recordsize);
buffer.put(new Integer(index), bb);
if (buffer.size() > this.maxEntries) flushBuffer();
}
}
public synchronized void add(byte[] b, int start) throws IOException {
put(size(), b, start);
}
public synchronized void clean(int index, byte[] b, int start) throws IOException {
assert b.length - start >= efs.recordsize;
if (index >= size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
byte[] bb = buffer.get(new Integer(index));
if (bb == null) {
efs.clean(index, b, start);
} else {
System.arraycopy(bb, 0, b, start, efs.recordsize);
buffer.remove(new Integer(index));
efs.clean(index);
}
}
public synchronized void clean(int index) throws IOException {
if (index >= size()) throw new IndexOutOfBoundsException("kelondroBufferedEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
buffer.remove(new Integer(index));
efs.clean(index);
}
public synchronized void cleanLast(byte[] b, int start) throws IOException {
assert b.length - start >= efs.recordsize;
Integer i = new Integer(size() - 1);
byte[] bb = buffer.get(i);
if (bb == null) {
efs.clean(i.intValue(), b, start);
} else {
System.arraycopy(bb, 0, b, start, efs.recordsize);
buffer.remove(i);
efs.clean(i.intValue());
}
}
public synchronized void cleanLast() throws IOException {
Integer i = new Integer(size() - 1);
buffer.remove(i);
efs.clean(i.intValue());
}
}
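A usage sketch for kelondroBufferedEcoFS (illustration only, assuming an 8-byte record size; the file name is a placeholder):

kelondroEcoFS efs = new kelondroEcoFS(new File("test.eco"), 8);
kelondroBufferedEcoFS bfs = new kelondroBufferedEcoFS(efs, 100); // flushes once more than 100 overwrites are pending
bfs.add("01234567".getBytes(), 0);    // appends bypass this buffer and go directly to the EcoFS
bfs.put(0, "76543210".getBytes(), 0); // in-place overwrite; held in the TreeMap buffer
byte[] b = new byte[8];
bfs.get(0, b, 0);                     // served from the buffer without file IO
bfs.close();                          // flushes pending overwrites and closes the file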

@ -56,29 +56,21 @@ public class kelondroCache implements kelondroIndex {
// class objects
private kelondroRowSet readHitCache;
private kelondroRowSet readMissCache;
private kelondroRowSet writeBufferUnique; // entries of that buffer are not contained in index
private kelondroRowSet writeBufferDoubles; // entries of that buffer shall overwrite entries in index
private kelondroIndex index;
private kelondroRow keyrow;
private int readHit, readMiss, writeUnique, writeDouble, cacheDelete, cacheFlush;
private int hasnotHit, hasnotMiss, hasnotUnique, hasnotDouble, hasnotDelete;
private boolean read, write;
public kelondroCache(kelondroIndex backupIndex, boolean read, boolean write) {
assert write == false;
public kelondroCache(kelondroIndex backupIndex) {
this.index = backupIndex;
this.read = read;
this.write = write;
init();
objectTracker.put(backupIndex.filename(), this);
}
private void init() {
this.keyrow = new kelondroRow(new kelondroColumn[]{index.row().column(index.row().primaryKeyIndex)}, index.row().objectOrder, 0);
this.readHitCache = (read) ? new kelondroRowSet(index.row(), 0) : null;
this.readMissCache = (read) ? new kelondroRowSet(this.keyrow, 0) : null;
this.writeBufferUnique = (write) ? new kelondroRowSet(index.row(), 0) : null;
this.writeBufferDoubles = (write) ? new kelondroRowSet(index.row(), 0) : null;
this.readHitCache = new kelondroRowSet(index.row(), 0);
this.readMissCache = new kelondroRowSet(this.keyrow, 0);
this.readHit = 0;
this.readMiss = 0;
this.writeUnique = 0;
@ -97,9 +89,7 @@ public class kelondroCache implements kelondroIndex {
}
public int writeBufferSize() {
return
((writeBufferUnique == null) ? 0 : writeBufferUnique.size()) +
((writeBufferDoubles == null) ? 0 : writeBufferDoubles.size());
return 0;
}
public kelondroProfile profile() {
@ -163,84 +153,6 @@ public class kelondroCache implements kelondroIndex {
return kelondroCachedRecords.cacheGrowStatus(serverMemory.available(), memStopGrow, memStartShrink);
}
private void flushUnique() throws IOException {
if (writeBufferUnique == null) return;
synchronized (writeBufferUnique) {
Iterator<kelondroRow.Entry> i = writeBufferUnique.rows();
while (i.hasNext()) {
this.index.addUnique(i.next());
this.cacheFlush++;
}
writeBufferUnique.clear();
}
}
private void flushUnique(int maxcount) throws IOException {
if (writeBufferUnique == null) return;
if (maxcount == 0) return;
synchronized (writeBufferUnique) {
kelondroRowCollection delete = new kelondroRowCollection(this.keyrow, maxcount);
Iterator<kelondroRow.Entry> i = writeBufferUnique.rows();
kelondroRow.Entry row;
while ((i.hasNext()) && (maxcount-- > 0)) {
row = i.next();
delete.add(row.getPrimaryKeyBytes());
this.index.addUnique(row);
this.cacheFlush++;
}
i = delete.rows();
while (i.hasNext()) {
writeBufferUnique.remove(((kelondroRow.Entry) i.next()).getColBytes(0), true);
}
delete = null;
writeBufferUnique.trim(true);
}
}
private void flushDoubles() throws IOException {
if (writeBufferDoubles == null) return;
synchronized (writeBufferDoubles) {
Iterator<kelondroRow.Entry> i = writeBufferDoubles.rows();
while (i.hasNext()) {
this.index.put(i.next());
this.cacheFlush++;
}
writeBufferDoubles.clear();
}
}
private void flushDoubles(int maxcount) throws IOException {
if (writeBufferDoubles == null) return;
if (maxcount == 0) return;
synchronized (writeBufferDoubles) {
kelondroRowCollection delete = new kelondroRowCollection(this.keyrow, maxcount);
Iterator<kelondroRow.Entry> i = writeBufferDoubles.rows();
kelondroRow.Entry row;
while ((i.hasNext()) && (maxcount-- > 0)) {
row = i.next();
delete.add(row.getPrimaryKeyBytes());
this.index.addUnique(row);
this.cacheFlush++;
}
i = delete.rows();
while (i.hasNext()) writeBufferDoubles.remove(((kelondroRow.Entry) i.next()).getColBytes(0), true);
delete = null;
writeBufferDoubles.trim(true);
}
}
public void flushSome() throws IOException {
if (writeBufferUnique != null) flushUnique(writeBufferUnique.size() / 10);
if (writeBufferDoubles != null) flushDoubles(writeBufferDoubles.size() / 10);
}
private int sumRecords() {
return
((readHitCache == null) ? 0 : readHitCache.size()) +
((writeBufferUnique == null) ? 0 : writeBufferUnique.size()) +
((writeBufferDoubles == null) ? 0 : writeBufferDoubles.size());
}
private boolean checkMissSpace() {
// returns true if it is allowed to write into this cache
if (cacheGrowStatus() < 1) {
@ -256,38 +168,21 @@ public class kelondroCache implements kelondroIndex {
// returns true if it is allowed to write into this cache
int status = cacheGrowStatus();
if (status < 1) {
flushUnique();
flushDoubles();
if (readHitCache != null) {
readHitCache.clear();
}
return false;
}
if (status < 2) {
int s = sumRecords();
flushDoubles(s / 4);
flushUnique(s / 4);
if (readHitCache != null) readHitCache.clear();
}
return true;
}
public synchronized void close() {
try {
flushUnique();
} catch (IOException e) {
e.printStackTrace();
}
try {
flushDoubles();
} catch (IOException e) {
e.printStackTrace();
}
index.close();
readHitCache = null;
readMissCache = null;
writeBufferUnique = null;
writeBufferDoubles = null;
}
public boolean has(byte[] key) throws IOException {
@ -315,20 +210,6 @@ public class kelondroCache implements kelondroIndex {
return entry;
}
}
if (writeBufferUnique != null) {
entry = writeBufferUnique.get(key);
if (entry != null) {
this.readHit++;
return entry;
}
}
if (writeBufferDoubles != null) {
entry = writeBufferDoubles.get(key);
if (entry != null) {
this.readHit++;
return entry;
}
}
// finally ask the backend index
this.readMiss++;
@ -372,13 +253,6 @@ public class kelondroCache implements kelondroIndex {
if (readMissCache.remove(key, true) != null) {
this.hasnotHit++;
// the entry does not exist before
if (writeBufferUnique != null) {
// since we know that the entry does not exist, we know that new
// entry belongs to the unique buffer
writeBufferUnique.put(row);
return null;
}
assert (writeBufferDoubles == null);
index.put(row); // write to backend
if (readHitCache != null) {
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
@ -394,13 +268,6 @@ public class kelondroCache implements kelondroIndex {
entry = readHitCache.get(key);
if (entry != null) {
// since we know that the entry was in the read cache, it cannot be in any write cache
if (writeBufferDoubles != null) {
// because the entry exists, it must be written in the doubles buffer
readHitCache.remove(key, true);
this.cacheDelete++;
writeBufferDoubles.put(row);
return entry;
} else {
// write directly to backend index
index.put(row);
// learn from situation
@ -409,37 +276,6 @@ public class kelondroCache implements kelondroIndex {
return entry;
}
}
}
// we still don't know if the key exists. Look into the buffers
if (writeBufferUnique != null) {
entry = writeBufferUnique.get(key);
if (entry != null) {
writeBufferUnique.put(row);
return entry;
}
}
if (writeBufferDoubles != null) {
entry = writeBufferDoubles.get(key);
if (entry != null) {
writeBufferDoubles.put(row);
return entry;
}
}
// finally, we still don't know if this is a double-entry or unique-entry
// there is a chance to get that information 'cheap':
// look into the node ram cache of the back-end index.
// that does only work, if the node cache is complete
// that is the case for kelondroFlexTables with ram index
if ((writeBufferUnique != null) &&
(index instanceof kelondroFlexTable) &&
(((kelondroFlexTable) index).hasRAMIndex()) &&
(!(((kelondroFlexTable) index).has(key)))) {
// this an unique entry
writeBufferUnique.put(row);
return null; // since that was unique, there was no entry before
}
// the worst case: we must write to the back-end directly
entry = index.put(row);
@ -470,13 +306,6 @@ public class kelondroCache implements kelondroIndex {
this.readMissCache.remove(key, true);
this.hasnotDelete++;
// the entry does not exist before
if (writeBufferUnique != null) {
// since we know that the entry does not exist, we know that new
// entry belongs to the unique buffer
writeBufferUnique.put(row);
return;
}
assert (writeBufferDoubles == null);
index.addUnique(row); // write to backend
if (readHitCache != null) {
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
@ -485,15 +314,6 @@ public class kelondroCache implements kelondroIndex {
return;
}
if ((writeBufferUnique != null) &&
(index instanceof kelondroFlexTable) &&
(((kelondroFlexTable) index).hasRAMIndex()) &&
(!(((kelondroFlexTable) index).has(key)))) {
// this an unique entry
writeBufferUnique.addUnique(row);
return;
}
// the worst case: we must write to the back-end directly
index.addUnique(row);
if (readHitCache != null) {
@ -510,9 +330,6 @@ public class kelondroCache implements kelondroIndex {
assert (row != null);
assert (row.columns() == row().columns());
//assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
assert (writeBufferUnique == null);
assert (writeBufferDoubles == null);
byte[] key = row.getPrimaryKeyBytes();
checkHitSpace();
@ -562,19 +379,6 @@ public class kelondroCache implements kelondroIndex {
}
}
// if the key already exists in one buffer, remove that buffer
if (writeBufferUnique != null) {
Entry entry = writeBufferUnique.remove(key, true);
if (entry != null) return entry;
}
if (writeBufferDoubles != null) {
Entry entry = writeBufferDoubles.remove(key, true);
if (entry != null) {
index.remove(key, false);
return entry;
}
}
return index.remove(key, false);
}
@ -582,26 +386,6 @@ public class kelondroCache implements kelondroIndex {
checkMissSpace();
if ((writeBufferUnique != null) && (writeBufferUnique.size() > 0)) {
Entry entry = writeBufferUnique.removeOne();
if (readMissCache != null) {
kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(entry.getPrimaryKeyBytes()));
if (dummy == null) this.hasnotUnique++; else this.hasnotDouble++;
}
return entry;
}
if ((writeBufferDoubles != null) && (writeBufferDoubles.size() > 0)) {
Entry entry = writeBufferDoubles.removeOne();
byte[] key = entry.getPrimaryKeyBytes();
if (readMissCache != null) {
kelondroRow.Entry dummy = readMissCache.put(readMissCache.row().newEntry(key));
if (dummy == null) this.hasnotUnique++; else this.hasnotDouble++;
}
index.remove(key, false);
return entry;
}
Entry entry = index.removeOne();
if (entry == null) return null;
byte[] key = entry.getPrimaryKeyBytes();
@ -621,17 +405,15 @@ public class kelondroCache implements kelondroIndex {
}
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
flushUnique();
return index.keys(up, firstKey);
}
public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(boolean up, byte[] firstKey) throws IOException {
flushUnique();
return index.rows(up, firstKey);
}
public int size() {
return index.size() + ((writeBufferUnique == null) ? 0 : writeBufferUnique.size());
return index.size();
}
public String filename() {

@ -74,6 +74,8 @@ public class kelondroCollectionIndex {
private static final int idx_col_lastread = 6; // a time stamp, update time in days since 1.1.2000
private static final int idx_col_lastwrote = 7; // a time stamp, update time in days since 1.1.2000
private static final boolean useEcoTable = false;
private static kelondroRow indexRow(int keylength, kelondroByteOrder payloadOrder) {
return new kelondroRow(
"byte[] key-" + keylength + "," +
@ -122,8 +124,9 @@ public class kelondroCollectionIndex {
this.maxPartitions = maxpartitions;
this.commonsPath = new File(path, filenameStub + "." + fillZ(Integer.toHexString(rowdef.objectsize).toUpperCase(), 4) + ".commons");
this.commonsPath.mkdirs();
File f = new File(path, filenameStub + ".index");
if (new File(path, filenameStub + ".index").exists()) {
if (f.exists()) {
serverLog.logFine("STARTUP", "OPENING COLLECTION INDEX");
// open index and array files
@ -153,7 +156,11 @@ public class kelondroCollectionIndex {
serverLog.logFine("STARTUP", "STARTED INITIALIZATION OF NEW COLLECTION INDEX WITH " + initialSpace + " ENTRIES. THIS WILL TAKE SOME TIME");
// initialize (new generation) index table from file
if (useEcoTable) {
index = new kelondroEcoTable(f, indexRow(keyLength, indexOrder), 100);
} else {
index = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keyLength, indexOrder), initialSpace, true);
}
// open array files
this.arrays = new HashMap<String, kelondroFixedWidthArray>(); // all entries will be dynamically created with getArray()
@ -225,8 +232,10 @@ public class kelondroCollectionIndex {
private kelondroIndex openIndexFile(File path, String filenameStub, kelondroByteOrder indexOrder,
long preloadTime, int loadfactor, kelondroRow rowdef, int initialSpace) throws IOException {
// open/create index table
kelondroIndex theindex = new kelondroCache(new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), initialSpace, true), true, false);
//kelondroIndex theindex = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), true);
File f = new File(path, filenameStub + ".index");
if (f.isDirectory()) {
// use a flextable
kelondroIndex theindex = new kelondroCache(new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), initialSpace, true));
// save/check property file for this array
File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize);
@ -244,6 +253,10 @@ public class kelondroCollectionIndex {
serverFileUtils.saveMap(propfile, props, "CollectionIndex properties");
return theindex;
} else {
// open an ecotable
return new kelondroEcoTable(f, indexRow(keylength, indexOrder), 100);
}
}
private kelondroFixedWidthArray openArrayFile(int partitionNumber, int serialNumber, kelondroByteOrder indexOrder, boolean create) throws IOException {

@ -96,7 +96,7 @@ public class kelondroDyn {
} else {
fbi = new kelondroFlexTable(file.getParentFile(), file.getName(), 10000, rowdef, 0, resetOnFail);
}
this.index = (useObjectCache) ? (kelondroIndex) new kelondroCache(fbi, true, writebuffer) : fbi;
this.index = (useObjectCache) ? (kelondroIndex) new kelondroCache(fbi) : fbi;
this.keylen = key;
this.reclen = nodesize;
this.fillChar = fillChar;

@ -0,0 +1,495 @@
// kelondroEcoFS.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
public class kelondroEcoFS {
/*
* The EcoFS is a flat file with records of fixed length. The file does not contain
* any meta information, and the first record starts right at file position 0.
* The access rules are designed so that a minimum of IO operations is necessary.
* Two caches provide a mirror of the file content: a read cache and a write buffer.
* The read cache contains a number of entries from the file; a mirror that moves
* whenever information outside the mirror is requested.
* The write buffer exists only at the end of the file. It contains only records
* that have never been written to the file before. When the write buffer is flushed,
* the file grows.
* The record file may also shrink when the last entry of the file is removed.
* Removal of entries inside the file is not possible, but such entries can be erased
* by overwriting the data with zero bytes.
* All access to the file is made with byte[] arrays that are generated outside of this class;
* this class only references byte[] arrays that are handed over to its methods.
*/
private RandomAccessFile raf;
private File tablefile;
protected int recordsize; // number of bytes in one record
private int cacheindex, cachecount, buffercount; // index of first cached record, number of cached records, number of buffered records
private byte[] cache, buffer, zero;
private static final int maxBuffer = 512;
public kelondroEcoFS(File tablefile, int recordsize) throws IOException {
this.tablefile = tablefile;
this.recordsize = recordsize;
// initialize zero buffer
this.zero = new byte[recordsize];
for (int i = 0; i < recordsize; i++) this.zero[i] = 0;
// initialize table file
if (!tablefile.exists()) {
// make new file
FileOutputStream fos = null;
try {
fos = new FileOutputStream(tablefile);
} catch (FileNotFoundException e) {
// should not happen
e.printStackTrace();
}
try { fos.close(); } catch (IOException e) {}
}
// open an existing table file
try {
raf = new RandomAccessFile(tablefile, "rw");
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
}
// initialize cache and buffer
int maxrecords = Math.max(1, maxBuffer / recordsize);
cache = new byte[maxrecords * recordsize];
buffer = new byte[maxrecords * recordsize];
this.buffercount = 0;
// first-time read of cache
fillCache(0);
}
public static long tableSize(File tablefile, int recordsize) {
// returns number of records in table
if (!tablefile.exists()) return 0;
long size = tablefile.length();
assert size % recordsize == 0;
return size / recordsize;
}
public synchronized int size() throws IOException {
// return the number of records in file plus number of records in buffer
return filesize() + this.buffercount;
}
public File filename() {
return this.tablefile;
}
private int filesize() throws IOException {
return (int) (raf.length() / recordsize);
}
private int inCache(int index) {
// checks if the index is inside the cache and returns the index offset inside
// the cache if the index is inside the cache
// returns -1 if the index is not in the cache
if ((index >= this.cacheindex) && (index < this.cacheindex + this.cachecount)) {
return index - this.cacheindex;
}
return -1;
}
private int inBuffer(int index) throws IOException {
// checks if the index is inside the buffer and returns the index offset inside
// the buffer if the index is inside the buffer
// returns -1 if the index is not in the buffer
int fs = filesize();
if ((index >= fs) && (index < fs + this.buffercount)) {
return index - fs;
}
return -1;
}
private void fillCache(int index) throws IOException {
// load cache with copy of disc content; start with record at index
// if the record would overlap with the write buffer,
// its start is shifted forward until it fits
// first check if the index is inside the current cache
assert inCache(index) < 0;
if (inCache(index) >= 0) return;
// calculate new start position
int fs = this.filesize();
if (index + this.cache.length / this.recordsize > fs) {
index = fs - this.cache.length / this.recordsize;
}
if (index < 0) index = 0;
// calculate number of records that shall be stored in the cache
this.cachecount = Math.min(this.cache.length / this.recordsize, this.filesize() - index);
assert this.cachecount >= 0;
// check if we need to read 0 bytes from the file
this.cacheindex = index;
if (this.cachecount == 0) return;
// copy records from file to cache
raf.seek((long) this.recordsize * (long) index);
raf.read(this.cache, 0, this.recordsize * this.cachecount);
}
private void flushBuffer() {
// write buffer to end of file
try {
raf.seek(raf.length());
raf.write(this.buffer, 0, this.recordsize * this.buffercount);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
this.buffercount = 0;
}
public synchronized void close() {
flushBuffer();
// then close the file
try {
raf.close();
} catch (IOException e) {
e.printStackTrace();
}
raf = null;
buffer = null;
cache = null;
}
public synchronized void get(int index, byte[] b, int start) throws IOException {
assert b.length - start >= this.recordsize;
if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.get(" + index + ") outside bounds (" + this.size() + ")");
// check if index is inside of cache
int p = inCache(index);
int q = (p >= 0) ? -1 : inBuffer(index);
if ((p < 0) && (q < 0)) {
// the index is outside of the cache and the buffer; shift the cache window
fillCache(index);
p = inCache(index);
assert p >= 0;
}
if (p >= 0) {
// read entry from the cache
System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
return;
}
if (q >= 0) {
// read entry from the buffer
System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize);
return;
}
assert false;
}
public synchronized void put(int index, byte[] b, int start) throws IOException {
assert b.length - start >= this.recordsize;
if (index > size()) throw new IndexOutOfBoundsException("kelondroEcoFS.put(" + index + ") outside bounds (" + this.size() + ")");
// check if this is an empty entry
if (isClean(b , start, this.recordsize)) {
clean(index);
return;
}
// check if index is inside of cache
int p = inCache(index);
int q = (p >= 0) ? -1 : inBuffer(index);
if (p >= 0) {
// write entry to the cache and to the file
System.arraycopy(b, start, this.cache, p * this.recordsize, this.recordsize);
raf.seek((long) index * (long) this.recordsize);
raf.write(b, start, this.recordsize);
return;
}
if (q >= 0) {
// write entry to the buffer
System.arraycopy(b, start, this.buffer, q * this.recordsize, this.recordsize);
return;
}
if (index == size()) {
// append the record to the end of the file;
// look if there is space in the buffer
int bufferpos = index - filesize();
if (bufferpos >= this.buffer.length / this.recordsize) {
assert this.buffercount == this.buffer.length / this.recordsize;
// the record does not fit in current buffer
// write buffer
flushBuffer();
// write new entry to buffer
System.arraycopy(b, start, this.buffer, 0, this.recordsize);
this.buffercount = 1;
} else {
System.arraycopy(b, start, this.buffer, bufferpos * this.recordsize, this.recordsize);
this.buffercount++;
}
assert this.buffercount <= this.buffer.length / this.recordsize;
} else {
// write the record directly to the file,
// do not care about the cache; this case was checked before
raf.seek((long) index * (long) this.recordsize);
raf.write(b, start, this.recordsize);
}
}
public synchronized void add(byte[] b, int start) throws IOException {
put(size(), b, start);
}
private boolean isClean(byte[] b, int offset, int length) {
for (int i = 0; i < length; i++) {
if (b[i + offset] != 0) return false;
}
return true;
}
private boolean isClean(int index) throws IOException {
assert index < size();
// check if index is inside of cache
int p = inCache(index);
int q = (p >= 0) ? -1 : inBuffer(index);
if ((p < 0) && (q < 0)) {
// the index is outside of the cache and the buffer; shift the cache window
fillCache(index);
p = inCache(index);
assert p >= 0;
}
if (p >= 0) {
// check entry from the cache
return isClean(this.cache, p * this.recordsize, this.recordsize);
}
if (q >= 0) {
// check entry from the buffer
return isClean(this.buffer, q * this.recordsize, this.recordsize);
}
assert false;
return false;
}
public synchronized void clean(int index, byte[] b, int start) throws IOException {
// removes an entry by cleaning (writing zero bytes to the file)
// the entry that had been at the specific place before is copied to the given array b
// if the last entry in the file was cleaned, the file shrinks by the given record
// this is like
// get(index, b, start);
// put(index, zero, 0);
// plus an additional check if the file should shrink
assert b.length - start >= this.recordsize;
if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
if (index == size() - 1) {
cleanLast(b, start);
return;
}
// check if index is inside of cache
int p = inCache(index);
int q = (p >= 0) ? -1 : inBuffer(index);
if ((p < 0) && (q < 0)) {
// the index is outside of the cache and the buffer; shift the cache window
fillCache(index);
p = inCache(index);
assert p >= 0;
}
if (p >= 0) {
// read entry from the cache
System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
// write zero bytes to the cache and to the file
System.arraycopy(zero, 0, this.cache, p * this.recordsize, this.recordsize);
this.raf.seek((long) index * (long) this.recordsize);
this.raf.write(zero, 0, this.recordsize);
return;
}
if (q >= 0) {
// read entry from the buffer
System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize);
// write zero to the buffer
System.arraycopy(zero, 0, this.buffer, q * this.recordsize, this.recordsize);
return;
}
assert false;
}
public synchronized void clean(int index) throws IOException {
if (index >= size()) throw new IndexOutOfBoundsException("kelondroEcoFS.clean(" + index + ") outside bounds (" + this.size() + ")");
if (index == size() - 1) {
cleanLast();
return;
}
// check if index is inside of cache
int p = inCache(index);
int q = (p >= 0) ? -1 : inBuffer(index);
if (p >= 0) {
// write zero bytes to the cache and to the file
System.arraycopy(zero, 0, this.cache, p * this.recordsize, this.recordsize);
raf.seek((long) index * (long) this.recordsize);
raf.write(zero, 0, this.recordsize);
return;
}
if (q >= 0) {
// write zero to the buffer
System.arraycopy(zero, 0, this.buffer, q * this.recordsize, this.recordsize);
return;
}
raf.seek((long) index * (long) this.recordsize);
raf.write(zero, 0, this.recordsize);
}
public synchronized void cleanLast(byte[] b, int start) throws IOException {
cleanLast0(b, start);
int i;
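// after the last record was removed, keep shrinking the file while the
// (new) last record contains only zero bytes; such records were erased
// earlier by clean() and would otherwise remain as dead space at the tail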
while (((i = size()) > 0) && (isClean(i - 1))) {
//System.out.println("Extra clean/1: before size = " + size());
cleanLast0();
//System.out.println(" after size = " + size());
}
}
private synchronized void cleanLast0(byte[] b, int start) throws IOException {
// this is like
// clean(this.size() - 1, b, start);
assert b.length - start >= this.recordsize;
// check if index is inside of cache
int p = inCache(this.size() - 1);
int q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
if ((p < 0) && (q < 0)) {
// the index is outside of the cache and the buffer; shift the cache window
fillCache(this.size() - 1);
p = inCache(this.size() - 1);
assert p >= 0;
}
if (p >= 0) {
// read entry from the cache
System.arraycopy(this.cache, p * this.recordsize, b, start, this.recordsize);
// shrink cache and file
assert this.buffercount == 0;
this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
this.cachecount--;
return;
}
if (q >= 0) {
// read entry from the buffer
System.arraycopy(this.buffer, q * this.recordsize, b, start, this.recordsize);
// shrink buffer
assert this.buffercount > 0;
this.buffercount--;
return;
}
assert false;
}
public synchronized void cleanLast() throws IOException {
cleanLast0();
int i;
while (((i = size()) > 0) && (isClean(i - 1))) {
//System.out.println("Extra clean/0: before size = " + size());
cleanLast0();
//System.out.println(" after size = " + size());
}
}
private synchronized void cleanLast0() throws IOException {
// check if index is inside of cache
int p = inCache(this.size() - 1);
int q = (p >= 0) ? -1 : inBuffer(this.size() - 1);
if (p >= 0) {
// shrink cache and file
assert this.buffercount == 0;
this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
this.cachecount--;
return;
}
if (q >= 0) {
// shrink buffer
assert this.buffercount > 0;
this.buffercount--;
return;
}
// check if file should shrink
assert this.buffercount == 0;
this.raf.setLength((long) (this.size() - 1) * (long) this.recordsize);
}
public static void main(String[] args) {
// open a file, add one entry and exit
File f = new File(args[0]);
if (f.exists()) f.delete();
try {
kelondroEcoFS t = new kelondroEcoFS(f, 8);
byte[] b = new byte[8];
t.add("01234567".getBytes(), 0);
t.add("ABCDEFGH".getBytes(), 0);
t.add("abcdefgh".getBytes(), 0);
t.add("--------".getBytes(), 0);
t.add("********".getBytes(), 0);
for (int i = 0; i < 1000; i++) t.add("++++++++".getBytes(), 0);
t.add("=======0".getBytes(), 0);
t.add("=======1".getBytes(), 0);
t.add("=======2".getBytes(), 0);
t.cleanLast(b, 0);
System.out.println(new String(b));
t.clean(2, b, 0);
System.out.println(new String(b));
t.get(1, b, 0);
System.out.println(new String(b));
t.put(1, "AbCdEfGh".getBytes(), 0);
t.get(1, b, 0);
System.out.println(new String(b));
t.get(3, b, 0);
System.out.println(new String(b));
t.get(4, b, 0);
System.out.println(new String(b));
System.out.println("size = " + t.size());
t.clean(t.size() - 2);
t.cleanLast();
System.out.println("size = " + t.size());
t.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

@ -0,0 +1,463 @@
// kelondroEcoIndex.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 14.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroRow.Entry;
import de.anomic.server.serverMemory;
/*
* The EcoIndex builds upon the EcoFS and tries to reduce the number of IO requests that the
* EcoFS must do to a minimum. In the best case, no IO has to be done for read operations
* (a complete shadow of the database is held in RAM), and only a few write IO operations
* are needed for a large number of table writes (using the write buffer of the EcoFS).
* To make the EcoIndex scalable with respect to the available RAM, there are two elements that must be scalable:
* - the access index can be either completely in RAM (kelondroRAMIndex) or file-based (kelondroTree)
* - the content cache can be either a complete RAM-based shadow of the file, or empty.
* The content cache can also be deleted at run-time if the available RAM gets too low.
*
*/
public class kelondroEcoTable implements kelondroIndex {
// static tracker objects
private static TreeMap<String, kelondroEcoTable> tableTracker = new TreeMap<String, kelondroEcoTable>();
private kelondroRowSet table;
private kelondroBytesIntMap index;
private kelondroBufferedEcoFS file;
private kelondroRow rowdef, taildef;
private int buffersize;
public kelondroEcoTable(File tablefile, kelondroRow rowdef, int buffersize) throws IOException {
this.rowdef = rowdef;
this.buffersize = buffersize;
assert rowdef.primaryKeyIndex == 0;
// define the taildef, a row like the rowdef but without the first column
kelondroColumn[] cols = new kelondroColumn[rowdef.columns() - 1];
for (int i = 0; i < cols.length; i++) {
cols[i] = rowdef.column(i + 1);
}
this.taildef = new kelondroRow(cols, kelondroNaturalOrder.naturalOrder, rowdef.primaryKeyIndex);
// initialize table file
if (!tablefile.exists()) {
// make new file
FileOutputStream fos = null;
try {
fos = new FileOutputStream(tablefile);
} catch (FileNotFoundException e) {
// should not happen
e.printStackTrace();
}
try { fos.close(); } catch (IOException e) {}
}
// open an existing table file
try {
this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize);
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
}
// initialize index and copy table
int records = file.size();
long neededRAM4table = (long) records * taildef.objectsize * 3 / 2;
table = (serverMemory.request(neededRAM4table, true)) ? new kelondroRowSet(taildef, records + 1) : null;
index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, records + 1);
// read all elements from the file into the copy table
byte[] record = new byte[rowdef.objectsize];
byte[] key = new byte[rowdef.primaryKeyLength];
for (int i = 0; i < records; i++) {
// read entry
file.get(i, record, 0);
// write the key into the index table
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
index.addi(key, i);
// write the tail into the table
if (table != null) table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true));
}
// track this table
tableTracker.put(tablefile.toString(), this);
}
public static long tableSize(File tablefile, int recordsize) {
// returns number of records in table
return kelondroEcoFS.tableSize(tablefile, recordsize);
}
public synchronized void addUnique(Entry row) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
int i = file.size();
index.addi(row.getPrimaryKeyBytes(), i);
if (table != null) {
assert table.size() == i;
table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
}
file.put(i, row.bytes(), 0);
}
public synchronized void addUniqueMultiple(List<Entry> rows) throws IOException {
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) {
addUnique(i.next());
}
}
public void close() {
file.close();
file = null;
}
public void finalize() {
if (this.file != null) this.close();
}
public String filename() {
return this.file.filename().toString();
}
public synchronized Entry get(byte[] key) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
int i = index.geti(key);
if (i == -1) return null;
byte[] b = new byte[rowdef.objectsize];
if (table == null) {
// read row from the file
file.get(i, b, 0);
} else {
// construct the row using the copy in RAM
kelondroRow.Entry v = table.get(i);
assert v != null;
assert key.length == rowdef.primaryKeyLength;
System.arraycopy(key, 0, b, 0, key.length);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength);
}
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
return rowdef.newEntry(b);
}
public synchronized boolean has(byte[] key) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
return index.geti(key) >= 0;
}
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
return index.keys(up, firstKey);
}
public kelondroProfile profile() {
return null;
}
public synchronized Entry put(Entry row) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
int i = index.geti(row.getPrimaryKeyBytes());
if (i == -1) {
addUnique(row);
return null;
}
byte[] b = new byte[rowdef.objectsize];
if (table == null) {
// read old value
file.get(i, b, 0);
// write new value
file.put(i, row.bytes(), 0);
} else {
// read old value
kelondroRow.Entry v = table.get(i);
System.arraycopy(row.getPrimaryKeyBytes(), 0, b, 0, rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength);
// write new value
table.set(i, taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
file.put(i, row.bytes(), 0);
}
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
// return old value
return rowdef.newEntry(b);
}
public synchronized Entry put(Entry row, Date entryDate) throws IOException {
return put(row);
}
public synchronized void putMultiple(List<Entry> rows) throws IOException {
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) {
put(i.next());
}
}
public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
assert keepOrder == false; // this class cannot keep the order during a remove
int i = index.geti(key);
if (i == -1) return null; // nothing to do
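// removal strategy: the last record of the file is moved into the slot of the
// removed record and the file shrinks by one record; this keeps the file dense,
// but the record order cannot be preserved (hence the keepOrder == false assert)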
// prepare result
byte[] b = new byte[rowdef.objectsize];
byte[] p = new byte[rowdef.objectsize];
if (table == null) {
index.removei(key);
file.get(i, b, 0);
file.cleanLast(p, 0);
file.put(i, p, 0);
byte[] k = new byte[rowdef.primaryKeyLength];
System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
index.puti(k, i);
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
} else {
kelondroRow.Entry v = table.get(i);
assert key.length == rowdef.primaryKeyLength;
System.arraycopy(key, 0, b, 0, key.length);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, taildef.objectsize);
if (i == index.size() - 1) {
// special handling if the entry is the last entry in the file
index.removei(key);
table.removeRow(i, false);
file.clean(i);
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
} else {
// switch values
kelondroRow.Entry te = table.removeOne();
table.set(i, te);
file.cleanLast(p, 0);
file.put(i, p, 0);
kelondroRow.Entry lr = rowdef.newEntry(p);
index.removei(key);
index.puti(lr.getPrimaryKeyBytes(), i);
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size())) : "table.size() = " + table.size() + ", index.size() = " + index.size();
}
}
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
return rowdef.newEntry(b);
}
public synchronized Entry removeOne() throws IOException {
assert (file.size() == index.size());
assert ((table == null) || (table.size() == index.size()));
byte[] le = new byte[rowdef.objectsize];
file.cleanLast(le, 0);
kelondroRow.Entry lr = rowdef.newEntry(le);
int i = index.removei(lr.getPrimaryKeyBytes());
assert i >= 0;
table.removeRow(i, false);
return lr;
}
public void reset() throws IOException {
File f = file.filename();
file.close();
f.delete();
// make new file
FileOutputStream fos = null;
try {
fos = new FileOutputStream(f);
} catch (FileNotFoundException e) {
// should not happen
e.printStackTrace();
}
try { fos.close(); } catch (IOException e) {}
// open an existing table file
try {
this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(f, rowdef.objectsize), this.buffersize);
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
}
// initialize index and copy table
table = new kelondroRowSet(taildef, 1);
index = new kelondroBytesIntMap(rowdef.primaryKeyLength, rowdef.objectOrder, 1);
}
public kelondroRow row() {
return this.rowdef;
}
public synchronized int size() {
return index.size();
}
public synchronized kelondroCloneableIterator<Entry> rows(boolean up, byte[] firstKey) throws IOException {
return new rowIterator(up, firstKey);
}
public class rowIterator implements kelondroCloneableIterator<Entry> {
Iterator<byte[]> i;
boolean up;
byte[] fk;
int c;
public rowIterator(boolean up, byte[] firstKey) throws IOException {
this.up = up;
this.fk = firstKey;
this.i = index.keys(up, firstKey);
this.c = -1;
}
public kelondroCloneableIterator<Entry> clone(Object modifier) {
try {
return new rowIterator(up, fk);
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
public boolean hasNext() {
return i.hasNext();
}
public Entry next() {
byte[] k = i.next();
try {
this.c = index.geti(k);
} catch (IOException e) {
e.printStackTrace();
return null;
}
byte[] b = new byte[rowdef.objectsize];
if (table == null) {
// read from file
try {
file.get(this.c, b, 0);
} catch (IOException e) {
e.printStackTrace();
return null;
}
} else {
// compose from table and key
kelondroRow.Entry v = table.get(this.c);
System.arraycopy(k, 0, b, 0, rowdef.primaryKeyLength);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, taildef.objectsize);
}
return rowdef.newEntry(b);
}
public void remove() {
throw new UnsupportedOperationException("no remove in EcoTable");
}
}
public static kelondroIndex testTable(File f, String testentities) throws IOException {
if (f.exists()) f.delete();
kelondroRow rowdef = new kelondroRow("byte[] a-4, byte[] b-4", kelondroNaturalOrder.naturalOrder, 0);
kelondroIndex tt = new kelondroEcoTable(f, rowdef, 100);
byte[] b;
kelondroRow.Entry row = rowdef.newEntry();
for (int i = 0; i < testentities.length(); i++) {
b = kelondroTree.testWord(testentities.charAt(i));
row.setCol(0, b);
row.setCol(1, b);
tt.put(row);
}
return tt;
}
public static void bigtest(int elements, File testFile) {
System.out.println("starting big test with " + elements + " elements:");
long start = System.currentTimeMillis();
String[] s = kelondroTree.permutations(elements);
kelondroIndex tt;
try {
for (int i = 0; i < s.length; i++) {
System.out.println("*** probing tree " + i + " for permutation " + s[i]);
// generate tree and delete elements
tt = testTable(testFile, s[i]);
if (kelondroTree.countElements(tt) != tt.size()) {
System.out.println("wrong size for " + s[i]);
}
tt.close();
for (int j = 0; j < s.length; j++) {
tt = testTable(testFile, s[i]);
// delete by permutation j
for (int elt = 0; elt < s[j].length(); elt++) {
tt.remove(kelondroTree.testWord(s[j].charAt(elt)), false);
if (kelondroTree.countElements(tt) != tt.size()) {
System.out.println("ERROR! wrong size for probe tree " + s[i] + "; probe delete " + s[j] + "; position " + elt);
}
}
tt.close();
}
}
System.out.println("FINISHED test after " + ((System.currentTimeMillis() - start) / 1000) + " seconds.");
} catch (Exception e) {
e.printStackTrace();
System.out.println("TERMINATED");
}
}
public static void main(String[] args) {
// open a file, add one entry and exit
File f = new File(args[0]);
bigtest(5, f);
/*
kelondroRow row = new kelondroRow("byte[] key-4, byte[] x-5", kelondroNaturalOrder.naturalOrder, 0);
try {
kelondroEcoTable t = new kelondroEcoTable(f, row);
kelondroRow.Entry entry = row.newEntry();
entry.setCol(0, "abcd".getBytes());
entry.setCol(1, "dummy".getBytes());
t.put(entry);
t.close();
} catch (IOException e) {
e.printStackTrace();
}
*/
}
}

@ -101,7 +101,7 @@ public class kelondroFlexSplitTable implements kelondroIndex {
// open next biggest table
t.remove(maxf);
date = maxf.substring(tablename.length() + 1);
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail), true, false);
table = new kelondroCache(new kelondroFlexTable(path, maxf, preloadTime, rowdef, 0, resetOnFail));
tables.put(date, table);
}
}
@ -164,16 +164,6 @@ public class kelondroFlexSplitTable implements kelondroIndex {
return s;
}
public void flushSome() {
Iterator<kelondroIndex> i = tables.values().iterator();
kelondroIndex ki;
while (i.hasNext()) {
ki = ((kelondroIndex) i.next());
if (ki instanceof kelondroCache)
try {((kelondroCache) ki).flushSome();} catch (IOException e) {}
}
}
public kelondroRow row() {
return this.rowdef;
}

@ -93,7 +93,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
if (indexfile.exists()) {
// use existing index file
System.out.println("*** Using File index " + indexfile);
ki = new kelondroCache(kelondroTree.open(indexfile, true, preloadTime, treeIndexRow(rowdef.width(0), rowdef.objectOrder), 2, 80), true, false);
ki = new kelondroCache(kelondroTree.open(indexfile, true, preloadTime, treeIndexRow(rowdef.width(0), rowdef.objectOrder), 2, 80));
RAMIndex = false;
} else {
// generate new index file
@ -175,7 +175,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroByteOrder objectOrder) throws IOException {
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80), true, false);
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80));
Iterator<kelondroNode> content = super.col[0].contentNodes(-1);
kelondroNode node;
kelondroRow.Entry indexentry;

@ -93,7 +93,7 @@ public class kelondroMapTable {
if (mTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared already in other context.");
if (tTables.containsKey(tablename)) throw new RuntimeException("kelondroTables.declareTree: table '" + tablename + "' declared twice.");
File tablefile = new File(tablesPath, "table." + tablename + ".tdb");
kelondroIndex Tree = new kelondroCache(kelondroTree.open(tablefile, true, preloadTime, rowdef), true, false);
kelondroIndex Tree = new kelondroCache(kelondroTree.open(tablefile, true, preloadTime, rowdef));
tTables.put(tablename, Tree);
}

@ -169,6 +169,9 @@ public class kelondroRAMIndex implements kelondroIndex {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.keys(up, firstKey);
}
// index0 should be sorted
// sort index1 to enable working of the merge iterator
index1.sort();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator<byte[]>(
index0.keys(up, firstKey),
@ -192,6 +195,9 @@ public class kelondroRAMIndex implements kelondroIndex {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.rows(up, firstKey);
}
// index0 should be sorted
// sort index1 to enable working of the merge iterator
index1.sort();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator<kelondroRow.Entry>(
index0.rows(up, firstKey),

@ -165,7 +165,7 @@ public final class kelondroRow {
public final Entry newEntry(byte[] rowinstance, int start, boolean clone) {
if (rowinstance == null) return null;
//assert (rowinstance[0] != 0);
assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth));
assert (this.objectOrder.wellformed(rowinstance, start, row[0].cellwidth)) : "rowinstance = " + new String(rowinstance);
// this method offers the option to clone the content
// this is necessary if it is known that the underlying byte array may change and therefore
// the reference to the byte array does not contain the original content

@ -355,6 +355,7 @@ public class kelondroRowCollection {
}
public synchronized kelondroRow.Entry removeOne() {
// removes the last entry from the collection
if (chunkcount == 0) return null;
kelondroRow.Entry r = get(chunkcount - 1);
if (chunkcount == sortBound) sortBound--;

@ -1637,7 +1637,7 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
}
}
public static int countElements(kelondroTree t) {
public static int countElements(kelondroIndex t) {
int count = 0;
try {
Iterator<kelondroRow.Entry> iter = t.rows(true, null);

@ -140,7 +140,7 @@ public class plasmaCrawlBalancer {
private void openFileIndex() {
cacheStacksPath.mkdirs();
urlFileIndex = new kelondroCache(new kelondroFlexTable(cacheStacksPath, stackname + indexSuffix, -1, plasmaCrawlEntry.rowdef, 0, true), true, false);
urlFileIndex = new kelondroCache(new kelondroFlexTable(cacheStacksPath, stackname + indexSuffix, -1, plasmaCrawlEntry.rowdef, 0, true));
}
private void resetFileIndex() {

@ -146,13 +146,6 @@ public final class plasmaCrawlLURL {
gcrawlResultStack.add(urlHash + initiatorHash + executorHash);
}
public synchronized void flushCacheSome() {
try {
if (urlIndexFile instanceof kelondroFlexSplitTable) ((kelondroFlexSplitTable) urlIndexFile).flushSome();
if (urlIndexFile instanceof kelondroCache) ((kelondroCache) urlIndexFile).flushSome();
} catch (IOException e) {}
}
public synchronized int writeCacheSize() {
if (urlIndexFile instanceof kelondroFlexSplitTable) return ((kelondroFlexSplitTable) urlIndexFile).writeBufferSize();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).writeBufferSize();

@ -80,7 +80,7 @@ public final class plasmaCrawlStacker extends Thread {
final serverLog log = new serverLog("STACKCRAWL");
private plasmaSwitchboard sb;
private final LinkedList urlEntryHashCache;
private final LinkedList<String> urlEntryHashCache;
private kelondroIndex urlEntryCache;
private File cacheStacksPath;
private long preloadTime;
@ -91,7 +91,7 @@ public final class plasmaCrawlStacker extends Thread {
// objects for the prefetch task
private ArrayList dnsfetchHosts = new ArrayList();
private ArrayList<String> dnsfetchHosts = new ArrayList<String>();
public plasmaCrawlStacker(plasmaSwitchboard sb, File dbPath, long preloadTime, int dbtype, boolean prequeue) {
this.sb = sb;
@ -101,7 +101,7 @@ public final class plasmaCrawlStacker extends Thread {
this.alternateCount = 0;
// init the message list
this.urlEntryHashCache = new LinkedList();
this.urlEntryHashCache = new LinkedList<String>();
// create a stack for newly entered entries
this.cacheStacksPath = dbPath;
@ -111,7 +111,7 @@ public final class plasmaCrawlStacker extends Thread {
openDB();
try {
// loop through the list and fill the messageList with url hashs
Iterator rows = this.urlEntryCache.rows(true, null);
Iterator<kelondroRow.Entry> rows = this.urlEntryCache.rows(true, null);
kelondroRow.Entry entry;
while (rows.hasNext()) {
entry = (kelondroRow.Entry) rows.next();
@ -299,13 +299,13 @@ public final class plasmaCrawlStacker extends Thread {
String newCacheName = "urlNoticeStacker8.db";
cacheStacksPath.mkdirs();
try {
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true), true, false);
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
} catch (Exception e) {
e.printStackTrace();
// kill DB and try again
kelondroFlexTable.delete(cacheStacksPath, newCacheName);
try {
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true), true, false);
this.urlEntryCache = new kelondroCache(new kelondroFlexTable(cacheStacksPath, newCacheName, preloadTime, plasmaCrawlEntry.rowdef, 0, true));
} catch (Exception ee) {
ee.printStackTrace();
System.exit(-1);
@ -315,7 +315,7 @@ public final class plasmaCrawlStacker extends Thread {
if (this.dbtype == QUEUE_DB_TYPE_TREE) {
File cacheFile = new File(cacheStacksPath, "urlNoticeStacker8.db");
cacheFile.getParentFile().mkdirs();
this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, true, preloadTime, plasmaCrawlEntry.rowdef), true, true);
this.urlEntryCache = new kelondroCache(kelondroTree.open(cacheFile, true, preloadTime, plasmaCrawlEntry.rowdef));
}
}

@ -1802,7 +1802,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// flush some entries from the RAM cache
if (sbQueue.size() == 0) wordIndex.flushCacheSome(); // permanent flushing only if we are not busy
wordIndex.loadedURL.flushCacheSome();
boolean doneSomething = false;

@ -67,13 +67,13 @@ public class yacyNewsDB {
public yacyNewsDB(File path, long preloadTime) {
this.path = path;
this.preloadTime = preloadTime;
this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef), true, false);
this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef));
}
private void resetDB() {
try {close();} catch (Exception e) {}
if (path.exists()) path.delete();
this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef), true, false);
this.news = new kelondroCache(kelondroTree.open(path, true, preloadTime, yacyNewsRecord.rowdef));
}
public void close() {
@ -102,14 +102,14 @@ public class yacyNewsDB {
}
}
public synchronized Iterator news() throws IOException {
public synchronized Iterator<yacyNewsRecord> news() throws IOException {
// the iteration iterates yacyNewsRecord - type objects
return new recordIterator();
}
public class recordIterator implements Iterator {
public class recordIterator implements Iterator<yacyNewsRecord> {
Iterator rowIterator;
Iterator<kelondroRow.Entry> rowIterator;
public recordIterator() throws IOException {
rowIterator = news.rows(true, null);
@ -119,7 +119,7 @@ public class yacyNewsDB {
return rowIterator.hasNext();
}
public Object next() {
public yacyNewsRecord next() {
return b2r((kelondroRow.Entry) rowIterator.next());
}
