better memory management and slightly less (in total and temporary) RAM allocation:

- confirm that database objects that are not supposed to grow do not have an index memory management that is designed for growth
- changed the index sorting method in such a way that it allocates fewer objects during quicksort
- renamed database classes (shorter names; the naming now addresses the fact that the objects are held in RAM)
- added a large number of asserts to check if objects actually take the RAM that they should have


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7019 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 5924a0d851
commit 6388a58fc7

@ -30,7 +30,7 @@ import java.util.Iterator;
import java.util.Map;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.ObjectIndexCache;
import net.yacy.kelondro.index.RAMIndex;
import net.yacy.kelondro.table.Table;
import net.yacy.kelondro.util.Domains;
import net.yacy.kelondro.util.FileUtils;
@ -118,11 +118,11 @@ public class PerformanceMemory_p {
prop.putNum("EcoIndexTotalMem", totalmem / (1024 * 1024d));
// write object cache table
Iterator<Map.Entry<String, ObjectIndexCache>> oi = ObjectIndexCache.objects();
Iterator<Map.Entry<String, RAMIndex>> oi = RAMIndex.objects();
c = 0;
mem = 0;
Map.Entry<String, ObjectIndexCache> oie;
ObjectIndexCache cache;
Map.Entry<String, RAMIndex> oie;
RAMIndex cache;
long hitmem, totalhitmem = 0;
while (oi.hasNext()) {
oie = oi.next();

@ -35,7 +35,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -63,7 +63,7 @@ public class ZURL implements Iterable<ZURL.Entry> {
);
// the class object
protected ObjectIndex urlIndex;
protected Index urlIndex;
protected final ConcurrentLinkedQueue<byte[]> stack;
public ZURL(

@ -84,7 +84,7 @@ public final class Cache {
Log.logException(e);
}
try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2);
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2, false);
fileDBunbuffered.setMaxSize(maxCacheSize);
fileDB = new Compressor(fileDBunbuffered, 2 * 1024 * 1024);
} catch (IOException e) {

@ -46,7 +46,7 @@ import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@ -61,7 +61,7 @@ import net.yacy.repository.Blacklist;
public final class MetadataRepository implements Iterable<byte[]> {
// class objects
protected ObjectIndex urlIndexFile;
protected Index urlIndexFile;
private Export exportthread; // will have a export thread assigned if exporter is running
private File location;
private ArrayList<hostStat> statsDump;
@ -72,7 +72,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
final boolean useTailCache,
final boolean exceed134217727) {
this.location = path;
ObjectIndex backupIndex = null;
Index backupIndex = null;
try {
backupIndex = new SplitTable(this.location, tablename, URIMetadataRow.rowdef, useTailCache, exceed134217727);
} catch (RowSpaceExceededException e) {

@ -32,7 +32,7 @@ import java.util.Date;
import java.util.Iterator;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -169,7 +169,7 @@ public class CRProcess {
return true;
}
public static boolean accumulate_upd(final File f, final ObjectIndex acc) throws IOException, RowSpaceExceededException {
public static boolean accumulate_upd(final File f, final Index acc) throws IOException, RowSpaceExceededException {
// open file
AttrSeq source_cr = null;
try {
@ -275,7 +275,7 @@ public class CRProcess {
// open target file
AttrSeq acc = null;
ObjectIndex newacc = null;
Index newacc = null;
IndexCell<WordReference> newseq = null;
if (newdb) {
final File path = to_file.getParentFile(); // path to storage place

@ -55,7 +55,7 @@ import java.util.Properties;
import java.util.Map.Entry;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@ -73,7 +73,7 @@ public class yacyNewsDB {
private final File path;
private final Row rowdef;
protected final int attributesMaxLength;
protected ObjectIndex news;
protected Index news;
private static final int categoryStringLength = 8;
public static final int idLength = DateFormatter.PATTERN_SHORT_SECOND.length() + Word.commonHashLength;

@ -11,7 +11,7 @@ import java.util.Random;
import javax.imageio.ImageIO;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -89,21 +89,21 @@ public class dbtest {
}
public static abstract class STJob implements Runnable {
private final ObjectIndex table_test, table_reference;
private final Index table_test, table_reference;
private final long source;
public STJob(final ObjectIndex table_test, final ObjectIndex table_reference, final long aSource) {
public STJob(final Index table_test, final Index table_reference, final long aSource) {
this.table_test = table_test;
this.table_reference = table_reference;
this.source = aSource;
}
public ObjectIndex getTable_test() {
public Index getTable_test() {
return this.table_test;
}
public ObjectIndex getTable_reference() {
public Index getTable_reference() {
return this.table_reference;
}
@ -115,7 +115,7 @@ public class dbtest {
}
public static final class WriteJob extends STJob {
public WriteJob(final ObjectIndex table_test, final ObjectIndex table_reference, final long aSource) {
public WriteJob(final Index table_test, final Index table_reference, final long aSource) {
super(table_test, table_reference, aSource);
}
@ -138,7 +138,7 @@ public class dbtest {
}
public static final class RemoveJob extends STJob {
public RemoveJob(final ObjectIndex table_test, final ObjectIndex table_reference, final long aSource) {
public RemoveJob(final Index table_test, final Index table_reference, final long aSource) {
super(table_test, table_reference, aSource);
}
@ -157,7 +157,7 @@ public class dbtest {
}
public static final class ReadJob extends STJob {
public ReadJob(final ObjectIndex table_test, final ObjectIndex table_reference, final long aSource) {
public ReadJob(final Index table_test, final Index table_reference, final long aSource) {
super(table_test, table_reference, aSource);
}
@ -194,7 +194,7 @@ public class dbtest {
}
}
public static ObjectIndex selectTableType(final String dbe, final String tablename, final Row testRow) throws Exception {
public static Index selectTableType(final String dbe, final String tablename, final Row testRow) throws Exception {
if (dbe.equals("kelondroRowSet")) {
return new RowSet(testRow, 0);
}
@ -214,7 +214,7 @@ public class dbtest {
return null;
}
public static boolean checkEquivalence(final ObjectIndex test, final ObjectIndex reference) throws IOException {
public static boolean checkEquivalence(final Index test, final Index reference) throws IOException {
if (reference == null) return true;
if (test.size() == reference.size()) {
System.out.println("* Testing equivalence of test table to reference table, " + test.size() + " entries");
@ -288,8 +288,8 @@ public class dbtest {
// create the database access
final Row testRow = new Row("byte[] key-" + keylength + ", byte[] dummy-" + keylength + ", value-" + valuelength, Base64Order.enhancedCoder);
final ObjectIndex table_test = selectTableType(dbe_test, tablename_test, testRow);
final ObjectIndex table_reference = (dbe_reference == null) ? null : selectTableType(dbe_reference, tablename_reference, testRow);
final Index table_test = selectTableType(dbe_test, tablename_test, testRow);
final Index table_reference = (dbe_reference == null) ? null : selectTableType(dbe_reference, tablename_reference, testRow);
final long afterinit = System.currentTimeMillis();
System.out.println("Test for db-engine " + dbe_test + " started to create file " + tablename_test + " with test " + command);

@ -91,6 +91,7 @@ public class ArrayStack implements BLOB {
protected List<blobItem> blobs;
private final String prefix;
private final int buffersize;
private final boolean trimall;
// the thread pool for the keeperOf executor service
private final ExecutorService executor;
@ -100,7 +101,8 @@ public class ArrayStack implements BLOB {
final String prefix,
final int keylength,
final ByteOrder ordering,
final int buffersize) throws IOException {
final int buffersize,
final boolean trimall) throws IOException {
this.keylength = keylength;
this.prefix = prefix;
this.ordering = ordering;
@ -110,6 +112,7 @@ public class ArrayStack implements BLOB {
this.fileSizeLimit = (long) Integer.MAX_VALUE;
this.repositoryAgeMax = Long.MAX_VALUE;
this.repositorySizeMax = Long.MAX_VALUE;
this.trimall = trimall;
// init the thread pool for the keeperOf executor service
this.executor = new ThreadPoolExecutor(
@ -187,7 +190,12 @@ public class ArrayStack implements BLOB {
d = DateFormatter.parseShortMilliSecond(files[i].substring(prefix.length() + 1, prefix.length() + 18));
f = new File(heapLocation, files[i]);
time = d.getTime();
oneBlob = (time == maxtime) ? new Heap(f, keylength, ordering, buffersize) : new HeapModifier(f, keylength, ordering);
if (time == maxtime && !trimall) {
oneBlob = new Heap(f, keylength, ordering, buffersize);
} else {
oneBlob = new HeapModifier(f, keylength, ordering);
oneBlob.trim(); // no writings here, can be used with minimum memory
}
sortedItems.put(Long.valueOf(time), new blobItem(d, f, oneBlob));
} catch (ParseException e) {continue;}
}
@ -200,6 +208,19 @@ public class ArrayStack implements BLOB {
}
}
/**
 * calculate the memory in RAM that this array of BLOBs occupies
 * @return the sum of the mem() values of all BLOBs in this array
 */
public long mem() {
    long total = 0;
    for (blobItem item: this.blobs) {
        total += item.blob.mem();
    }
    return total;
}
/**
* trimming is not supported on an ArrayStack as a whole; calling this method always fails.
* @throws UnsupportedOperationException always
*/
public void trim() {
// trim shall not be called for ArrayStacks because the characteristic of an ArrayStack is that the 'topmost' BLOB on the stack
// is used for write operations and all others shall be trimmed automatically since they are not used for writing. And the
// topmost BLOB must not be trimmed to support fast writings.
throw new UnsupportedOperationException();
}
/**
* add a blob file to the array.
* note that this file must be generated with a file name from newBLOB()
@ -213,7 +234,13 @@ public class ArrayStack implements BLOB {
} catch (ParseException e) {
throw new IOException("date parse problem with file " + location.toString() + ": " + e.getMessage());
}
BLOB oneBlob = (full && buffersize > 0) ? new Heap(location, keylength, ordering, buffersize) : new HeapModifier(location, keylength, ordering);
BLOB oneBlob;
if (full && buffersize > 0 && !trimall) {
oneBlob = new Heap(location, keylength, ordering, buffersize);
} else {
oneBlob = new HeapModifier(location, keylength, ordering);
oneBlob.trim();
}
blobs.add(new blobItem(d, location, oneBlob));
}
@ -321,25 +348,6 @@ public class ArrayStack implements BLOB {
return unmount(idx);
}
/*
public synchronized File unmountSimilarSizeBLOB(long otherSize) {
if (this.blobs.isEmpty() || otherSize == 0) return null;
blobItem b;
double delta, bestDelta = Double.MAX_VALUE;
int bestIndex = -1;
for (int i = 0; i < this.blobs.size(); i++) {
b = this.blobs.get(i);
if (b.location.length() == 0) continue;
delta = ((double) b.location.length()) / ((double) otherSize);
if (delta < 1.0) delta = 1.0 / delta;
if (delta < bestDelta) {
bestDelta = delta;
bestIndex = i;
}
}
return unmount(bestIndex);
}
*/
/**
* return the number of BLOB files in this array
* @return
@ -684,7 +692,7 @@ public class ArrayStack implements BLOB {
}
/**
* replace a BLOB entry with another which must be smaller or same size
* replace a BLOB entry with another
* @param key the primary key
* @throws IOException
* @throws RowSpaceExceededException
@ -697,13 +705,29 @@ public class ArrayStack implements BLOB {
return d;
}
/**
 * apply a reducer to the entry with the given key in every BLOB of this array;
 * the rewritten entry must be smaller than or the same size as the old one
 * @param key the primary key
 * @param reduce the reducer that is handed to each BLOB
 * @return the sum of the values returned by the reduce method of each BLOB
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public synchronized int reduce(byte[] key, Reducer reduce) throws IOException, RowSpaceExceededException {
    int sum = 0;
    for (blobItem item: blobs) {
        final int part = item.blob.reduce(key, reduce);
        sum += part;
    }
    return sum;
}
/**
 * remove the entry with the given key from every BLOB of this array
 * @param key the primary key
 * @throws IOException
 */
public synchronized void remove(byte[] key) throws IOException {
    // remember the memory usage before the removal so the assert below can verify it did not grow
    final long memBefore = this.mem();
    for (blobItem item: blobs) {
        item.blob.remove(key);
    }
    assert this.mem() <= memBefore : "m = " + memBefore + ", mem() = " + mem();
}
/**
@ -971,7 +995,7 @@ public class ArrayStack implements BLOB {
final File f = new File("/Users/admin/blobarraytest");
try {
//f.delete();
final ArrayStack heap = new ArrayStack(f, "test", 12, NaturalOrder.naturalOrder, 512 * 1024);
final ArrayStack heap = new ArrayStack(f, "test", 12, NaturalOrder.naturalOrder, 512 * 1024, false);
heap.put("aaaaaaaaaaaa".getBytes(), "eins zwei drei".getBytes());
heap.put("aaaaaaaaaaab".getBytes(), "vier fuenf sechs".getBytes());
heap.put("aaaaaaaaaaac".getBytes(), "sieben acht neun".getBytes());

@ -59,6 +59,18 @@ public interface BLOB {
*/
public void clear() throws IOException;
/**
* trim the index of the database: this releases memory not currently used.
* The method declares no checked exception, so an implementation cannot propagate an IOException from here.
*/
public void trim();
/**
* calculate the memory in RAM that the BLOB occupies
* @return the number of bytes that are used
*/
public long mem();
/**
* ask for the number of entries
* @return the number of entries in the table
@ -139,12 +151,22 @@ public interface BLOB {
* It is therefore necessary that it is known that the new entry will be smaller than the
* old entry before calling this method.
* @param key the primary key
* @param b
* @param rewriter
* @return the number of bytes that the rewriter reduced the BLOB
* @throws IOException
* @throws RowSpaceExceededException
*/
public int replace(byte[] key, Rewriter rewriter) throws IOException, RowSpaceExceededException;
/**
* a reduce method does the same as the replace method, but the rewritten entry must not be larger than the old one.
* A replace subsumes a reduce method; an implementation of reduce may be simpler.
* @param key the primary key
* @param reducer the rewriter that produces the new, not larger, content of the entry
* @return the number of bytes by which the reducer reduced the BLOB
* @throws IOException
* @throws RowSpaceExceededException
*/
public int reduce(byte[] key, Reducer reducer) throws IOException, RowSpaceExceededException;
/**
* remove a BLOB
@ -171,4 +193,14 @@ public interface BLOB {
}
/**
* A Reducer is a Rewriter that reduces the content. There are no additional methods in this interface.
* The interface shall be used in place of a Rewriter simply to state the fact that the rewrite
* reduces the content of a BLOB entry, or at most keeps its size the same.
*/
public interface Reducer extends Rewriter {
}
}

@ -70,6 +70,14 @@ public class Compressor implements BLOB {
initBuffer();
}
/**
 * calculate the memory in RAM that this BLOB occupies
 * @return the value reported by the mem() method of the backend BLOB
 */
public long mem() {
    final long backendMem = this.backend.mem();
    return backendMem;
}
/**
 * trim the backend BLOB; the call is forwarded unchanged
 */
public void trim() {
    backend.trim();
}
private static class Entity implements Map.Entry<String, byte[]> {
private String key;
private byte[] payload;
@ -361,5 +369,16 @@ public class Compressor implements BLOB {
this.put(key, c);
return reduction;
}
/**
 * apply a reducer to the entry stored under the given key and store the result if it became smaller
 * @param key the primary key
 * @param reducer the rewriter that produces the new, not larger, content of the entry
 * @return the number of bytes by which the entry was reduced; 0 if the key does not exist
 *         or if the size did not change
 * @throws IOException
 * @throws RowSpaceExceededException
 */
public int reduce(byte[] key, Reducer reducer) throws IOException, RowSpaceExceededException {
    byte[] b = get(key);
    if (b == null) return 0; // nothing is stored under this key
    byte[] c = reducer.rewrite(b);
    // the reduction is the number of bytes saved: old length minus new length.
    // The previous computation (new minus old) produced a negative value for every real
    // reduction and therefore tripped the assert below.
    int reduction = b.length - c.length;
    assert reduction >= 0 : "reducer enlarged the entry: before = " + b.length + ", after = " + c.length;
    // NOTE(review): a rewrite that changes the content but keeps the size is not written back here - confirm that this is intended
    if (reduction == 0) return 0;
    this.put(key, c);
    return reduction;
}
}

@ -239,14 +239,20 @@ public class HeapModifier extends HeapReader implements BLOB {
}
public int replace(byte[] key, final Rewriter rewriter) throws IOException {
key = normalizeKey(key);
assert key.length == this.keylength;
// pre-check before synchronization
throw new UnsupportedOperationException();
}
public int reduce(byte[] key, final Reducer reducer) throws IOException {
key = normalizeKey(key);
assert key.length == this.keylength;
// pre-check before synchronization
long pos = index.get(key);
if (pos < 0) return 0;
synchronized (this) {
long m = this.mem();
// check again if the index contains the key
pos = index.get(key);
if (pos < 0) return 0;
@ -271,7 +277,7 @@ public class HeapModifier extends HeapReader implements BLOB {
file.readFully(blob, 0, blob.length);
// rewrite the entry
blob = rewriter.rewrite(blob);
blob = reducer.rewrite(blob);
int reduction = len - blob.length;
if (reduction == 0) {
// even if the reduction is zero, it is still possible that the record has been changed
@ -302,8 +308,10 @@ public class HeapModifier extends HeapReader implements BLOB {
// add a new free entry
this.free.put(pos + 4 + blob.length + key.length, newfreereclen);
assert mem() <= m : "m = " + m + ", mem() = " + mem();
return reduction;
}
}
}
}

@ -112,7 +112,15 @@ public class HeapReader {
this.file.close();
// the file will be opened again automatically when the next access to it comes.
}
/**
 * calculate the memory in RAM that the index of this heap occupies.
 * The memory for the free-list is deliberately not added here,
 * since otherwise the asserts for memory management would not work.
 * @return the value reported by the mem() method of the index
 */
public long mem() {
    return this.index.mem();
}
/**
 * trim the index of this heap; the call is forwarded to the index
 */
public void trim() {
    index.trim();
}
protected byte[] normalizeKey(byte[] key) {
// check size of key: zero-filled keys are only possible if the ordering is
// an instance of the natural ordering. Base64-orderings cannot use zeros in keys.

@ -3,7 +3,9 @@
* Copyright 2010 by Michael Peter Christen
* First released 18.4.2010 at http://yacy.net
*
* This file is part of YaCy
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -37,14 +39,14 @@ import net.yacy.kelondro.order.MergeIterator;
* @author Michael Peter Christen
*
*/
public class BufferedObjectIndex implements ObjectIndex, Iterable<Row.Entry> {
public class BufferedObjectIndex implements Index, Iterable<Row.Entry> {
private final ObjectIndex backend;
private final Index backend;
private final RowSet buffer;
private final int buffersize;
private final Row.EntryComparator entryComparator;
public BufferedObjectIndex(ObjectIndex backend, int buffersize) {
public BufferedObjectIndex(Index backend, int buffersize) {
this.backend = backend;
this.buffersize = buffersize;
this.buffer = new RowSet(backend.row());

@ -1,29 +1,26 @@
// Cache.java
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 26.10.2006 on http://www.anomic.de
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* Cache.java
* Copyright 2006 by Michael Peter Christen
* First released 26.10.2006 at http://yacy.net
*
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -41,7 +38,7 @@ import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.util.MemoryControl;
public final class Cache implements ObjectIndex, Iterable<Row.Entry> {
public final class Cache implements Index, Iterable<Row.Entry> {
// this is a combined read cache and write buffer
// we maintain four tables:
@ -57,7 +54,7 @@ public final class Cache implements ObjectIndex, Iterable<Row.Entry> {
private static final long memStartShrink = 20 * 1024 * 1024; // a limit for the node cache to start with shrinking if less than this memory amount is available
// class objects
private final ObjectIndex index; // the back-end of the cache
private final Index index; // the back-end of the cache
private RowSet readHitCache; // contains a complete copy of the cached objects
private RowSet readMissCache; // contains only the keys of the objects that had been a miss
private Row keyrow;
@ -72,7 +69,7 @@ public final class Cache implements ObjectIndex, Iterable<Row.Entry> {
* @param hitLimit a limit of cache hit entries. If given as value <= 0, then only the RAM limits the size
* @param missLimit a limit of cache miss entries. If given as value <= 0, then only the RAM limits the size
*/
public Cache(final ObjectIndex backupIndex, final int hitLimit, final int missLimit) {
public Cache(final Index backupIndex, final int hitLimit, final int missLimit) {
this.index = backupIndex;
this.hitLimit = hitLimit;
this.missLimit = missLimit;

@ -1,26 +1,26 @@
// HandleMap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 08.04.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* HandleMap
* Copyright 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 08.04.2008 at http://yacy.net
*
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -53,7 +53,7 @@ import net.yacy.kelondro.order.CloneableIterator;
public final class HandleMap implements Iterable<Row.Entry> {
private final Row rowdef;
private RowSetArray index;
private RAMIndexCluster index;
/**
* initialize a HandleMap
@ -65,7 +65,7 @@ public final class HandleMap implements Iterable<Row.Entry> {
*/
public HandleMap(final int keylength, final ByteOrder objectOrder, final int idxbytes, final int expectedspace, String name) {
this.rowdef = new Row(new Column[]{new Column("key", Column.celltype_binary, Column.encoder_bytes, keylength, "key"), new Column("long c-" + idxbytes + " {b256}")}, objectOrder);
this.index = new RowSetArray(name, rowdef, spread(expectedspace));
this.index = new RAMIndexCluster(name, rowdef, spread(expectedspace));
}
/**
@ -95,6 +95,10 @@ public final class HandleMap implements Iterable<Row.Entry> {
assert this.index.size() == file.length() / (keylength + idxbytes);
}
/**
 * trim the underlying index; the call is forwarded unchanged
 */
public void trim() {
    index.trim();
}
/**
 * calculate the memory in RAM that the underlying index occupies
 * @return the value reported by the mem() method of the index
 */
public long mem() {
    final long indexMem = this.index.mem();
    return indexMem;
}
@ -266,7 +270,14 @@ public final class HandleMap implements Iterable<Row.Entry> {
/**
 * remove the entry with the given key from the index
 * @param key the primary key, must not be null
 * @return the long value stored in column 1 of the removed entry, or -1 if the key was not in the index
 */
public final synchronized long remove(final byte[] key) {
    assert (key != null);
    if (!index.has(key)) {
        return -1;
    }
    // take size and memory before the removal; the asserts below check that neither has grown
    final int sizeBefore = index.size();
    final long memBefore = index.mem();
    final Row.Entry removed = index.remove(key);
    assert (removed != null);
    assert index.size() < sizeBefore : "s = " + sizeBefore + ", index.size() = " + index.size();
    assert index.mem() <= memBefore : "m = " + memBefore + ", index.mem() = " + index.mem();
    // with asserts switched off a failed removal is still reported as 'not found'
    return (removed == null) ? -1 : removed.getColLong(1);
}

@ -1,32 +1,25 @@
// ObjectIndex.java
// ------------------
// part of the Kelondro Database
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// created: 26.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* A kelondroIndex is a table with indexed access on the first column
Elements may be selected from the table with logarithmic computation time
using the get-method. Inserts have also the same computation order and
can be done with the put-method.
The kelondro Database provides two implementations of this interface:
kelondroTree and kelondroHashtable
/**
* Index
* Copyright 2005 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 26.10.2005 at http://yacy.net
*
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -39,7 +32,7 @@ import java.util.List;
import net.yacy.kelondro.order.CloneableIterator;
public interface ObjectIndex extends Iterable<Row.Entry> {
public interface Index extends Iterable<Row.Entry> {
public String filename(); // returns a unique identified for this index; can be a real or artificial file name
public int size();

@ -1,26 +1,26 @@
// ObjectIndexCache.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 07.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* RAMIndex
* Copyright 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 07.01.2008 at http://yacy.net
*
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -37,9 +37,9 @@ import net.yacy.kelondro.order.MergeIterator;
import net.yacy.kelondro.order.StackIterator;
public final class ObjectIndexCache implements ObjectIndex, Iterable<Row.Entry> {
public final class RAMIndex implements Index, Iterable<Row.Entry> {
private static final TreeMap<String, ObjectIndexCache> objectTracker = new TreeMap<String, ObjectIndexCache>();
private static final TreeMap<String, RAMIndex> objectTracker = new TreeMap<String, RAMIndex>();
private final String name;
private final Row rowdef;
@ -48,7 +48,7 @@ public final class ObjectIndexCache implements ObjectIndex, Iterable<Row.Entry>
private final Row.EntryComparator entryComparator;
//private final int spread;
public ObjectIndexCache(String name, final Row rowdef, final int expectedspace) {
public RAMIndex(String name, final Row rowdef, final int expectedspace) {
this.name = name;
this.rowdef = rowdef;
this.entryComparator = new Row.EntryComparator(rowdef.objectOrder);
@ -57,7 +57,7 @@ public final class ObjectIndexCache implements ObjectIndex, Iterable<Row.Entry>
objectTracker.put(name, this);
}
private ObjectIndexCache(String name, final Row rowdef, RowSet index0, RowSet index1, Row.EntryComparator entryComparator) {
private RAMIndex(String name, final Row rowdef, RowSet index0, RowSet index1, Row.EntryComparator entryComparator) {
this.name = name;
this.rowdef = rowdef;
this.index0 = index0;
@ -66,18 +66,23 @@ public final class ObjectIndexCache implements ObjectIndex, Iterable<Row.Entry>
objectTracker.put(name, this);
}
public static final Iterator<Map.Entry<String, ObjectIndexCache>> objects() {
/**
 * Gives access to the static tracker of RAMIndex instances.
 *
 * @return an iterator over the (name, instance) entries of the tracker map
 */
public static final Iterator<Map.Entry<String, RAMIndex>> objects() {
    return RAMIndex.objectTracker.entrySet().iterator();
}
public ObjectIndexCache clone() {
return new ObjectIndexCache(this.name + ".clone", this.rowdef, index0.clone(), index1.clone(), entryComparator);
/**
 * Creates a copy of this index whose two row sets are clones of the
 * row sets held here. The copy is named like this index with the
 * suffix ".clone"; row definition and entry comparator are shared.
 *
 * @return the newly constructed copy
 */
public RAMIndex clone() {
    final RowSet copy0 = index0.clone();
    final RowSet copy1 = index1.clone();
    final String cloneName = this.name + ".clone";
    return new RAMIndex(cloneName, this.rowdef, copy0, copy1, entryComparator);
}
/**
 * Clears this index; this is a plain delegation to {@link #reset()}.
 */
public void clear() {
    this.reset();
}
/**
 * Calls {@code trim()} on each of the two internal row sets that is present.
 *
 * Each field is read exactly once into a local before the null check.
 * This method is not synchronized, while {@link #reset()} assigns
 * {@code null} to {@code index0} for a moment; testing the field and then
 * dereferencing the field again could therefore hit a null reference
 * in between. Working on the local snapshot avoids that.
 */
public void trim() {
    final RowSet set0 = this.index0;
    if (set0 != null) set0.trim();
    final RowSet set1 = this.index1;
    if (set1 != null) set1.trim();
}
public final synchronized void reset() {
this.index0 = null; // first flush RAM to make room
this.index0 = new RowSet(rowdef);
@ -223,13 +228,17 @@ public final class ObjectIndexCache implements ObjectIndex, Iterable<Row.Entry>
/**
 * Removes the row stored under the given key.
 *
 * The key is looked up in index0 first; only if index0 did not hold it,
 * index1 is tried. With assertions enabled, the method verifies that the
 * affected set shrank and that the key can no longer be found in it.
 *
 * @param key the primary key of the row to remove
 * @return the removed row, or null if neither set returned one
 */
public final synchronized Row.Entry remove(final byte[] key) {
    finishInitialization();

    // first attempt: the initialization part (index0)
    final int sizeBefore0 = index0.size();
    final Row.Entry hit0 = index0.remove(key);
    if (hit0 == null) {
        // not found there, so the entry can only be in index1
        final int sizeBefore1 = index1.size();
        final Row.Entry hit1 = index1.remove(key);
        assert hit1 == null || index1.size() < sizeBefore1 : "s = " + sizeBefore1 + ", index1.size() = " + index1.size();
        assert index1.get(key) == null : "removed " + ((hit1 == null) ? " is null" : " is not null") + ", and index entry still exists"; // check if remove worked
        return hit1;
    }

    assert index0.size() < sizeBefore0 : "s = " + sizeBefore0 + ", index0.size() = " + index0.size();
    assert index0.get(key) == null; // check if remove worked
    return hit0;
}

@ -1,23 +1,26 @@
// RowSetArray.java
// --------------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://yacy.net
// Frankfurt, Germany, 2009
// last major change: 12.03.2009
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* RAMIndexCluster
* Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 12.03.2009 at http://yacy.net
*
* $LastChangedDate: 2010-06-16 17:11:21 +0200 (Mi, 16 Jun 2010) $
* $LastChangedRevision: 6922 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -34,48 +37,52 @@ import net.yacy.kelondro.order.MergeIterator;
import net.yacy.kelondro.order.StackIterator;
public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Cloneable {
public final class RAMIndexCluster implements Index, Iterable<Row.Entry>, Cloneable {
private final String name;
private final Row rowdef;
private final ObjectIndexCache[] array;
private final RAMIndex[] cluster;
public RowSetArray(String name, final Row rowdef, final int arraySize) {
public RAMIndexCluster(String name, final Row rowdef, final int clusterSize) {
//assert arraySize < 100 : arraySize;
this.name = name;
this.array = new ObjectIndexCache[arraySize];
this.cluster = new RAMIndex[clusterSize];
this.rowdef = rowdef;
for (int i = 0; i < arraySize; i++) {
this.array[i] = new ObjectIndexCache(name + "." + i, rowdef, 0);
for (int i = 0; i < clusterSize; i++) {
this.cluster[i] = new RAMIndex(name + "." + i, rowdef, 0);
}
}
private RowSetArray(String name, final Row rowdef, final ObjectIndexCache[] array) {
private RAMIndexCluster(String name, final Row rowdef, final RAMIndex[] array) {
this.name = name;
this.array = array;
this.cluster = array;
this.rowdef = rowdef;
}
public RowSetArray clone() {
ObjectIndexCache[] a = new ObjectIndexCache[this.array.length];
for (int i = 0; i < this.array.length; i++) {
a[i] = this.array[i].clone();
/**
 * Calls {@code trim()} on every cluster member that is not null.
 */
public void trim() {
    for (int pos = 0; pos < this.cluster.length; pos++) {
        final RAMIndex member = this.cluster[pos];
        if (member != null) {
            member.trim();
        }
    }
}
public RAMIndexCluster clone() {
RAMIndex[] a = new RAMIndex[this.cluster.length];
for (int i = 0; i < this.cluster.length; i++) {
a[i] = this.cluster[i].clone();
}
return new RowSetArray(this.name + ".clone", this.rowdef, a);
return new RAMIndexCluster(this.name + ".clone", this.rowdef, a);
}
private final int indexFor(final byte[] key) {
return (int) ((this.rowdef.objectOrder.cardinal(key) / 17) % ((long) array.length));
return (int) ((this.rowdef.objectOrder.cardinal(key) / 17) % ((long) cluster.length));
}
private final int indexFor(final Entry row) {
return (int) ((this.rowdef.objectOrder.cardinal(row.bytes(), 0, row.getPrimaryKeyLength()) / 17) % ((long) array.length));
return (int) ((this.rowdef.objectOrder.cardinal(row.bytes(), 0, row.getPrimaryKeyLength()) / 17) % ((long) cluster.length));
}
public final byte[] smallestKey() {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.array.length);
synchronized (this.array) {
for (ObjectIndexCache rs: this.array) try {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.cluster.length);
synchronized (this.cluster) {
for (RAMIndex rs: this.cluster) try {
keysort.put(rs.smallestKey());
} catch (RowSpaceExceededException e) {
Log.logException(e);
@ -85,9 +92,9 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
}
public final byte[] largestKey() {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.array.length);
synchronized (this.array) {
for (ObjectIndexCache rs: this.array) try {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.cluster.length);
synchronized (this.cluster) {
for (RAMIndex rs: this.cluster) try {
keysort.put(rs.largestKey());
} catch (RowSpaceExceededException e) {
Log.logException(e);
@ -96,11 +103,11 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
return keysort.largestKey();
}
private final ObjectIndexCache accessArray(final int i) {
ObjectIndexCache r = this.array[i];
if (r == null) synchronized (this.array) {
r = new ObjectIndexCache(name + "." + i, this.rowdef, 0);
this.array[i] = r;
/**
 * Returns the cluster member at position {@code i}, creating and storing
 * a new empty one if the slot is still null.
 *
 * The slot is read a second time after the lock on {@code cluster} has been
 * taken. Without that second read, two threads that both observed null
 * would each create a RAMIndex, and the later assignment would replace the
 * earlier instance together with anything already written into it.
 * The first, unlocked read is kept as in the previous implementation so
 * that access to an already populated slot does not take the lock.
 *
 * @param i position within the cluster array
 * @return the member stored at that position, never null
 */
private final RAMIndex accessArray(final int i) {
    RAMIndex r = this.cluster[i];
    if (r != null) return r;
    synchronized (this.cluster) {
        // re-check under the lock: another thread may have filled the slot meanwhile
        r = this.cluster[i];
        if (r == null) {
            r = new RAMIndex(name + "." + i, this.rowdef, 0);
            this.cluster[i] = r;
        }
    }
    return r;
}
@ -116,15 +123,15 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
}
public final void clear() {
synchronized (this.array) {
for (ObjectIndexCache c: this.array) if (c != null) c.clear();
synchronized (this.cluster) {
for (RAMIndex c: this.cluster) if (c != null) c.clear();
}
}
public final void close() {
clear();
synchronized (this.array) {
for (ObjectIndexCache c: this.array) if (c != null) c.close();
synchronized (this.cluster) {
for (RAMIndex c: this.cluster) if (c != null) c.close();
}
}
@ -140,7 +147,7 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
public final Entry get(final byte[] key) {
final int i = indexFor(key);
if (i < 0) return null;
final ObjectIndexCache r = this.array[i];
final RAMIndex r = this.cluster[i];
if (r == null) return null;
return r.get(key);
}
@ -148,17 +155,17 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
public final boolean has(final byte[] key) {
final int i = indexFor(key);
if (i < 0) return false;
final ObjectIndexCache r = this.array[i];
final RAMIndex r = this.cluster[i];
if (r == null) return false;
return r.has(key);
}
public final CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) {
synchronized (this.array) {
synchronized (this.cluster) {
final Collection<CloneableIterator<byte[]>> col = new ArrayList<CloneableIterator<byte[]>>();
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
col.add(this.array[i].keys(up, firstKey));
for (int i = 0; i < this.cluster.length; i++) {
if (this.cluster[i] != null) {
col.add(this.cluster[i].keys(up, firstKey));
}
}
return MergeIterator.cascade(col, this.rowdef.objectOrder, MergeIterator.simpleMerge, up);
@ -185,11 +192,11 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
public final ArrayList<RowCollection> removeDoubles() throws RowSpaceExceededException {
final ArrayList<RowCollection> col = new ArrayList<RowCollection>();
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
col.addAll(this.array[i].removeDoubles());
if (this.array[i].isEmpty()) this.array[i] = null;
synchronized (this.cluster) {
for (int i = 0; i < this.cluster.length; i++) {
if (this.cluster[i] != null) {
col.addAll(this.cluster[i].removeDoubles());
if (this.cluster[i].isEmpty()) this.cluster[i] = null;
}
}
}
@ -197,11 +204,11 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
}
public final Entry removeOne() {
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
final Entry entry = this.array[i].removeOne();
if (this.array[i].isEmpty()) this.array[i] = null;
synchronized (this.cluster) {
for (int i = 0; i < this.cluster.length; i++) {
if (this.cluster[i] != null) {
final Entry entry = this.cluster[i].removeOne();
if (this.cluster[i].isEmpty()) this.cluster[i] = null;
return entry;
}
}
@ -211,11 +218,11 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
public List<Row.Entry> top(int count) {
List<Row.Entry> list = new ArrayList<Row.Entry>();
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
synchronized (this.cluster) {
for (int i = 0; i < this.cluster.length; i++) {
if (this.cluster[i] != null) {
try {
List<Row.Entry> list0 = this.array[i].top(count - list.size());
List<Row.Entry> list0 = this.cluster[i].top(count - list.size());
list.addAll(list0);
} catch (IOException e) {
continue;
@ -239,13 +246,13 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
@SuppressWarnings("unchecked")
public final CloneableIterator<Entry> rows(final boolean up, final byte[] firstKey) {
synchronized (this.array) {
final CloneableIterator<Entry>[] col = new CloneableIterator[this.array.length];
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] == null) {
synchronized (this.cluster) {
final CloneableIterator<Entry>[] col = new CloneableIterator[this.cluster.length];
for (int i = 0; i < this.cluster.length; i++) {
if (this.cluster[i] == null) {
col[i] = null;
} else {
col[i] = this.array[i].rows(up, firstKey);
col[i] = this.cluster[i].rows(up, firstKey);
}
}
return StackIterator.stack(col);
@ -258,36 +265,23 @@ public final class RowSetArray implements ObjectIndex, Iterable<Row.Entry>, Clon
public final int size() {
int c = 0;
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
c += this.array[i].size();
}
}
synchronized (this.cluster) {
for (RAMIndex i: this.cluster) if (i != null) c += i.size();
}
return c;
}
public long mem() {
long m = 0;
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
m += this.array[i].mem();
}
}
synchronized (this.cluster) {
for (RAMIndex i: this.cluster) if (i != null) m += i.mem();
}
return m;
}
public final boolean isEmpty() {
synchronized (this.array) {
for (int i = 0; i < this.array.length; i++) {
if (this.array[i] != null) {
if (!this.array[i].isEmpty()) return false;
}
}
synchronized (this.cluster) {
for (RAMIndex i: this.cluster) if (i != null && !i.isEmpty()) return false;
}
return true;
}

@ -877,15 +877,14 @@ public class RowCollection implements Iterable<Row.Entry>, Cloneable {
}
private final int picMiddle(final int a, final int b, final int c) {
if (a > b) {
if (c > a) return a;
if (c < b) return b;
return c;
if (compare(a, b) > 0) {
if (compare(c, a) > 0) return a;
if (compare(b, c) > 0) return b;
} else {
if (c < a) return a;
if (c > b) return b;
return c;
if (compare(a, c) > 0) return a;
if (compare(c, b) > 0) return b;
}
return c;
//if (c < a && a < b || a > b && c > a) return a;
//if (a < b && c > b || c < b && a > b) return b;
}

@ -1,26 +1,26 @@
// RowSet.java
// (C) 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 20.06.2006 on http://www.anomic.de
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* RowSet
* Copyright 2006 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
* First released 20.06.2006 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.index;
@ -33,7 +33,7 @@ import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.order.NaturalOrder;
public class RowSet extends RowCollection implements ObjectIndex, Iterable<Row.Entry> {
public class RowSet extends RowCollection implements Index, Iterable<Row.Entry> {
private static final int collectionReSortLimit = 300;

@ -249,36 +249,40 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
public int remove(byte[] termHash, HandleSet urlHashes) throws IOException {
int removed = this.ram.remove(termHash, urlHashes);
int reduced;
//final long am = this.array.mem();
try {
reduced = this.array.replace(termHash, new RemoveRewriter<ReferenceType>(urlHashes));
reduced = this.array.reduce(termHash, new RemoveReducer<ReferenceType>(urlHashes));
} catch (RowSpaceExceededException e) {
reduced = 0;
Log.logWarning("IndexCell", "not possible to remove urlHashes from a RWI because of too low memory. Remove was not applied. Please increase RAM assignment");
}
//assert this.array.mem() <= am : "am = " + am + ", array.mem() = " + this.array.mem();
return removed + (reduced / this.array.rowdef().objectsize);
}
public boolean remove(byte[] termHash, byte[] urlHashBytes) throws IOException {
boolean removed = this.ram.remove(termHash, urlHashBytes);
int reduced;
//final long am = this.array.mem();
try {
reduced = this.array.replace(termHash, new RemoveRewriter<ReferenceType>(urlHashBytes));
reduced = this.array.reduce(termHash, new RemoveReducer<ReferenceType>(urlHashBytes));
} catch (RowSpaceExceededException e) {
reduced = 0;
Log.logWarning("IndexCell", "not possible to remove urlHashes from a RWI because of too low memory. Remove was not applied. Please increase RAM assignment");
}
//assert this.array.mem() <= am : "am = " + am + ", array.mem() = " + this.array.mem();
return removed || (reduced > 0);
}
private static class RemoveRewriter<ReferenceType extends Reference> implements ReferenceContainerArray.ContainerRewriter<ReferenceType> {
private static class RemoveReducer<ReferenceType extends Reference> implements ReferenceContainerArray.ContainerReducer<ReferenceType> {
HandleSet urlHashes;
public RemoveRewriter(HandleSet urlHashes) {
public RemoveReducer(HandleSet urlHashes) {
this.urlHashes = urlHashes;
}
public RemoveRewriter(byte[] urlHashBytes) {
public RemoveReducer(byte[] urlHashBytes) {
this.urlHashes = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.objectOrder, 0);
try {
this.urlHashes.put(urlHashBytes);
@ -287,7 +291,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
}
public ReferenceContainer<ReferenceType> rewrite(ReferenceContainer<ReferenceType> container) {
public ReferenceContainer<ReferenceType> reduce(ReferenceContainer<ReferenceType> container) {
container.sort();
container.removeEntries(urlHashes);
return container;

@ -71,7 +71,8 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
prefix,
payloadrow.primaryKeyLength,
termOrder,
0);
0,
true);
assert merger != null;
this.merger = merger;
}
@ -84,6 +85,10 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
this.array.clear();
}
/**
 * Reports the value that the underlying array returns from its own
 * {@code mem()} method.
 *
 * @return the result of {@code array.mem()}
 */
public long mem() {
    return this.array.mem();
}
/**
 * Returns the result of {@code sizes()} of the underlying array.
 *
 * @return the sizes as reported by the array, or an empty int array
 *         if the array reference is null
 */
public int[] sizes() {
    if (this.array != null) {
        return this.array.sizes();
    }
    return new int[0];
}
@ -249,34 +254,36 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
array.remove(termHash);
}
public int replace(final byte[] termHash, ContainerRewriter<ReferenceType> rewriter) throws IOException, RowSpaceExceededException {
return array.replace(termHash, new BLOBRewriter(termHash, rewriter));
public int reduce(final byte[] termHash, ContainerReducer<ReferenceType> reducer) throws IOException, RowSpaceExceededException {
return array.reduce(termHash, new BLOBReducer(termHash, reducer));
}
public class BLOBRewriter implements BLOB.Rewriter {
public class BLOBReducer implements BLOB.Reducer {
ContainerRewriter<ReferenceType> rewriter;
ContainerReducer<ReferenceType> rewriter;
byte[] wordHash;
public BLOBRewriter(byte[] wordHash, ContainerRewriter<ReferenceType> rewriter) {
public BLOBReducer(byte[] wordHash, ContainerReducer<ReferenceType> rewriter) {
this.rewriter = rewriter;
this.wordHash = wordHash;
}
public byte[] rewrite(byte[] b) {
if (b == null) return null;
ReferenceContainer<ReferenceType> c = rewriter.rewrite(new ReferenceContainer<ReferenceType>(factory, this.wordHash, RowSet.importRowSet(b, payloadrow)));
ReferenceContainer<ReferenceType> c = rewriter.reduce(new ReferenceContainer<ReferenceType>(factory, this.wordHash, RowSet.importRowSet(b, payloadrow)));
if (c == null) return null;
return c.exportCollection();
byte bb[] = c.exportCollection();
assert bb.length <= b.length;
return bb;
}
}
public interface ContainerRewriter<ReferenceType extends Reference> {
public interface ContainerReducer<ReferenceType extends Reference> {
public ReferenceContainer<ReferenceType> rewrite(ReferenceContainer<ReferenceType> container);
public ReferenceContainer<ReferenceType> reduce(ReferenceContainer<ReferenceType> container);
}
/**
 * Returns the value of {@code entries()} of the underlying array.
 *
 * @return the result of {@code array.entries()}
 */
public int entries() {
    final int count = this.array.entries();
    return count;
}

@ -30,7 +30,7 @@ import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
@ -40,7 +40,7 @@ import net.yacy.kelondro.order.NaturalOrder;
public class Relations {
private final File baseDir;
private HashMap<String, ObjectIndex> relations;
private HashMap<String, Index> relations;
private final boolean useTailCache;
private final boolean exceed134217727;
@ -81,7 +81,7 @@ public class Relations {
public void declareRelation(final String name, final int keysize, final int payloadsize) throws RowSpaceExceededException {
// try to get the relation from the relation-cache
final ObjectIndex relation = relations.get(name);
final Index relation = relations.get(name);
if (relation != null) return;
// try to find the relation as stored on file
final String[] list = baseDir.list();
@ -91,7 +91,7 @@ public class Relations {
if (!list[i].equals(targetfilename)) continue;
final Row row = rowdef(list[i]);
if (row.primaryKeyLength != keysize || row.column(1).cellwidth != payloadsize) continue; // a wrong table
ObjectIndex table;
Index table;
try {
table = new Table(new File(baseDir, list[i]), row, 1024*1024, 0, this.useTailCache, this.exceed134217727);
} catch (RowSpaceExceededException e) {
@ -103,7 +103,7 @@ public class Relations {
}
// the relation does not exist, create it
final Row row = rowdef(keysize, payloadsize);
ObjectIndex table;
Index table;
try {
table = new Table(new File(baseDir, targetfilename), row, 1024*1024, 0, this.useTailCache, this.exceed134217727);
} catch (RowSpaceExceededException e) {
@ -112,16 +112,16 @@ public class Relations {
relations.put(name, table);
}
public ObjectIndex getRelation(final String name) throws RowSpaceExceededException {
public Index getRelation(final String name) throws RowSpaceExceededException {
// try to get the relation from the relation-cache
final ObjectIndex relation = relations.get(name);
final Index relation = relations.get(name);
if (relation != null) return relation;
// try to find the relation as stored on file
final String[] list = baseDir.list();
for (int i = 0; i < list.length; i++) {
if (list[i].startsWith(name)) {
final Row row = rowdef(list[i]);
ObjectIndex table;
Index table;
try {
table = new Table(new File(baseDir, list[i]), row, 1024*1024, 0, this.useTailCache, this.exceed134217727);
} catch (RowSpaceExceededException e) {
@ -142,7 +142,7 @@ public class Relations {
}
public byte[] putRelation(final String name, final byte[] key, final byte[] value) throws IOException, RowSpaceExceededException {
final ObjectIndex table = getRelation(name);
final Index table = getRelation(name);
if (table == null) return null;
final Row.Entry entry = table.row().newEntry();
entry.setCol(0, key);
@ -161,7 +161,7 @@ public class Relations {
}
public byte[] getRelation(final String name, final byte[] key) throws IOException, RowSpaceExceededException {
final ObjectIndex table = getRelation(name);
final Index table = getRelation(name);
if (table == null) return null;
final Row.Entry entry = table.get(key);
if (entry == null) return null;
@ -169,13 +169,13 @@ public class Relations {
}
public boolean hasRelation(final String name, final byte[] key) throws RowSpaceExceededException {
final ObjectIndex table = getRelation(name);
final Index table = getRelation(name);
if (table == null) return false;
return table.has(key);
}
public byte[] removeRelation(final String name, final byte[] key) throws IOException, RowSpaceExceededException {
final ObjectIndex table = getRelation(name);
final Index table = getRelation(name);
if (table == null) return null;
final Row.Entry entry = table.remove(key);
if (entry == null) return null;

@ -37,7 +37,7 @@ import java.util.Date;
import java.util.Iterator;
import java.util.List;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowCollection;
import net.yacy.kelondro.index.Row.Entry;
@ -58,7 +58,7 @@ import net.yacy.kelondro.order.NaturalOrder;
* grant ALL on yacy.* to yacy;
*/
public class SQLTable implements ObjectIndex, Iterable<Row.Entry> {
public class SQLTable implements Index, Iterable<Row.Entry> {
private static final String db_driver_str_mysql = "org.gjt.mm.mysql.Driver";
private static final String db_driver_str_pgsql = "org.postgresql.Driver";

@ -44,7 +44,7 @@ import java.util.concurrent.TimeUnit;
import net.yacy.kelondro.blob.ArrayStack;
import net.yacy.kelondro.index.Cache;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowCollection;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -59,7 +59,7 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.NamePrefixThreadFactory;
public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public class SplitTable implements Index, Iterable<Row.Entry> {
// this is a set of kelondro tables
// the set is divided into tables with different entry date
@ -70,7 +70,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
// the thread pool for the keeperOf executor service
private ExecutorService executor;
private Map<String, ObjectIndex> tables; // a map from a date string to a kelondroIndex object
private Map<String, Index> tables; // a map from a date string to a kelondroIndex object
private final Row rowdef;
private final File path;
private final String prefix;
@ -111,13 +111,13 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public long mem() {
long m = 0;
for (ObjectIndex i: tables.values()) m += i.mem();
for (Index i: tables.values()) m += i.mem();
return m;
}
public final byte[] smallestKey() {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.tables.size());
for (ObjectIndex oi: this.tables.values()) try {
for (Index oi: this.tables.values()) try {
keysort.put(oi.smallestKey());
} catch (RowSpaceExceededException e) {
Log.logException(e);
@ -127,7 +127,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public final byte[] largestKey() {
HandleSet keysort = new HandleSet(this.rowdef.primaryKeyLength, this.rowdef.objectOrder, this.tables.size());
for (ObjectIndex oi: this.tables.values()) try {
for (Index oi: this.tables.values()) try {
keysort.put(oi.largestKey());
} catch (RowSpaceExceededException e) {
Log.logException(e);
@ -143,7 +143,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
current = null;
// initialized tables map
this.tables = new HashMap<String, ObjectIndex>();
this.tables = new HashMap<String, Index>();
if (!(path.exists())) path.mkdirs();
String[] tablefile = path.list();
@ -193,7 +193,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
Map.Entry<String, Long> entry;
String maxf;
long maxram;
ObjectIndex table;
Index table;
while (!t.isEmpty()) {
// find maximum table
maxram = 0;
@ -272,21 +272,21 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
public int size() {
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
int s = 0;
while (i.hasNext()) s += i.next().size();
return s;
}
public boolean isEmpty() {
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
while (i.hasNext()) if (!i.next().isEmpty()) return false;
return true;
}
public int writeBufferSize() {
int s = 0;
for (final ObjectIndex index : tables.values()) {
for (final Index index : tables.values()) {
if (index instanceof Cache) s += ((Cache) index).writeBufferSize();
}
return s;
@ -301,12 +301,12 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
public Row.Entry get(final byte[] key) throws IOException {
final ObjectIndex keeper = keeperOf(key);
final Index keeper = keeperOf(key);
if (keeper == null) return null;
return keeper.get(key);
}
private ObjectIndex newTable() {
private Index newTable() {
this.current = newFilename();
final File f = new File(path, this.current);
Table table = null;
@ -324,7 +324,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
return table;
}
private ObjectIndex checkTable(ObjectIndex table) {
private Index checkTable(Index table) {
// check size and age of given table; in case it is too large or too old
// create a new table
assert table != null;
@ -344,7 +344,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public Row.Entry replace(final Row.Entry row) throws IOException, RowSpaceExceededException {
assert row.objectsize() <= this.rowdef.objectsize;
ObjectIndex keeper = keeperOf(row.getColBytes(0, true));
Index keeper = keeperOf(row.getColBytes(0, true));
if (keeper != null) return keeper.replace(row);
synchronized (this.tables) {
assert this.current == null || this.tables.get(this.current) != null : "this.current = " + this.current;
@ -356,7 +356,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public void put(final Row.Entry row) throws IOException, RowSpaceExceededException {
assert row.objectsize() <= this.rowdef.objectsize;
ObjectIndex keeper = keeperOf(row.getColBytes(0, true));
Index keeper = keeperOf(row.getColBytes(0, true));
if (keeper != null) {keeper.put(row); return;}
synchronized (this.tables) {
assert this.current == null || this.tables.get(this.current) != null : "this.current = " + this.current;
@ -366,9 +366,9 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
private ObjectIndex keeperOf(final byte[] key) {
private Index keeperOf(final byte[] key) {
if (key == null) return null;
for (ObjectIndex oi: tables.values()) {
for (Index oi: tables.values()) {
if (oi.has(key)) return oi;
}
return null;
@ -376,7 +376,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public void addUnique(final Row.Entry row) throws IOException, RowSpaceExceededException {
assert row.objectsize() <= this.rowdef.objectsize;
ObjectIndex table = (this.current == null) ? null : tables.get(this.current);
Index table = (this.current == null) ? null : tables.get(this.current);
synchronized (this.tables) {
assert this.current == null || this.tables.get(this.current) != null : "this.current = " + this.current;
if (table == null) table = newTable(); else table = checkTable(table);
@ -385,7 +385,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
public ArrayList<RowCollection> removeDoubles() throws IOException, RowSpaceExceededException {
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
final ArrayList<RowCollection> report = new ArrayList<RowCollection>();
while (i.hasNext()) {
report.addAll(i.next().removeDoubles());
@ -394,20 +394,20 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
public boolean delete(final byte[] key) throws IOException {
final ObjectIndex table = keeperOf(key);
final Index table = keeperOf(key);
if (table == null) return false;
return table.delete(key);
}
/**
 * Removes the entry stored under the given key and hands back what the owning table returns.
 * The table that reports having the key is looked up with keeperOf(key)
 * and the removal is delegated to that table.
 *
 * @param key the key of the entry to remove
 * @return null if keeperOf(key) yields no table; otherwise the result of that table's remove(key)
 * @throws IOException declared by this method; presumably propagated from the delegate table -- TODO confirm
 */
public Row.Entry remove(final byte[] key) throws IOException {
// NOTE(review): the two declarations below look like the before/after pair of a type rename
// (ObjectIndex -> Index) shown by this diff view; only one of them can exist in a compilable revision.
final ObjectIndex table = keeperOf(key);
final Index table = keeperOf(key);
// keeperOf returns null for a null key or when no table has the key: nothing to remove
if (table == null) return null;
return table.remove(key);
}
public Row.Entry removeOne() throws IOException {
final Iterator<ObjectIndex> i = tables.values().iterator();
ObjectIndex table, maxtable = null;
final Iterator<Index> i = tables.values().iterator();
Index table, maxtable = null;
int maxcount = -1;
while (i.hasNext()) {
table = i.next();
@ -423,8 +423,8 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
public List<Row.Entry> top(int count) throws IOException {
final Iterator<ObjectIndex> i = tables.values().iterator();
ObjectIndex table, maxtable = null;
final Iterator<Index> i = tables.values().iterator();
Index table, maxtable = null;
int maxcount = -1;
while (i.hasNext()) {
table = i.next();
@ -441,7 +441,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
final List<CloneableIterator<byte[]>> c = new ArrayList<CloneableIterator<byte[]>>(tables.size());
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
CloneableIterator<byte[]> k;
while (i.hasNext()) {
k = i.next().keys(up, firstKey);
@ -452,7 +452,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
public CloneableIterator<Row.Entry> rows(final boolean up, final byte[] firstKey) throws IOException {
final List<CloneableIterator<Row.Entry>> c = new ArrayList<CloneableIterator<Row.Entry>>(tables.size());
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
while (i.hasNext()) {
c.add(i.next().rows(up, firstKey));
}
@ -470,7 +470,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
@SuppressWarnings("unchecked")
public synchronized CloneableIterator<Row.Entry> rows() throws IOException {
final CloneableIterator<Row.Entry>[] c = new CloneableIterator[tables.size()];
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
int d = 0;
while (i.hasNext()) {
c[d++] = i.next().rows();
@ -486,7 +486,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
} catch (final InterruptedException e) {
}
this.executor = null;
final Iterator<ObjectIndex> i = tables.values().iterator();
final Iterator<Index> i = tables.values().iterator();
while (i.hasNext()) {
i.next().close();
}
@ -494,7 +494,7 @@ public class SplitTable implements ObjectIndex, Iterable<Row.Entry> {
}
/**
 * Calls deleteOnExit() on every table held in this.tables.
 */
public void deleteOnExit() {
// NOTE(review): the two loops below look like the before/after pair of a type rename
// (ObjectIndex -> Index) shown by this diff view; only one of them can exist in a compilable revision.
for (ObjectIndex i: this.tables.values()) i.deleteOnExit();
for (Index i: this.tables.values()) i.deleteOnExit();
}
}

@ -40,7 +40,7 @@ import java.util.TreeSet;
import net.yacy.kelondro.index.Column;
import net.yacy.kelondro.index.HandleMap;
import net.yacy.kelondro.index.ObjectIndex;
import net.yacy.kelondro.index.Index;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.RowCollection;
import net.yacy.kelondro.index.RowSet;
@ -66,7 +66,7 @@ import net.yacy.kelondro.util.kelondroException;
* The content cache can also be deleted during run-time, if the available RAM gets too low.
*/
public class Table implements ObjectIndex, Iterable<Row.Entry> {
public class Table implements Index, Iterable<Row.Entry> {
// static tracker objects
private final static TreeMap<String, Table> tableTracker = new TreeMap<String, Table>();
@ -660,12 +660,14 @@ public class Table implements ObjectIndex, Iterable<Row.Entry> {
if (i == index.size() - 1) {
// element is at last entry position
ix = (int) index.remove(key);
assert index.size() < i + 1 : "index.size() = " + index.size() + ", i = " + i;
assert ix == i;
file.cleanLast(b, 0);
} else {
// remove entry from index
assert i < index.size() - 1;
assert i < index.size() - 1 : "index.size() = " + index.size() + ", i = " + i;
ix = (int) index.remove(key);
assert i < index.size() : "index.size() = " + index.size() + ", i = " + i;
assert ix == i;
// read element that shall be removed
@ -693,12 +695,14 @@ public class Table implements ObjectIndex, Iterable<Row.Entry> {
if (i == index.size() - 1) {
// special handling if the entry is the last entry in the file
ix = (int) index.remove(key);
assert index.size() < i + 1 : "index.size() = " + index.size() + ", i = " + i;
assert ix == i;
table.removeRow(i, false);
file.cleanLast();
} else {
// remove entry from index
ix = (int) index.remove(key);
assert i < index.size() : "index.size() = " + index.size() + ", i = " + i;
assert ix == i;
// switch values:
@ -742,7 +746,12 @@ public class Table implements ObjectIndex, Iterable<Row.Entry> {
file.cleanLast(le, 0);
assert file.size() < fsb : "file.size() = " + file.size();
final Row.Entry lr = rowdef.newEntry(le);
assert lr != null;
assert lr.getPrimaryKeyBytes() != null;
final int is = index.size();
assert index.has(lr.getPrimaryKeyBytes());
final int i = (int) index.remove(lr.getPrimaryKeyBytes());
assert i < 0 || index.size() < is : "index.size() = " + index.size() + ", is = " + is;
assert i >= 0;
if (table != null) table.removeOne();
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();

Loading…
Cancel
Save