- refactoring of data access methods to be prepared for new cell data structure

- removed a memory overhead in collections, which prevents OOM Exceptions in low-memory configurations

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5443 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 5ce9d81955
commit 6c7e83909b

@ -82,7 +82,7 @@ public final class query {
if (obj.equals("rwiurlcount")) {
// the total number of different urls in the rwi is returned
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
prop.put("response", sb.webIndex.indexSize(env));
prop.put("response", sb.webIndex.getContainer(env, null).size());
return prop;
}

@ -68,23 +68,9 @@ public class indexCollectionRI implements indexRI {
}
}
/**
 * Return the last-modification time of the index container stored for a word hash.
 * @param wordHash the word hash to look up
 * @return the container's update timestamp, or 0 if no container exists for the hash
 */
public long getUpdateTime(final String wordHash) {
    final indexContainer container = getContainer(wordHash, null);
    return (container == null) ? 0 : container.updated();
}
// total number of word entries held by the backing collection index
public int size() {
return collectionIndex.size();
}
/**
 * Return the number of index entries stored for the given word hash.
 * A read failure is deliberately swallowed here: callers get size 0
 * instead of an IOException (best-effort size query).
 */
public int indexSize(final String wordHash) {
try {
return collectionIndex.indexSize(wordHash.getBytes());
} catch (final IOException e) {
return 0;
}
}
public int minMem() {
// calculate a minimum amount of memory that is necessary to use the index

@ -105,6 +105,10 @@ public class indexContainer extends kelondroRowSet {
return c;
}
// NOTE(review): the inner call this.merge(c) passes an indexContainer, and Java
// overload resolution picks the most specific applicable method -- which is this
// very method, suggesting unbounded recursion. This is only safe if a superclass
// overload (e.g. merge(kelondroRowSet) on kelondroRowSet) is actually intended
// and selected here -- TODO confirm against the kelondroRowSet API.
public indexContainer merge(final indexContainer c) {
return new indexContainer(this.wordHash, this.merge(c));
}
public indexRWIEntry put(final indexRWIRowEntry entry) {
assert entry.toKelondroEntry().objectsize() == super.rowdef.objectsize;
final kelondroRow.Entry r = super.put(entry.toKelondroEntry());
@ -152,6 +156,11 @@ public class indexContainer extends kelondroRowSet {
return new indexRWIRowEntry(entry);
}
/**
* remove a url reference from the container.
* if the url hash was found, return the entry, but delete the entry from the container
* if the entry was not found, return null.
*/
public indexRWIEntry remove(final String urlHash) {
final kelondroRow.Entry entry = remove(urlHash.getBytes());
if (entry == null) return null;

@ -0,0 +1,193 @@
// indexContainerBLOBHeap.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 04.01.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import de.anomic.kelondro.kelondroBLOBArray;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
/**
 * Read-mostly access to index containers stored in a BLOB array dump.
 * Provides lookup, iteration over word hashes, and deletion; no random-access
 * writes (see constructor javadoc below).
 */
public final class indexContainerBLOBHeap {
// row layout of the payload entries stored in each container
private final kelondroRow payloadrow;
// the backing array of BLOB files, ordered by creation date
private final kelondroBLOBArray array;
/**
 * open a index container based on a BLOB dump. The content of the BLOB will not be read
 * unless a .idx file exists. Only the .idx file is opened to get a fast read access to
 * the BLOB. This class provides no write methods, because BLOB files should not be
 * written in random access. To support deletion, a write access to the BLOB for deletion
 * is still possible
 * @param heapLocation directory that holds the BLOB files
 * @param blobSalt salt used in the BLOB file names
 * @param payloadrow row layout of the container entries
 * @throws IOException if the BLOB array cannot be opened
 */
public indexContainerBLOBHeap(
final File heapLocation,
final String blobSalt,
final kelondroRow payloadrow) throws IOException {
this.payloadrow = payloadrow;
this.array = new kelondroBLOBArray(
heapLocation,
blobSalt,
payloadrow.primaryKeyLength,
payloadrow.getOrdering(),
0);
}
// close the underlying BLOB array
public void close() {
this.array.close();
}
// remove all entries from the underlying BLOB array
public void clear() throws IOException {
this.array.clear();
}
// number of entries in the BLOB array; defensive null check for partially
// constructed instances
public int size() {
return (this.array == null) ? 0 : this.array.size();
}
// compute the File for a new BLOB, named after the current date;
// no file is created on disk here
public File newContainerBLOBFile() {
return this.array.newBLOB(new Date());
}
/**
 * return an iterator object that creates top-level-clones of the indexContainers
 * in the cache, so that manipulations of the iterated objects do not change
 * objects in the cache.
 * @throws IOException
 */
public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startWordHash, final boolean rot) throws IOException {
return new heapCacheIterator(startWordHash, rot);
}
/**
 * iterator over the containers stored in the BLOB array, starting at a given
 * word hash, optionally rotating back to the start when the end is reached.
 * NOTE(review): the original javadoc referred to a "heap cache" of write-enabled
 * heaps; this implementation actually iterates the BLOB array's key index.
 */
public class heapCacheIterator implements kelondroCloneableIterator<indexContainer>, Iterable<indexContainer> {
// this class exists, because the wCache cannot be iterated with rotation
// and because every indexContainer Object that is iterated must be returned as top-level-clone
// so this class simulates wCache.tailMap(startWordHash).values().iterator()
// plus the mentioned features
private final boolean rot;
private kelondroCloneableIterator<byte[]> iterator;
public heapCacheIterator(final String startWordHash, final boolean rot) throws IOException {
this.rot = rot;
this.iterator = array.keys(true, startWordHash.getBytes());
// The collection's iterator will return the values in the order that their corresponding keys appear in the tree.
}
// clone the iterator starting at a different word hash; returns null on I/O failure
public heapCacheIterator clone(final Object secondWordHash) {
try {
return new heapCacheIterator((String) secondWordHash, rot);
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
// with rotation enabled the iteration never ends
public boolean hasNext() {
if (rot) return true;
return iterator.hasNext();
}
// fetch the next container; when rotating, restart at the first key after the end;
// returns null on I/O failure or (without rotation) at the end
public indexContainer next() {
try {
if (iterator.hasNext()) {
return get(new String(iterator.next()));
}
// rotation iteration
if (!rot) {
return null;
}
iterator = array.keys(true, null);
return get(new String(iterator.next()));
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
// delegate removal to the underlying key iterator
public void remove() {
iterator.remove();
}
public Iterator<indexContainer> iterator() {
return this;
}
}
/**
 * test if a given key is in the heap
 * this works with heaps in write- and read-mode
 * @param key
 * @return true, if the key is used in the heap; false otherwise
 */
public boolean has(final String key) {
return this.array.has(key.getBytes());
}
/**
 * get a indexContainer from a heap; if the key occurs in more than one BLOB
 * file, all found containers are merged into one
 * @param key
 * @return the indexContainer if one exists, null otherwise
 * @throws IOException
 */
public indexContainer get(final String key) throws IOException {
List<byte[]> entries = this.array.getAll(key.getBytes());
if (entries == null || entries.size() == 0) return null;
byte[] a = entries.remove(0);
indexContainer c = new indexContainer(key, kelondroRowSet.importRowSet(a, payloadrow));
while (entries.size() > 0) {
c = c.merge(new indexContainer(key, kelondroRowSet.importRowSet(entries.remove(0), payloadrow)));
}
return c;
}
/**
 * delete a indexContainer from all BLOBs in the array; nothing is returned
 * @param wordHash
 * @throws IOException
 */
public synchronized void delete(final String wordHash) throws IOException {
// delete the container from every BLOB that holds it
array.remove(wordHash.getBytes());
}
}

@ -1,4 +1,4 @@
// indexContainerHeap.java
// indexContainerRAMHeap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.03.2008 on http://yacy.net
//
@ -48,30 +48,32 @@ import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.server.logging.serverLog;
public final class indexContainerHeap {
public final class indexContainerRAMHeap {
private final kelondroRow payloadrow;
private final serverLog log;
private SortedMap<String, indexContainer> cache;
/**
* opens an existing heap file in undefined mode
* after this a initialization should be made to use the heap:
* either a read-only or read/write mode inititalization
* either a read-only or read/write mode initialization
* @param payloadrow
* @param log
*/
public indexContainerHeap(final kelondroRow payloadrow, final serverLog log) {
public indexContainerRAMHeap(final kelondroRow payloadrow) {
this.payloadrow = payloadrow;
this.log = log;
this.cache = null;
}
public void clear() throws IOException {
public void clear() {
if (cache != null) cache.clear();
initWriteMode();
}
public void close() {
this.cache = null;
}
/**
* initializes the heap in read/write mode without reading of a dump first
* another dump reading afterwards is not possible
@ -88,7 +90,7 @@ public final class indexContainerHeap {
* @throws IOException
*/
public void initWriteModeFromHeap(final File heapFile) throws IOException {
if (log != null) log.logInfo("restoring dump for rwi heap '" + heapFile.getName() + "'");
serverLog.logInfo("indexContainerRAMHeap", "restoring dump for rwi heap '" + heapFile.getName() + "'");
final long start = System.currentTimeMillis();
this.cache = Collections.synchronizedSortedMap(new TreeMap<String, indexContainer>(new kelondroByteOrder.StringOrder(payloadrow.getOrdering())));
int urlCount = 0;
@ -100,7 +102,7 @@ public final class indexContainerHeap {
urlCount += container.size();
}
}
if (log != null) log.logInfo("finished rwi heap restore: " + cache.size() + " words, " + urlCount + " word/URL relations in " + (System.currentTimeMillis() - start) + " milliseconds");
serverLog.logInfo("indexContainerRAMHeap", "finished rwi heap restore: " + cache.size() + " words, " + urlCount + " word/URL relations in " + (System.currentTimeMillis() - start) + " milliseconds");
}
/**
@ -109,7 +111,7 @@ public final class indexContainerHeap {
* @throws IOException
*/
public void initWriteModeFromBLOB(final File blobFile) throws IOException {
if (log != null) log.logInfo("restoring rwi blob dump '" + blobFile.getName() + "'");
serverLog.logInfo("indexContainerRAMHeap", "restoring rwi blob dump '" + blobFile.getName() + "'");
final long start = System.currentTimeMillis();
this.cache = Collections.synchronizedSortedMap(new TreeMap<String, indexContainer>(new kelondroByteOrder.StringOrder(payloadrow.getOrdering())));
int urlCount = 0;
@ -124,46 +126,12 @@ public final class indexContainerHeap {
}
// remove idx and gap files if they exist here
kelondroBLOBHeapWriter.deleteAllFingerprints(blobFile);
if (log != null) log.logInfo("finished rwi blob restore: " + cache.size() + " words, " + urlCount + " word/URL relations in " + (System.currentTimeMillis() - start) + " milliseconds");
}
/*
public void dumpold(final File heapFile) throws IOException {
assert this.cache != null;
if (log != null) log.logInfo("creating rwi heap dump '" + heapFile.getName() + "', " + cache.size() + " rwi's");
if (heapFile.exists()) heapFile.delete();
final DataOutputStream os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile), 64 * 1024));
final long startTime = System.currentTimeMillis();
long wordcount = 0, urlcount = 0;
String wordHash;
indexContainer container;
// write wCache
synchronized (cache) {
for (final Map.Entry<String, indexContainer> entry: cache.entrySet()) {
// get entries
wordHash = entry.getKey();
container = entry.getValue();
// put entries on heap
if (container != null) {
os.write(wordHash.getBytes());
if (wordHash.length() < payloadrow.primaryKeyLength) {
for (int i = 0; i < payloadrow.primaryKeyLength - wordHash.length(); i++) os.write(0);
}
os.write(container.exportCollection());
urlcount += container.size();
}
wordcount++;
}
}
os.flush();
os.close();
if (log != null) log.logInfo("finished rwi heap dump: " + wordcount + " words, " + urlcount + " word/URL relations in " + (System.currentTimeMillis() - startTime) + " milliseconds");
serverLog.logInfo("indexContainerRAMHeap", "finished rwi blob restore: " + cache.size() + " words, " + urlCount + " word/URL relations in " + (System.currentTimeMillis() - start) + " milliseconds");
}
*/
public void dump(final File heapFile) throws IOException {
assert this.cache != null;
if (log != null) log.logInfo("creating alternative rwi heap dump '" + heapFile.getName() + "', " + cache.size() + " rwi's");
serverLog.logInfo("indexContainerRAMHeap", "creating alternative rwi heap dump '" + heapFile.getName() + "', " + cache.size() + " rwi's");
if (heapFile.exists()) heapFile.delete();
final kelondroBLOBHeapWriter dump = new kelondroBLOBHeapWriter(heapFile, payloadrow.primaryKeyLength, kelondroBase64Order.enhancedCoder);
final long startTime = System.currentTimeMillis();
@ -187,7 +155,7 @@ public final class indexContainerHeap {
}
}
dump.close();
if (log != null) log.logInfo("finished alternative rwi heap dump: " + wordcount + " words, " + urlcount + " word/URL relations in " + (System.currentTimeMillis() - startTime) + " milliseconds");
serverLog.logInfo("indexContainerRAMHeap", "finished alternative rwi heap dump: " + wordcount + " words, " + urlcount + " word/URL relations in " + (System.currentTimeMillis() - startTime) + " milliseconds");
}
public int size() {

@ -48,7 +48,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
public long cacheReferenceAgeLimit; // the maximum age (= time not changed) of a RWI entity
private final serverLog log;
private final File oldDumpFile, newDumpFile;
private indexContainerHeap heap;
private indexContainerRAMHeap heap;
@SuppressWarnings("unchecked")
public indexRAMRI(
@ -72,7 +72,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
this.log = log;
this.oldDumpFile = new File(databaseRoot, oldHeapName);
this.newDumpFile = new File(databaseRoot, newHeapName);
this.heap = new indexContainerHeap(payloadrow, log);
this.heap = new indexContainerRAMHeap(payloadrow);
// read in dump of last session
boolean initFailed = false;
@ -115,11 +115,7 @@ public final class indexRAMRI implements indexRI, indexRIReader {
hashScore.clear();
hashDate.clear();
initTime = System.currentTimeMillis();
try {
heap.clear();
} catch (final IOException e) {
e.printStackTrace();
}
heap.clear();
}
public int minMem() {
@ -128,12 +124,6 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return 100 * 1024; // 100 kb
}
public synchronized long getUpdateTime(final String wordHash) {
final indexContainer entries = getContainer(wordHash, null);
if (entries == null) return 0;
return entries.updated();
}
// cache settings
public int maxURLinCache() {
if (hashScore.size() == 0) return 0;
@ -162,12 +152,6 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return heap.size();
}
public synchronized int indexSize(final String wordHash) {
final indexContainer cacheIndex = heap.get(wordHash);
if (cacheIndex == null) return 0;
return cacheIndex.size();
}
public synchronized kelondroCloneableIterator<indexContainer> wordContainers(final String startWordHash, final boolean rot) {
// we return an iterator object that creates top-level-clones of the indexContainers
// in the cache, so that manipulations of the iterated objects do not change
@ -175,7 +159,6 @@ public final class indexRAMRI implements indexRI, indexRIReader {
return heap.wordContainers(startWordHash, rot);
}
public synchronized String maxScoreWordHash() {
if (heap == null || heap.size() == 0) return null;
try {

@ -28,6 +28,7 @@
package de.anomic.index;
import java.io.IOException;
import java.util.Set;
import de.anomic.kelondro.kelondroCloneableIterator;
@ -37,15 +38,13 @@ public interface indexRI {
public int size();
public int minMem();
public kelondroCloneableIterator<indexContainer> wordContainers(String startWordHash, boolean rot); // method to replace wordHashes
public long getUpdateTime(String wordHash);
public int indexSize(String wordHash);
public boolean hasContainer(String wordHash); // should only be used if in case that true is returned the getContainer is NOT called
public indexContainer getContainer(String wordHash, Set<String> urlselection); // if urlselection != null all url references which are not in urlselection are removed from the container
public indexContainer deleteContainer(String wordHash);
public indexContainer getContainer(String wordHash, Set<String> urlselection) throws IOException; // if urlselection != null all url references which are not in urlselection are removed from the container
public indexContainer deleteContainer(String wordHash) throws IOException;
public boolean removeEntry(String wordHash, String urlHash);
public int removeEntries(String wordHash, Set<String> urlHashes);
public void addEntries(indexContainer newEntries);
public void clear();
public void clear() throws IOException;
public void close();
}

@ -95,5 +95,27 @@ public abstract class kelondroAbstractIOChunks implements kelondroIOChunks {
public synchronized void write(final long pos, final byte[] b) throws IOException {
this.write(pos, b, 0, b.length);
}
/**
 * Write spaceCount zero bytes starting at pos, chunked in blocks of at most
 * 512 bytes to bound temporary buffer allocation.
 * @param pos start position in the file
 * @param spaceCount number of zero bytes to write
 * @throws IOException on write failure
 */
public synchronized void writeSpace(long pos, int spaceCount) throws IOException {
    final int chunkSize = 512;
    // small requests are written in a single shot
    if (spaceCount < chunkSize) {
        write(pos, space(spaceCount));
        return;
    }
    final byte[] chunk = space(chunkSize);
    while (spaceCount > chunk.length) {
        write(pos, chunk);
        pos += chunk.length;
        spaceCount -= chunk.length;
    }
    if (spaceCount > 0) {
        write(pos, space(spaceCount));
    }
}
/**
 * Allocate a buffer of the given size filled with zero bytes.
 * Java array elements are default-initialized to zero by the JVM,
 * so the previous explicit fill loop was redundant work.
 * @param count number of zero bytes
 * @return a new zero-filled byte array of length count
 */
private byte[] space(int count) {
    return new byte[count];
}
}

@ -77,7 +77,7 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
protected int headchunksize;// overheadsize + key element column size
protected int tailchunksize;// sum(all: COLWIDTHS) minus the size of the key element colum
protected int recordsize; // (overhead + sum(all: COLWIDTHS)) = the overall size of a record
byte[] spaceChunk; // a chunk of data that is used to reserve space within the file
//byte[] spaceChunk; // a chunk of data that is used to reserve space within the file
// dynamic run-time seek pointers
private long POS_HANDLES = 0; // starts after end of POS_COLWIDHS which is POS_COLWIDTHS + COLWIDTHS.length * 4
@ -206,20 +206,21 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
//}
}
protected synchronized int allocatePayload(byte[] chunk) throws IOException {
protected synchronized int allocatePayload(byte[] chunk0) throws IOException {
// reserves a new record and returns index of record
// the return value is not a seek position
// the seek position can be retrieved using the seekpos() function
if (chunk == null) {
chunk = spaceChunk;
}
assert (chunk.length == ROW.objectsize) : "chunk.length = " + chunk.length + ", ROW.objectsize() = " + ROW.objectsize;
assert (chunk0 == null || chunk0.length == ROW.objectsize) : "chunk.length = " + chunk0.length + ", ROW.objectsize() = " + ROW.objectsize;
//synchronized (USAGE) {
synchronized (entryFile) {
if (USAGE.FREEC == 0) {
// generate new entry
final int index = USAGE.allCount();
entryFile.write(seekpos(index) + overhead, chunk, 0, ROW.objectsize); // occupy space, otherwise the USAGE computation does not work
if (chunk0 == null) {
entryFile.writeSpace(seekpos(index) + overhead, ROW.objectsize);
} else {
entryFile.write(seekpos(index) + overhead, chunk0, 0, ROW.objectsize); // occupy space, otherwise the USAGE computation does not work
}
USAGE.USEDC++;
writeused(false);
return index;
@ -264,7 +265,11 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
}
}
}
entryFile.write(seekpos(index) + overhead, chunk, 0, ROW.objectsize); // overwrite space
if (chunk0 == null) {
entryFile.writeSpace(seekpos(index) + overhead, ROW.objectsize);
} else {
entryFile.write(seekpos(index) + overhead, chunk0, 0, ROW.objectsize); // overwrite space
}
USAGE.writeused(false);
USAGE.writefree();
return index;
@ -299,7 +304,7 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
while (index > USAGE.allCount()) {
h = new kelondroHandle(USAGE.allCount());
USAGE.FREEC++;
entryFile.write(seekpos(h), spaceChunk); // occupy space, othervise the USAGE computaton does not work
entryFile.writeSpace(seekpos(h), overhead + ROW.objectsize); // occupy space, otherwise the USAGE computation does not work
entryFile.writeInt(seekpos(h), USAGE.FREEH.index);
USAGE.FREEH = h;
assert ((USAGE.FREEH.index == kelondroHandle.NUL) && (USAGE.FREEC == 0)) || seekpos(USAGE.FREEH) < entryFile.length() : "allocateRecord: USAGE.FREEH.index = " + USAGE.FREEH.index;
@ -436,7 +441,6 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
this.recordsize = this.overhead + ROW.objectsize;
this.headchunksize = overhead + ROW.width(0);
this.tailchunksize = this.recordsize - this.headchunksize;
this.spaceChunk = fillSpaceChunk(recordsize);
// store dynamic run-time seek pointers
POS_HANDLES = POS_COLWIDTHS + ROW.columns() * 4;
@ -485,12 +489,6 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
this.entryFile.commit();
}
private static final byte[] fillSpaceChunk(int size) {
final byte[] chunk = new byte[size];
while (--size >= 0) chunk[size] = (byte) 0xff;
return chunk;
}
public void setDescription(final byte[] description) throws IOException {
if (description.length > LEN_DESCR)
@ -583,7 +581,6 @@ public abstract class kelondroAbstractRecords implements kelondroRecords {
this.recordsize = this.overhead + ROW.objectsize;
this.headchunksize = this.overhead + this.ROW.width(0);
this.tailchunksize = this.recordsize - this.headchunksize;
this.spaceChunk = fillSpaceChunk(recordsize);
// init USAGE, must be done at the end because it needs the recordsize value
this.USAGE = new usageControl(false);

@ -84,7 +84,7 @@ public interface kelondroBLOB {
* @return
* @throws IOException
*/
public boolean has(byte[] key) throws IOException;
public boolean has(byte[] key);
/**
* retrieve the whole BLOB from the table

@ -113,7 +113,7 @@ public class kelondroBLOBArray implements kelondroBLOB {
d = serverDate.parseShortSecond(files[i].substring(0, 14));
f = new File(heapLocation, files[i]);
time = d.getTime();
oneBlob = new kelondroBLOBHeap(f, keylength, ordering, (time == maxtime) ? buffersize : 0);
oneBlob = (time == maxtime && buffersize > 0) ? new kelondroBLOBHeap(f, keylength, ordering, buffersize) : new kelondroBLOBHeapModifier(f, keylength, ordering);
sortedItems.put(Long.valueOf(time), new blobItem(d, f, oneBlob));
} catch (ParseException e) {continue;}
}
@ -183,14 +183,18 @@ public class kelondroBLOBArray implements kelondroBLOB {
this.location = location;
this.blob = blob;
}
public blobItem() throws IOException {
public blobItem(int buffer) throws IOException {
// make a new blob file and assign it in this item
this.creation = new Date();
this.location = new File(heapLocation, serverDate.formatShortSecond(creation) + "." + blobSalt + ".blob");
this.blob = new kelondroBLOBHeap(location, keylength, ordering, buffersize);
this.location = newBLOB(this.creation);
this.blob = (buffer == 0) ? new kelondroBLOBHeapModifier(location, keylength, ordering) : new kelondroBLOBHeap(location, keylength, ordering, buffer);
}
}
/**
 * Compute the File for a new BLOB inside this array, named from the given
 * creation date and the array's salt. Note: only the File object is
 * constructed; nothing is created on disk here.
 */
public File newBLOB(Date creation) {
return new File(heapLocation, serverDate.formatShortSecond(creation) + "." + blobSalt + ".blob");
}
/**
* ask for the length of the primary key
* @return the length of the key
@ -257,7 +261,7 @@ public class kelondroBLOBArray implements kelondroBLOB {
* @return
* @throws IOException
*/
public boolean has(byte[] key) throws IOException {
public boolean has(byte[] key) {
for (blobItem bi: blobs) if (bi.blob.has(key)) return true;
return false;
}
@ -277,6 +281,23 @@ public class kelondroBLOBArray implements kelondroBLOB {
return null;
}
/**
 * Collect the stored value for the given key from every BLOB in the array.
 * This is useful when it is not known whether an entry is unique across all
 * BLOB files in this array.
 * @param key the primary key to look up
 * @return a list with one value per BLOB that contains the key (possibly empty)
 * @throws IOException on read failure in any underlying BLOB
 */
public List<byte[]> getAll(byte[] key) throws IOException {
    final ArrayList<byte[]> hits = new ArrayList<byte[]>(blobs.size());
    for (final blobItem item : blobs) {
        final byte[] value = item.blob.get(key);
        if (value != null) {
            hits.add(value);
        }
    }
    return hits;
}
/**
* retrieve the size of the BLOB
* @param key
@ -308,9 +329,10 @@ public class kelondroBLOBArray implements kelondroBLOB {
System.out.println("bi.location.length() > this.maxsize");
if ((bi == null) || (System.currentTimeMillis() - bi.creation.getTime() > this.fileAgeLimit) || (bi.location.length() > this.fileSizeLimit)) {
// add a new blob to the array
bi = new blobItem();
bi = new blobItem(buffersize);
blobs.add(bi);
}
assert bi.blob instanceof kelondroBLOBHeap;
bi.blob.put(key, b);
executeLimits();
}

@ -174,7 +174,7 @@ public class kelondroBLOBCompressor extends Thread implements kelondroBLOB {
return decompress(b);
}
public synchronized boolean has(byte[] key) throws IOException {
public synchronized boolean has(byte[] key) {
return
this.buffer.containsKey(new String(key)) || this.backend.has(key);
}

@ -31,11 +31,8 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import de.anomic.server.logging.serverLog;
public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements kelondroBLOB {
public final class kelondroBLOBHeap extends kelondroBLOBHeapModifier implements kelondroBLOB {
private HashMap<String, byte[]> buffer; // a write buffer to limit IO to the file; attention: Maps cannot use byte[] as key
private int buffersize; // bytes that are buffered in buffer
@ -80,7 +77,6 @@ public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements ke
this.buffermax = buffermax;
this.buffer = new HashMap<String, byte[]>();
this.buffersize = 0;
mergeFreeEntries();
/*
// DEBUG
Iterator<byte[]> i = index.keys(true, null);
@ -99,36 +95,6 @@ public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements ke
*/
}
/**
 * Coalesce adjacent gap records in the free list and on disk.
 * Two gaps are adjacent when the first gap's payload plus its 4-byte size
 * header ends exactly where the next gap starts; they are then merged into
 * one larger gap record.
 */
private void mergeFreeEntries() throws IOException {
// try to merge free entries
if (super.free.size() > 1) {
int merged = 0;
Map.Entry<Long, Integer> lastFree, nextFree;
final Iterator<Map.Entry<Long, Integer>> i = this.free.entrySet().iterator();
lastFree = i.next();
while (i.hasNext()) {
nextFree = i.next();
//System.out.println("*** DEBUG BLOB: free-seek = " + nextFree.seek + ", size = " + nextFree.size);
// check if they follow directly
if (lastFree.getKey() + lastFree.getValue() + 4 == nextFree.getKey()) {
// merge those records
this.file.seek(lastFree.getKey());
lastFree.setValue(lastFree.getValue() + nextFree.getValue() + 4); // this updates also the free map
this.file.writeInt(lastFree.getValue());
// zero out the absorbed gap's on-disk size header
this.file.seek(nextFree.getKey());
this.file.writeInt(0);
i.remove();
merged++;
} else {
lastFree = nextFree;
}
}
serverLog.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": merged " + merged + " free records");
}
}
/**
* the number of BLOBs in the heap
* @return the number of BLOBs in the heap
@ -249,60 +215,24 @@ public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements ke
* @throws IOException
*/
public synchronized void clear() throws IOException {
this.buffer.clear();
this.buffer.clear();
this.buffersize = 0;
this.index.clear();
this.free.clear();
try {
this.file.close();
} catch (final IOException e) {
e.printStackTrace();
}
this.heapFile.delete();
this.file = new kelondroCachedFileRA(heapFile);
super.clear();
}
/**
* close the BLOB table
*/
public synchronized void close() {
shrinkWithGapsAtEnd();
if (file != null) {
if (file != null) {
try {
flushBuffer();
} catch (IOException e) {
e.printStackTrace();
}
try {
file.close();
} catch (final IOException e) {
e.printStackTrace();
}
}
file = null;
if (index != null && free != null && (index.size() > 3 || free.size() > 3)) {
// now we can create a dump of the index and the gap information
// to speed up the next start
try {
long start = System.currentTimeMillis();
free.dump(kelondroBLOBHeapWriter.fingerprintGapFile(this.heapFile));
free.clear();
free = null;
index.dump(kelondroBLOBHeapWriter.fingerprintIndexFile(this.heapFile));
serverLog.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
index.close();
index = null;
} catch (IOException e) {
e.printStackTrace();
}
} else {
// this is small.. just free resources, do not write index
free.clear();
free = null;
index.close();
index = null;
}
this.buffer = null;
super.close();
}
/**
@ -326,7 +256,7 @@ public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements ke
// if there is not enough space in the buffer, flush all
if (this.buffersize + b.length > buffermax) {
// this is too big. Flush everything
shrinkWithGapsAtEnd();
super.shrinkWithGapsAtEnd();
flushBuffer();
if (b.length > buffermax) {
this.add(key, b);
@ -437,132 +367,7 @@ public final class kelondroBLOBHeap extends kelondroBLOBHeapReader implements ke
return;
}
// check if the index contains the key
final long seek = index.getl(key);
if (seek < 0) return;
// access the file and read the container
this.file.seek(seek);
int size = file.readInt();
//assert seek + size + 4 <= this.file.length() : heapFile.getName() + ": too long size " + size + " in record at " + seek;
long filelength = this.file.length(); // put in separate variable for debugging
if (seek + size + 4 > filelength) {
serverLog.logSevere("BLOBHeap", heapFile.getName() + ": too long size " + size + " in record at " + seek);
throw new IOException(heapFile.getName() + ": too long size " + size + " in record at " + seek);
}
// add entry to free array
this.free.put(seek, size);
// fill zeros to the content
int l = size; byte[] fill = new byte[size];
while (l-- > 0) fill[l] = 0;
this.file.write(fill, 0, size);
// remove entry from index
this.index.removel(key);
// recursively merge gaps
tryMergeNextGaps(seek, size);
tryMergePreviousGap(seek);
}
private void tryMergePreviousGap(final long thisSeek) throws IOException {
// this is called after a record has been removed. That may cause that a new
// empty record was surrounded by gaps. We merge with a previous gap, if this
// is also empty, but don't do that recursively
// If this is successful, it removes the given marker for thisSeed and
// because of this, this method MUST be called AFTER tryMergeNextGaps was called.
// first find the gap entry for the closest gap in front of the give gap
SortedMap<Long, Integer> head = this.free.headMap(thisSeek);
if (head.size() == 0) return;
long previousSeek = head.lastKey().longValue();
int previousSize = head.get(previousSeek).intValue();
// check if this is directly in front
// (previous gap payload + 4-byte size header must end exactly at thisSeek)
if (previousSeek + previousSize + 4 == thisSeek) {
// right in front! merge the gaps
Integer thisSize = this.free.get(thisSeek);
assert thisSize != null;
mergeGaps(previousSeek, previousSize, thisSeek, thisSize.intValue());
}
}
private void tryMergeNextGaps(final long thisSeek, final int thisSize) throws IOException {
// try to merge two gaps if one gap has been processed already and the position of the next record is known
// if the next record is also a gap, merge these gaps and go on recursively
// first check if next gap position is outside of file size
long nextSeek = thisSeek + thisSize + 4;
if (nextSeek >= this.file.length()) return; // end of recursion
// move to next position and read record size
Integer nextSize = this.free.get(nextSeek);
if (nextSize == null) return; // finished, this is not a gap
// check if the record is a gap-record
assert nextSize.intValue() > 0;
if (nextSize.intValue() == 0) {
// a strange gap record: we can extend the thisGap with four bytes
// the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap
mergeGaps(thisSeek, thisSize, nextSeek, 0);
// recursively go on
tryMergeNextGaps(thisSeek, thisSize + 4);
} else {
// check if this is a true gap!
// (a real gap record has zeroed content; inspect its first payload byte)
this.file.seek(nextSeek + 4);
byte[] o = new byte[1];
this.file.readFully(o, 0, 1);
int t = o[0];
assert t == 0;
if (t == 0) {
// the nextRecord is a gap record; we remove that from the free list because it will be joined with the current gap
mergeGaps(thisSeek, thisSize, nextSeek, nextSize.intValue());
// recursively go on
tryMergeNextGaps(thisSeek, thisSize + 4 + nextSize.intValue());
}
}
}
/**
 * merge two directly adjacent gaps into one gap starting at seek0.
 * The free-list entry for seek1 is removed, the length header at seek1 is
 * zeroed, and the gap at seek0 is enlarged to cover both records.
 * @param seek0 seek position of the first gap
 * @param size0 payload size of the first gap
 * @param seek1 seek position of the directly following gap
 * @param size1 payload size of the following gap
 * @throws IOException
 */
private void mergeGaps(final long seek0, final int size0, final long seek1, final int size1) throws IOException {
//System.out.println("*** DEBUG-BLOBHeap " + heapFile.getName() + ": merging gap from pos " + seek0 + ", len " + size0 + " with next record of size " + size1 + " (+ 4)");
Integer g = this.free.remove(seek1); // g is only used for debugging
assert g != null;
assert g.intValue() == size1;
// overwrite the size bytes of next records with zeros
this.file.seek(seek1);
this.file.writeInt(0);
// the new size of the current gap: old size + len + 4
int newSize = size0 + 4 + size1;
this.file.seek(seek0);
this.file.writeInt(newSize);
// register new gap in the free array; overwrite old gap entry
g = this.free.put(seek0, newSize);
assert g != null;
assert g.intValue() == size0;
}
/**
 * find gaps at the end of the file and shrink the file by truncating them,
 * so no trailing free space is persisted.
 */
private void shrinkWithGapsAtEnd() {
// find gaps at the end of the file and shrink the file by these gaps
try {
while (this.free.size() > 0) {
Long seek = this.free.lastKey();
int size = this.free.get(seek).intValue();
// stop as soon as the last gap does not touch the end of the file
if (seek.longValue() + size + 4 != this.file.length()) return;
// shrink the file
this.file.setLength(seek.longValue());
this.free.remove(seek);
}
} catch (IOException e) {
e.printStackTrace();
}
// NOTE(review): 'key' is not defined in this method; this statement looks
// like residue from a merge with the enclosing remove() method - verify
super.remove(key);
}
/**

@ -0,0 +1,277 @@
// kelondroBLOBHeapModifier.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 05.01.2009 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import de.anomic.server.logging.serverLog;
public class kelondroBLOBHeapModifier extends kelondroBLOBHeapReader implements kelondroBLOB {

    /*
     * This class adds a remove operation to a BLOBHeapReader.
     * Heap records consist of a 4-byte length header followed by the payload;
     * free records (gaps) are tracked in the inherited 'free' map, which maps
     * the seek position of a gap to its payload size, sorted by seek position.
     */

    /**
     * create a heap file: an arbitrary number of BLOBs, indexed by an access key.
     * The heap file will be indexed upon initialization; directly adjacent
     * gaps found in the free list are merged immediately.
     * @param heapFile the file that backs this heap
     * @param keylength the number of bytes of the primary key
     * @param ordering the byte ordering of the keys
     * @throws IOException
     */
    public kelondroBLOBHeapModifier(final File heapFile, final int keylength, final kelondroByteOrder ordering) throws IOException {
        super(heapFile, keylength, ordering);
        mergeFreeEntries();
    }

    /**
     * merge directly adjacent entries of the free list into single gaps.
     * The free map iterates in seek order, so one linear pass is sufficient.
     * @throws IOException
     */
    private void mergeFreeEntries() throws IOException {
        if (super.free.size() > 1) {
            int merged = 0;
            Map.Entry<Long, Integer> lastFree, nextFree;
            final Iterator<Map.Entry<Long, Integer>> i = this.free.entrySet().iterator();
            lastFree = i.next();
            while (i.hasNext()) {
                nextFree = i.next();
                // check if the gaps follow directly: the previous gap payload
                // plus its 4 length bytes must end where the next gap starts
                if (lastFree.getKey() + lastFree.getValue() + 4 == nextFree.getKey()) {
                    // merge those records: enlarge the first gap ...
                    this.file.seek(lastFree.getKey());
                    lastFree.setValue(lastFree.getValue() + nextFree.getValue() + 4); // this updates also the free map
                    this.file.writeInt(lastFree.getValue());
                    // ... and erase the length header of the second gap
                    this.file.seek(nextFree.getKey());
                    this.file.writeInt(0);
                    i.remove();
                    merged++;
                } else {
                    lastFree = nextFree;
                }
            }
            serverLog.logInfo("kelondroBLOBHeap", "BLOB " + heapFile.getName() + ": merged " + merged + " free records");
        }
    }

    /**
     * clears the content of the database: the index and gap list are emptied
     * and the backing file is deleted and recreated.
     * @throws IOException
     */
    public synchronized void clear() throws IOException {
        this.index.clear();
        this.free.clear();
        try {
            this.file.close();
        } catch (final IOException e) {
            e.printStackTrace();
        }
        this.heapFile.delete();
        this.file = new kelondroCachedFileRA(heapFile);
    }

    /**
     * close the BLOB table.
     * Trailing gaps are truncated from the file; for larger tables a dump of
     * the index and the gap list is written to speed up the next start.
     * This method is safe to call more than once.
     */
    public synchronized void close() {
        shrinkWithGapsAtEnd();
        if (file != null) {
            try {
                file.close();
            } catch (final IOException e) {
                e.printStackTrace();
            }
        }
        file = null;
        if (index != null && free != null && (index.size() > 3 || free.size() > 3)) {
            // now we can create a dump of the index and the gap information
            // to speed up the next start
            try {
                long start = System.currentTimeMillis();
                free.dump(kelondroBLOBHeapWriter.fingerprintGapFile(this.heapFile));
                free.clear();
                free = null;
                index.dump(kelondroBLOBHeapWriter.fingerprintIndexFile(this.heapFile));
                serverLog.logInfo("kelondroBLOBHeap", "wrote a dump for the " + this.index.size() + " index entries of " + heapFile.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
                index.close();
                index = null;
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            // this is small.. just free resources, do not write index.
            // fix: null checks guard against a NullPointerException when close()
            // is called a second time (free/index are already null then)
            if (free != null) {
                free.clear();
                free = null;
            }
            if (index != null) {
                index.close();
                index = null;
            }
        }
    }

    /**
     * remove a BLOB: the record payload is overwritten with zeros, the record
     * is registered as a gap, and adjacent gaps are merged.
     * @param key the primary key
     * @throws IOException if the stored record size is inconsistent with the file length
     */
    public synchronized void remove(final byte[] key) throws IOException {
        assert index.row().primaryKeyLength == key.length : index.row().primaryKeyLength + "!=" + key.length;

        // check if the index contains the key
        final long seek = index.getl(key);
        if (seek < 0) return;

        // access the file and read the size of the record
        this.file.seek(seek);
        int size = file.readInt();
        long filelength = this.file.length(); // put in separate variable for debugging
        if (seek + size + 4 > filelength) {
            serverLog.logSevere("BLOBHeap", heapFile.getName() + ": too long size " + size + " in record at " + seek);
            throw new IOException(heapFile.getName() + ": too long size " + size + " in record at " + seek);
        }

        // add entry to free array
        this.free.put(seek, size);

        // overwrite the content with zeros; a freshly allocated byte[] is
        // already zero-initialized by the JVM, so no explicit fill loop is needed
        byte[] fill = new byte[size];
        this.file.write(fill, 0, size);

        // remove entry from index
        this.index.removel(key);

        // merge the new gap with its neighbors, if they are gaps as well
        tryMergeNextGaps(seek, size);
        tryMergePreviousGap(seek);
    }

    /**
     * merge a freshly created gap at position thisSeek with the gap directly
     * in front of it, if there is one. Not recursive: at most one predecessor.
     * MUST be called AFTER tryMergeNextGaps, because a successful merge removes
     * the free-list entry for thisSeek.
     * @param thisSeek seek position of the gap that was just created
     * @throws IOException
     */
    private void tryMergePreviousGap(final long thisSeek) throws IOException {
        // first find the gap entry for the closest gap in front of the given gap
        SortedMap<Long, Integer> head = this.free.headMap(thisSeek);
        if (head.size() == 0) return;
        long previousSeek = head.lastKey().longValue();
        int previousSize = head.get(previousSeek).intValue();
        // check if the previous gap ends directly in front of this one
        // (gap payload plus its 4 length bytes must end exactly at thisSeek)
        if (previousSeek + previousSize + 4 == thisSeek) {
            // right in front! merge the gaps
            Integer thisSize = this.free.get(thisSeek);
            assert thisSize != null;
            mergeGaps(previousSeek, previousSize, thisSeek, thisSize.intValue());
        }
    }

    /**
     * merge the gap at thisSeek with any gap(s) that directly follow it.
     * Recursive, so a run of adjacent gaps collapses into a single gap entry.
     * @param thisSeek seek position of the current gap
     * @param thisSize payload size of the current gap
     * @throws IOException
     */
    private void tryMergeNextGaps(final long thisSeek, final int thisSize) throws IOException {
        // first check if next gap position is outside of file size
        long nextSeek = thisSeek + thisSize + 4;
        if (nextSeek >= this.file.length()) return; // end of recursion

        // move to next position and read record size
        Integer nextSize = this.free.get(nextSeek);
        if (nextSize == null) return; // finished, this is not a gap

        // fix: this assert was 'nextSize.intValue() > 0', which contradicted the
        // zero-size branch below and made it unreachable with assertions enabled
        assert nextSize.intValue() >= 0;
        if (nextSize.intValue() == 0) {
            // a strange gap record: we can extend thisGap with the four length bytes
            // of the empty next record
            mergeGaps(thisSeek, thisSize, nextSeek, 0);
            // recursively go on
            tryMergeNextGaps(thisSeek, thisSize + 4);
        } else {
            // check if this is a true gap: the first payload byte of a gap record is 0
            this.file.seek(nextSeek + 4);
            byte[] o = new byte[1];
            this.file.readFully(o, 0, 1);
            int t = o[0];
            assert t == 0;
            if (t == 0) {
                // the nextRecord is a gap record; it is removed from the free list
                // and joined with the current gap
                mergeGaps(thisSeek, thisSize, nextSeek, nextSize.intValue());
                // recursively go on
                tryMergeNextGaps(thisSeek, thisSize + 4 + nextSize.intValue());
            }
        }
    }

    /**
     * merge two directly adjacent gaps into one gap starting at seek0.
     * @param seek0 seek position of the first gap
     * @param size0 payload size of the first gap
     * @param seek1 seek position of the directly following gap
     * @param size1 payload size of the following gap
     * @throws IOException
     */
    private void mergeGaps(final long seek0, final int size0, final long seek1, final int size1) throws IOException {
        Integer g = this.free.remove(seek1); // g is only used for debugging
        assert g != null;
        assert g.intValue() == size1;

        // overwrite the size bytes of the next record with zeros
        this.file.seek(seek1);
        this.file.writeInt(0);

        // the new size of the current gap: old size + len + 4
        int newSize = size0 + 4 + size1;
        this.file.seek(seek0);
        this.file.writeInt(newSize);

        // register the new gap in the free array; overwrite the old gap entry
        g = this.free.put(seek0, newSize);
        assert g != null;
        assert g.intValue() == size0;
    }

    /**
     * find gaps at the end of the file and shrink the file by truncating them,
     * so no trailing free space is persisted.
     */
    protected void shrinkWithGapsAtEnd() {
        // fix: guard against a second call to close(), where the resources
        // have already been released (prevents a NullPointerException)
        if (this.free == null || this.file == null) return;
        try {
            while (this.free.size() > 0) {
                Long seek = this.free.lastKey();
                int size = this.free.get(seek).intValue();
                // stop as soon as the last gap does not touch the end of the file
                if (seek.longValue() + size + 4 != this.file.length()) return;
                // shrink the file
                this.file.setLength(seek.longValue());
                this.free.remove(seek);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * put is not supported by this modifier; use a heap writer instead.
     * @throws UnsupportedOperationException always
     */
    public void put(byte[] key, byte[] b) throws IOException {
        throw new UnsupportedOperationException("put is not supported in BLOBHeapModifier");
    }
}

@ -392,8 +392,12 @@ public class kelondroBLOBTree implements kelondroBLOB {
//segmentCount--; writeSegmentCount();
}
public synchronized boolean has(final byte[] key) throws IOException {
return (key != null) && (getValueCached(elementKey(new String(key), 0)) != null);
/**
 * check whether an entry for the given key exists.
 * I/O errors during the lookup are swallowed and reported as 'not present'.
 * @param key the primary key (decoded with the platform default charset)
 * @return true if a value for the key can be read, false otherwise
 */
public synchronized boolean has(final byte[] key) {
try {
return (key != null) && (getValueCached(elementKey(new String(key), 0)) != null);
} catch (IOException e) {
return false;
}
}
public synchronized kelondroRA getRA(final String filekey) {

@ -693,12 +693,6 @@ public class kelondroCollectionIndex {
return removed;
}
/**
 * get the number of chunks assigned to the given key.
 * @param key the index key
 * @return the chunk count stored in the index row, or 0 if the key is unknown
 * @throws IOException
 */
public synchronized int indexSize(final byte[] key) throws IOException {
final kelondroRow.Entry indexrow = index.get(key);
if (indexrow == null) return 0;
return (int) indexrow.getColLong(idx_col_chunkcount);
}
/**
 * test if the index contains the given key.
 * @param key the index key
 * @return true if the key is known to the index
 */
public synchronized boolean has(final byte[] key) {
return index.has(key);
}

@ -53,6 +53,7 @@ public interface kelondroIOChunks {
public void writeLong(long pos, long v) throws IOException;
public void write(long pos, byte[] b) throws IOException;
public void writeSpace(long pos, int spacecount) throws IOException;
public void deleteOnExit();
}

@ -102,7 +102,7 @@ public final class kelondroRow {
this.primaryKeyLength = (primaryKey < 0) ? this.objectsize : row[primaryKeyIndex].cellwidth;
}
public final kelondroOrder<byte[]> getOrdering() {
public final kelondroByteOrder getOrdering() {
return this.objectOrder;
}

@ -151,6 +151,11 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
return entry;
}
/**
 * remove a byte[] from the set.
 * If the entry was found, it is deleted from the set and returned;
 * if it was not found, null is returned.
 * @param a the full key to remove
 * @return the removed entry, or null if the key was not contained
 */
public kelondroRow.Entry remove(final byte[] a) {
return remove(a, 0, a.length);
}

@ -259,7 +259,7 @@ public class plasmaDHTChunk {
} catch (kelondroException e) {
e.printStackTrace();
}
if (this.log.isFine()) log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash + "; " + this.wordIndex.indexSize(wordHash) + " entries left");
if (this.log.isFine()) log.logFine("Deleted partial index (" + c + " URLs) for word " + wordHash);
this.indexContainers[i] = null;
}
return count;

@ -483,12 +483,6 @@ public final class plasmaWordIndex implements indexRI {
}
}
/**
 * get the time when the container for the given word hash was last updated.
 * @param wordHash the hash of the word
 * @return the container's update time, or 0 if no container exists
 */
public long getUpdateTime(final String wordHash) {
final indexContainer entries = getContainer(wordHash, null);
if (entries == null) return 0;
return entries.updated();
}
/**
 * create a new, empty index container for the given word hash.
 * @param wordHash the hash of the word
 * @param elementCount the initial capacity of the container
 * @return a fresh container using the standard RWI URL-entry row definition
 */
public static indexContainer emptyContainer(final String wordHash, final int elementCount) {
return new indexContainer(wordHash, indexRWIRowEntry.urlEntryRow, elementCount);
}
@ -729,14 +723,6 @@ public final class plasmaWordIndex implements indexRI {
/**
 * @return the total number of entries held in the in- and out-DHT caches
 */
public int cacheSize() {
return dhtInCache.size() + dhtOutCache.size();
}
/**
 * compute the total index size for a word hash as the sum of the sizes
 * reported by both DHT caches and the collection index.
 * @param wordHash the hash of the word
 * @return the accumulated number of index entries for this word hash
 */
public int indexSize(final String wordHash) {
int size = 0;
size += dhtInCache.indexSize(wordHash);
size += dhtOutCache.indexSize(wordHash);
size += collections.indexSize(wordHash);
return size;
}
public void close() {
dhtInCache.close();
@ -754,7 +740,7 @@ public final class plasmaWordIndex implements indexRI {
final indexContainer c = new indexContainer(
wordHash,
indexRWIRowEntry.urlEntryRow,
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash) + collections.indexSize(wordHash)
dhtInCache.sizeContainer(wordHash) + dhtOutCache.sizeContainer(wordHash)
);
c.addAllUnique(dhtInCache.deleteContainer(wordHash));
c.addAllUnique(dhtOutCache.deleteContainer(wordHash));

Loading…
Cancel
Save