Integration of the new index data structure IndexCell

This is the start of a testing phase for the IndexCell data structure, which will replace
the collections and caching strategy. IndexCell creation and maintenance is fast, has
no caching overhead and a very low IO load, and is the basis for the next data structure,
index segments.

IndexCell files are stored at DATA/<network>/TEXT/RICELL.
With this commit the old data structures are still used, until a flag in yacy.conf is set.
To switch to the new data structure, set
useCell = true
in yacy.conf. You will then no longer have access to TEXT/RICACHE and TEXT/RICOLLECTION.
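
For reference, the configuration hunk below adds the flag to the defaults with the cell disabled:

    # next index data structure
    useCell = false

At startup the switchboard reads the flag with sb.getConfigBool("useCell", false) and passes it to plasmaWordIndex, which then constructs an IndexCell instead of a BufferedIndexCollection (see the plasmaWordIndex hunk below).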

This code is still bleeding-edge development. Please do not use the new data structure in
production yet. Future versions may change the data types or storage locations.
The next main release will include a migration feature for old data structures.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5724 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 16 years ago
parent fd0976c0a7
commit a9cea419ef

@@ -3,7 +3,7 @@ javacSource=1.5
 javacTarget=1.5
 
 # Release Configuration
-releaseVersion=0.72
+releaseVersion=0.73
 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
 proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@@ -900,3 +900,6 @@ cgi.suffixes = cgi,pl
 # whether this is a version for a web browser
 browserintegration = false
+
+# next index data structure
+useCell = false

@@ -107,6 +107,17 @@ public interface BufferedIndex extends Index {
      */
     public int getBufferSize();
 
+    /**
+     * iterate over entries in the index. This method differs from the iterator in an
+     * Index object in that it takes an additional 'buffer' flag. When using this method,
+     * the iteration covers either the buffer content or the backend content, but
+     * never a merged view of both.
+     * @param startHash
+     * @param rot
+     * @param buffer
+     * @return
+     * @throws IOException
+     */
     public CloneableIterator<ReferenceContainer> references(
             String startHash,
             boolean rot,
@@ -114,6 +125,18 @@ public interface BufferedIndex extends Index {
             ) throws IOException;
 
+    /**
+     * collect reference containers from the index. This method differs from the collector
+     * in an Index object in that it takes an additional 'buffer' flag. When using this
+     * method, the collection covers either the buffer content or the backend content,
+     * but never a merged view of both.
+     * @param startHash
+     * @param rot
+     * @param count
+     * @param buffer
+     * @return
+     * @throws IOException
+     */
     public TreeSet<ReferenceContainer> references(
             String startHash,
             boolean rot,
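
As a usage illustration, a minimal sketch of the distinction this flag makes. It assumes that CloneableIterator extends java.util.Iterator (as its use elsewhere in this commit suggests) and borrows the lowest word hash "AAAAAAAAAAAA" from the yacy.java hunks below; the counting logic is illustrative only:

    import java.io.IOException;
    import de.anomic.kelondro.order.CloneableIterator;
    import de.anomic.kelondro.text.BufferedIndex;
    import de.anomic.kelondro.text.ReferenceContainer;

    // Sketch only: count reference containers in the buffer and in the backend
    // separately; this method deliberately never returns a merged view.
    final class BufferSplitScan {
        static void countSeparately(BufferedIndex index) throws IOException {
            // buffer == true: iterate only containers still held in RAM
            CloneableIterator<ReferenceContainer> ram = index.references("AAAAAAAAAAAA", false, true);
            int inBuffer = 0;
            while (ram.hasNext()) { ram.next(); inBuffer++; }
            // buffer == false: iterate only containers already in the backend
            CloneableIterator<ReferenceContainer> disk = index.references("AAAAAAAAAAAA", false, false);
            int inBackend = 0;
            while (disk.hasNext()) { disk.next(); inBackend++; }
            System.out.println(inBuffer + " containers buffered, " + inBackend + " in the backend");
        }
    }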

@@ -57,8 +57,8 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     public static final int lowcachedivisor = 900;
     public static final int maxCollectionPartition = 7; // should be 7
 
-    private final IndexBuffer indexCache;
-    private final IndexCollection collections; // new database structure to replace AssortmentCluster and FileCluster
+    private final IndexBuffer buffer;
+    private final IndexCollection collections;
 
     public BufferedIndexCollection (
             File indexPrimaryTextLocation,
@@ -73,15 +73,15 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
         if (!(textindexcache.exists())) textindexcache.mkdirs();
         if (new File(textindexcache, "index.dhtin.blob").exists()) {
             // migration of the both caches into one
-            this.indexCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
+            this.buffer = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
             IndexBuffer dhtInCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log);
             for (ReferenceContainer c: dhtInCache) {
-                this.indexCache.add(c);
+                this.buffer.add(c);
             }
             new File(textindexcache, "index.dhtin.blob").delete();
         } else {
             // read in new BLOB
-            this.indexCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
+            this.buffer = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log);
         }
 
         // create collections storage path
@@ -103,24 +103,24 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
         assert (entries.row().objectsize == ReferenceRow.urlEntryRow.objectsize);
         // add the entry
-        indexCache.add(entries);
+        buffer.add(entries);
         cacheFlushControl();
     }
 
     public void add(final String wordHash, final ReferenceRow entry) throws IOException {
         // add the entry
-        indexCache.add(wordHash, entry);
+        buffer.add(wordHash, entry);
         cacheFlushControl();
     }
 
     public boolean has(final String wordHash) {
-        if (indexCache.has(wordHash)) return true;
+        if (buffer.has(wordHash)) return true;
         if (collections.has(wordHash)) return true;
         return false;
     }
 
     public int count(String key) {
-        return indexCache.count(key) + collections.count(key);
+        return buffer.count(key) + collections.count(key);
     }
 
     public ReferenceContainer get(final String wordHash, final Set<String> urlselection) {
@@ -131,7 +131,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
         // get from cache
         ReferenceContainer container;
-        container = indexCache.get(wordHash, urlselection);
+        container = buffer.get(wordHash, urlselection);
 
         // get from collection index
         if (container == null) {
@@ -172,22 +172,22 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
         final ReferenceContainer c = new ReferenceContainer(
                 wordHash,
                 ReferenceRow.urlEntryRow,
-                indexCache.count(wordHash));
-        c.addAllUnique(indexCache.delete(wordHash));
+                buffer.count(wordHash));
+        c.addAllUnique(buffer.delete(wordHash));
         c.addAllUnique(collections.delete(wordHash));
         return c;
     }
 
     public boolean remove(final String wordHash, final String urlHash) {
         boolean removed = false;
-        removed = removed | (indexCache.remove(wordHash, urlHash));
+        removed = removed | (buffer.remove(wordHash, urlHash));
         removed = removed | (collections.remove(wordHash, urlHash));
         return removed;
     }
 
     public int remove(final String wordHash, final Set<String> urlHashes) {
         int removed = 0;
-        removed += indexCache.remove(wordHash, urlHashes);
+        removed += buffer.remove(wordHash, urlHashes);
         removed += collections.remove(wordHash, urlHashes);
         return removed;
     }
@@ -195,16 +195,16 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     public synchronized CloneableIterator<ReferenceContainer> references(final String startHash, final boolean rot, final boolean ram) throws IOException {
         final CloneableIterator<ReferenceContainer> i = wordContainers(startHash, ram);
         if (rot) {
-            return new RotateIterator<ReferenceContainer>(i, new String(Base64Order.zero(startHash.length())), indexCache.size() + ((ram) ? 0 : collections.size()));
+            return new RotateIterator<ReferenceContainer>(i, new String(Base64Order.zero(startHash.length())), buffer.size() + ((ram) ? 0 : collections.size()));
         }
         return i;
     }
 
     private synchronized CloneableIterator<ReferenceContainer> wordContainers(final String startWordHash, final boolean ram) throws IOException {
-        final Order<ReferenceContainer> containerOrder = new ReferenceContainerOrder(indexCache.ordering().clone());
+        final Order<ReferenceContainer> containerOrder = new ReferenceContainerOrder(buffer.ordering().clone());
         containerOrder.rotate(ReferenceContainer.emptyContainer(startWordHash, 0));
         if (ram) {
-            return indexCache.references(startWordHash, false);
+            return buffer.references(startWordHash, false);
         }
         return collections.references(startWordHash, false);
         /*
@@ -218,7 +218,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     }
 
     public void clear() {
-        indexCache.clear();
+        buffer.clear();
         try {
             collections.clear();
         } catch (IOException e) {
@@ -227,16 +227,16 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     }
 
     public void close() {
-        indexCache.close();
+        buffer.close();
         collections.close();
     }
 
     public int size() {
-        return java.lang.Math.max(collections.size(), indexCache.size());
+        return java.lang.Math.max(collections.size(), buffer.size());
     }
 
     public int minMem() {
-        return 1024*1024 /* indexing overhead */ + indexCache.minMem() + collections.minMem();
+        return 1024*1024 /* indexing overhead */ + buffer.minMem() + collections.minMem();
    }
@@ -245,23 +245,23 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
      */
     public int getBufferMaxReferences() {
-        return indexCache.getBufferMaxReferences();
+        return buffer.getBufferMaxReferences();
     }
 
     public long getBufferMinAge() {
-        return indexCache.getBufferMinAge();
+        return buffer.getBufferMinAge();
     }
 
     public long getBufferMaxAge() {
-        return indexCache.getBufferMaxAge();
+        return buffer.getBufferMaxAge();
     }
 
     public long getBufferSizeBytes() {
-        return indexCache.getBufferSizeBytes();
+        return buffer.getBufferSizeBytes();
     }
 
     public void setBufferMaxWordCount(final int maxWords) {
-        indexCache.setMaxWordCount(maxWords);
+        buffer.setMaxWordCount(maxWords);
     }
 
     private void cacheFlushControl() {
@@ -274,14 +274,14 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
             serverProfiling.update("wordcache", Long.valueOf(cs), true);
             // To ensure termination an additional counter is used
             int l = 0;
-            while (this.indexCache.size() > 0 && (l++ < 100) && (this.indexCache.getBufferMaxReferences() > wCacheMaxChunk)) {
-                flushCacheOne(this.indexCache);
+            while (this.buffer.size() > 0 && (l++ < 100) && (this.buffer.getBufferMaxReferences() > wCacheMaxChunk)) {
+                flushCacheOne(this.buffer);
             }
             // next flush more entries if the size exceeds the maximum size of the cache
-            while (this.indexCache.size() > 0 &&
-                   ((this.indexCache.size() > this.indexCache.getMaxWordCount()) ||
+            while (this.buffer.size() > 0 &&
+                   ((this.buffer.size() > this.buffer.getMaxWordCount()) ||
                     (MemoryControl.available() < collections.minMem()))) {
-                flushCacheOne(this.indexCache);
+                flushCacheOne(this.buffer);
             }
             if (getBufferSize() != cs) serverProfiling.update("wordcache", Long.valueOf(getBufferSize()), true);
         }
@@ -292,8 +292,8 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     }
 
     private synchronized void flushCacheUntil(long timeout) {
-        while (System.currentTimeMillis() < timeout && indexCache.size() > 0) {
-            flushCacheOne(indexCache);
+        while (System.currentTimeMillis() < timeout && buffer.size() > 0) {
+            flushCacheOne(buffer);
         }
     }
@@ -318,7 +318,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     }
 
     public int getBufferSize() {
-        return indexCache.size();
+        return buffer.size();
     }
 
     public ByteOrder ordering() {
@@ -326,11 +326,11 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme
     }
 
     public CloneableIterator<ReferenceContainer> references(String startWordHash, boolean rot) {
-        final Order<ReferenceContainer> containerOrder = new ReferenceContainerOrder(this.indexCache.ordering().clone());
+        final Order<ReferenceContainer> containerOrder = new ReferenceContainerOrder(this.buffer.ordering().clone());
         return new MergeIterator<ReferenceContainer>(
-            this.indexCache.references(startWordHash, rot),
+            this.buffer.references(startWordHash, rot),
             new MergeIterator<ReferenceContainer>(
-                this.indexCache.references(startWordHash, false),
+                this.buffer.references(startWordHash, false),
                 this.collections.references(startWordHash, false),
                 containerOrder,
                 ReferenceContainer.containerMergeMethod,

@@ -36,6 +36,7 @@ import de.anomic.kelondro.order.ByteOrder;
 import de.anomic.kelondro.order.CloneableIterator;
 import de.anomic.kelondro.order.MergeIterator;
 import de.anomic.kelondro.order.Order;
+import de.anomic.server.serverProfiling;
 
 /*
  * an index cell is a part of the horizontal index in the new segment-oriented index
@@ -48,7 +49,7 @@ import de.anomic.kelondro.order.Order;
  * another BLOB file in the index array.
  */
-public final class IndexCell extends AbstractIndex implements Index {
+public final class IndexCell extends AbstractBufferedIndex implements BufferedIndex {
 
     // class variables
     private ReferenceContainerArray array;
@@ -63,22 +64,14 @@ public final class IndexCell extends AbstractIndex implements Index {
             ) throws IOException {
         this.array = new ReferenceContainerArray(cellPath, wordOrder, payloadrow);
         this.ram = new ReferenceContainerCache(payloadrow, wordOrder);
+        this.ram.initWriteMode();
         this.maxRamEntries = maxRamEntries;
     }
 
-    private void cacheDump() throws IOException {
-        // dump the ram
-        File dumpFile = this.array.newContainerBLOBFile();
-        this.ram.dump(dumpFile);
-        // get a fresh ram cache
-        this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering());
-        // add the dumped indexContainerBLOB to the array
-        this.array.mountBLOBContainer(dumpFile);
-    }
-
-    public ByteOrder ordering() {
-        return this.array.ordering();
-    }
+    /*
+     * methods to implement Index
+     */
 
     /**
      * add entries to the cell: this adds the new entries always to the RAM part, never to BLOBs
@@ -87,108 +80,78 @@ public final class IndexCell extends AbstractIndex implements Index {
      */
     public synchronized void add(ReferenceContainer newEntries) throws IOException {
         this.ram.add(newEntries);
+        serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
         if (this.ram.size() > this.maxRamEntries) cacheDump();
     }
 
     public synchronized void add(String hash, ReferenceRow entry) throws IOException {
         this.ram.add(hash, entry);
+        serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true);
         if (this.ram.size() > this.maxRamEntries) cacheDump();
     }
 
     /**
-     * clear the RAM and BLOB part, deletes everything in the cell
-     * @throws IOException
+     * checks if there is any container for this wordHash, either in RAM or any BLOB
      */
-    public synchronized void clear() throws IOException {
-        this.ram.clear();
-        this.array.clear();
+    public boolean has(String wordHash) {
+        if (this.ram.has(wordHash)) return true;
+        return this.array.has(wordHash);
     }
 
-    /**
-     * when a cell is closed, the current RAM is dumped to a file which will be opened as
-     * BLOB file the next time a cell is opened. A name for the dump is automatically generated
-     * and is composed of the current date and the cell salt
-     */
-    public synchronized void close() {
-        // dump the ram
+    public int count(String wordHash) {
+        ReferenceContainer c0 = this.ram.get(wordHash, null);
+        ReferenceContainer c1;
         try {
-            this.ram.dump(this.array.newContainerBLOBFile());
+            c1 = this.array.get(wordHash);
         } catch (IOException e) {
-            e.printStackTrace();
+            c1 = null;
         }
-        // close all
-        this.ram.close();
-        this.array.close();
+        if (c1 == null) {
+            if (c0 == null) return 0;
+            return c0.size();
+        }
+        if (c0 == null) return c1.size();
+        return c1.size() + c0.size();
     }
 
     /**
-     * deleting a container affects the containers in RAM and all the BLOB files
-     * the deleted containers are merged and returned as result of the method
+     * all containers in the BLOBs and the RAM are merged and returned
      * @throws IOException
      */
-    public ReferenceContainer delete(String wordHash) throws IOException {
-        ReferenceContainer c0 = this.ram.delete(wordHash);
+    public ReferenceContainer get(String wordHash, Set<String> urlselection) throws IOException {
+        ReferenceContainer c0 = this.ram.get(wordHash, null);
         ReferenceContainer c1 = this.array.get(wordHash);
         if (c1 == null) {
             if (c0 == null) return null;
             return c0;
         }
-        this.array.delete(wordHash);
         if (c0 == null) return c1;
         return c1.merge(c0);
     }
 
     /**
-     * all containers in the BLOBs and the RAM are merged and returned
+     * deleting a container affects the containers in RAM and all the BLOB files
+     * the deleted containers are merged and returned as result of the method
      * @throws IOException
      */
-    public ReferenceContainer get(String wordHash, Set<String> urlselection) throws IOException {
-        ReferenceContainer c0 = this.ram.get(wordHash, null);
+    public ReferenceContainer delete(String wordHash) throws IOException {
+        ReferenceContainer c0 = this.ram.delete(wordHash);
         ReferenceContainer c1 = this.array.get(wordHash);
         if (c1 == null) {
             if (c0 == null) return null;
             return c0;
         }
+        this.array.delete(wordHash);
         if (c0 == null) return c1;
         return c1.merge(c0);
     }
 
-    public int count(String wordHash) {
-        ReferenceContainer c0 = this.ram.get(wordHash, null);
-        ReferenceContainer c1;
-        try {
-            c1 = this.array.get(wordHash);
-        } catch (IOException e) {
-            c1 = null;
-        }
-        if (c1 == null) {
-            if (c0 == null) return 0;
-            return c0.size();
-        }
-        if (c0 == null) return c1.size();
-        return c1.size() + c0.size();
-    }
-
-    /**
-     * checks if there is any container for this wordHash, either in RAM or any BLOB
-     */
-    public boolean has(String wordHash) {
-        if (this.ram.has(wordHash)) return true;
-        return this.array.has(wordHash);
-    }
-
-    public int minMem() {
-        return 10 * 1024 * 1024;
-    }
-
     /**
      * remove url references from a selected word hash. this deletes also in the BLOB
     * files, which means that there exists new gap entries after the deletion
      * The gaps are never merged in place, but can be eliminated when BLOBs are merged into
      * new BLOBs. This returns the sum of all url references that have been removed
      * @throws IOException
-     * @throws IOException
-     * @throws IOException
      */
     public int remove(String wordHash, Set<String> urlHashes) throws IOException {
         int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHashes));
@@ -200,8 +163,24 @@ public final class IndexCell extends AbstractIndex implements Index {
         return reduced > 0;
     }
 
-    public int size() {
-        return this.ram.size() + this.array.size();
+    private static class RemoveRewriter implements ReferenceContainerArray.ContainerRewriter {
+
+        Set<String> urlHashes;
+
+        public RemoveRewriter(Set<String> urlHashes) {
+            this.urlHashes = urlHashes;
+        }
+
+        public RemoveRewriter(String urlHash) {
+            this.urlHashes = new HashSet<String>();
+            this.urlHashes.add(urlHash);
+        }
+
+        public ReferenceContainer rewrite(ReferenceContainer container) {
+            container.removeEntries(urlHashes);
+            return container;
+        }
     }
 
     public CloneableIterator<ReferenceContainer> references(String startWordHash, boolean rot) {
@@ -234,24 +213,97 @@ public final class IndexCell extends AbstractIndex implements Index {
                 true);
     }
 
-    private static class RemoveRewriter implements ReferenceContainerArray.ContainerRewriter {
-
-        Set<String> urlHashes;
-
-        public RemoveRewriter(Set<String> urlHashes) {
-            this.urlHashes = urlHashes;
-        }
-
-        public RemoveRewriter(String urlHash) {
-            this.urlHashes = new HashSet<String>();
-            this.urlHashes.add(urlHash);
-        }
-
-        public ReferenceContainer rewrite(ReferenceContainer container) {
-            container.removeEntries(urlHashes);
-            return container;
-        }
-    }
+    /**
+     * clear the RAM and BLOB part, deletes everything in the cell
+     * @throws IOException
+     */
+    public synchronized void clear() throws IOException {
+        this.ram.clear();
+        this.array.clear();
+    }
+
+    /**
+     * when a cell is closed, the current RAM is dumped to a file which will be opened as
+     * BLOB file the next time a cell is opened. A name for the dump is automatically generated
+     * and is composed of the current date and the cell salt
+     */
+    public synchronized void close() {
+        // dump the ram
+        try {
+            this.ram.dump(this.array.newContainerBLOBFile());
+        } catch (IOException e) {
+            e.printStackTrace();
+        }
+        // close all
+        this.ram.close();
+        this.array.close();
+    }
+
+    public int size() {
+        return this.ram.size() + this.array.size();
+    }
+
+    public int minMem() {
+        return 10 * 1024 * 1024;
+    }
+
+    public ByteOrder ordering() {
+        return this.array.ordering();
+    }
+
+    /*
+     * cache control methods
+     */
+
+    private void cacheDump() throws IOException {
+        // dump the ram
+        File dumpFile = this.array.newContainerBLOBFile();
+        this.ram.dump(dumpFile);
+        // get a fresh ram cache
+        this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering());
+        this.ram.initWriteMode();
+        // add the dumped indexContainerBLOB to the array
+        this.array.mountBLOBContainer(dumpFile);
+    }
+
+    public void cleanupBuffer(int time) {
+        // do nothing
+    }
+
+    public int getBackendSize() {
+        return this.array.size();
+    }
+
+    public long getBufferMaxAge() {
+        return System.currentTimeMillis();
+    }
+
+    public int getBufferMaxReferences() {
+        return this.ram.maxReferences();
+    }
+
+    public long getBufferMinAge() {
+        return System.currentTimeMillis();
+    }
+
+    public int getBufferSize() {
+        return this.ram.size();
+    }
+
+    public long getBufferSizeBytes() {
+        return 10000 * this.ram.size(); // guessed; we don't know that exactly because there is no statistics here (expensive, not necessary)
+    }
+
+    public void setBufferMaxWordCount(int maxWords) {
+        this.maxRamEntries = maxWords;
+    }
 }
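
As a usage illustration for the new cell, a minimal sketch that opens a cell, reads one merged container and closes the cell again. The constructor arguments mirror the plasmaWordIndex hunk below; the "freeworld" path segment is only an example network name, and the word hash is the lowest hash used elsewhere in this commit:

    import java.io.File;
    import java.io.IOException;
    import de.anomic.kelondro.order.Base64Order;
    import de.anomic.kelondro.text.IndexCell;
    import de.anomic.kelondro.text.ReferenceContainer;
    import de.anomic.kelondro.text.ReferenceRow;

    final class IndexCellDemo {
        public static void main(String[] args) throws IOException {
            IndexCell cell = new IndexCell(
                    new File("DATA/freeworld/TEXT/RICELL"), // storage location per the commit message
                    Base64Order.enhancedCoder,              // wordOrder, as in plasmaWordIndex
                    ReferenceRow.urlEntryRow,               // payload row definition
                    10000);                                 // maxRamEntries; RAM is dumped to a BLOB above this
            ReferenceContainer c = cell.get("AAAAAAAAAAAA", null); // merged RAM + BLOB view
            System.out.println(c == null ? 0 : c.size());
            cell.close(); // dumps the remaining RAM part into a fresh BLOB file
        }
    }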

@@ -120,7 +120,7 @@ public class ReferenceContainer extends RowSet {
     }
 
     public ReferenceContainer merge(final ReferenceContainer c) {
-        return new ReferenceContainer(this.wordHash, this.merge(c));
+        return new ReferenceContainer(this.wordHash, super.merge(c));
     }
 
     public Reference put(final ReferenceRow entry) {
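
The one-line change above fixes a self-call: inside merge(final ReferenceContainer c), the expression this.merge(c) resolves to that same overload and recurses until the stack overflows, while super.merge(c) dispatches to the inherited RowSet implementation. A standalone illustration of the pitfall (not YaCy code; Base stands in for RowSet, Derived for ReferenceContainer):

    class Base {
        Base merge(Base other) { return this; } // the implementation we actually want
    }
    final class Derived extends Base {
        Derived merge(Derived other) {
            // this.merge(other) would select this same overload and recurse forever;
            // super.merge(other) reaches the Base implementation, as the fix above does.
            return (Derived) super.merge(other);
        }
    }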

@@ -306,6 +306,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         final boolean useCommons = getConfigBool("index.storeCommons", false);
         final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
         final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
+        final boolean useCell = sb.getConfigBool("useCell", false);
         try {
             webIndex = new plasmaWordIndex(
                     networkName,
@@ -315,7 +316,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
                     wordCacheMaxCount,
                     useCommons,
                     redundancy,
-                    paritionExponent);
+                    paritionExponent,
+                    useCell);
         } catch (IOException e1) {
             e1.printStackTrace();
             webIndex = null;
@@ -760,6 +762,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
         final boolean useCommons = getConfigBool("index.storeCommons", false);
         final int redundancy = (int) sb.getConfigLong("network.unit.dhtredundancy.senior", 1);
         final int paritionExponent = (int) sb.getConfigLong("network.unit.dht.partitionExponent", 0);
+        final boolean useCell = sb.getConfigBool("useCell", false);
         try {
             this.webIndex = new plasmaWordIndex(
                     getConfig(plasmaSwitchboardConstants.NETWORK_NAME, ""),
@@ -769,7 +772,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
                     wordCacheMaxCount,
                     useCommons,
                     redundancy,
-                    paritionExponent);
+                    paritionExponent,
+                    useCell);
         } catch (IOException e) {
             e.printStackTrace();
             this.webIndex = null;

@@ -42,8 +42,9 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
 import de.anomic.http.httpdProxyCacheEntry;
 import de.anomic.kelondro.order.Base64Order;
 import de.anomic.kelondro.order.ByteOrder;
-import de.anomic.kelondro.text.BufferedIndexCollection;
 import de.anomic.kelondro.text.BufferedIndex;
+import de.anomic.kelondro.text.BufferedIndexCollection;
+import de.anomic.kelondro.text.IndexCell;
 import de.anomic.kelondro.text.MetadataRowContainer;
 import de.anomic.kelondro.text.ReferenceContainer;
 import de.anomic.kelondro.text.ReferenceRow;
@@ -83,7 +84,7 @@ public final class plasmaWordIndex {
     public static final ByteOrder wordOrder = Base64Order.enhancedCoder;
 
     private final BufferedIndex index;
     private final Log log;
     private MetadataRepository metadata;
     private final yacySeedDB peers;
@@ -104,7 +105,8 @@ public final class plasmaWordIndex {
             final int entityCacheMaxSize,
             final boolean useCommons,
             final int redundancy,
-            final int partitionExponent) throws IOException {
+            final int partitionExponent,
+            final boolean useCell) throws IOException {
         if (networkName == null || networkName.length() == 0) {
             log.logSevere("no network name given - shutting down");
             System.exit(0);
@@ -128,7 +130,13 @@ public final class plasmaWordIndex {
                 }
             }
         }
-        this.index = new BufferedIndexCollection(
+        this.index = (useCell) ?
+            new IndexCell(
+                new File(indexPrimaryTextLocation, "RICELL"),
+                wordOrder,
+                ReferenceRow.urlEntryRow,
+                entityCacheMaxSize) :
+            new BufferedIndexCollection(
                 indexPrimaryTextLocation,
                 wordOrder,
                 ReferenceRow.urlEntryRow,

@@ -675,7 +675,7 @@ public final class yacy {
         final int cacheMem = (int)(MemoryControl.max() - MemoryControl.total());
         if (cacheMem < 2048000) throw new OutOfMemoryError("Not enough memory available to start clean up.");
-        final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
+        final plasmaWordIndex wordIndex = new plasmaWordIndex(networkName, log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0, false);
         final Iterator<ReferenceContainer> indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA", false, false);
 
         long urlCounter = 0, wordCounter = 0;
@@ -866,7 +866,7 @@ public final class yacy {
             try {
                 Iterator<ReferenceContainer> indexContainerIterator = null;
                 if (resource.equals("all")) {
-                    WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0);
+                    WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0, false);
                     indexContainerIterator = WordIndex.index().references(wordChunkStartHash, false, false);
                 }
                 int counter = 0;
