diff --git a/build.properties b/build.properties index 707145f4d..1e90a7963 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.5 javacTarget=1.5 # Release Configuration -releaseVersion=0.72 +releaseVersion=0.73 stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz diff --git a/defaults/yacy.init b/defaults/yacy.init index e4b5b9634..65be3fbda 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -899,4 +899,7 @@ cgi.allow = false cgi.suffixes = cgi,pl # whether this is a version for a web browser -browserintegration = false \ No newline at end of file +browserintegration = false + +# next index data structure +useCell = false \ No newline at end of file diff --git a/source/de/anomic/kelondro/text/BufferedIndex.java b/source/de/anomic/kelondro/text/BufferedIndex.java index 71fd754f4..c75d5d987 100644 --- a/source/de/anomic/kelondro/text/BufferedIndex.java +++ b/source/de/anomic/kelondro/text/BufferedIndex.java @@ -107,6 +107,17 @@ public interface BufferedIndex extends Index { */ public int getBufferSize(); + /** + * iterate over entries in index. this method differs from the iterator in an Index + * object in such a way that it has the additional 'buffer' flag. When using this method, + * the iteration goes only over the buffer content, or over the backend-content, but + * not over a merged content. + * @param startHash + * @param rot + * @param buffer + * @return + * @throws IOException + */ public CloneableIterator references( String startHash, boolean rot, @@ -114,6 +125,18 @@ public interface BufferedIndex extends Index { ) throws IOException; + /** + * collect reference container in index. this method differs from the collector in an Index + * object in such a way that it has the additional 'buffer' flag. When using this method, + * the collection goes only over the buffer content, or over the backend-content, but + * not over a merged content. + * @param startHash + * @param rot + * @param count + * @param buffer + * @return + * @throws IOException + */ public TreeSet references( String startHash, boolean rot, diff --git a/source/de/anomic/kelondro/text/BufferedIndexCollection.java b/source/de/anomic/kelondro/text/BufferedIndexCollection.java index 16289151c..3e7bd3f1e 100644 --- a/source/de/anomic/kelondro/text/BufferedIndexCollection.java +++ b/source/de/anomic/kelondro/text/BufferedIndexCollection.java @@ -57,8 +57,8 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme public static final int lowcachedivisor = 900; public static final int maxCollectionPartition = 7; // should be 7 - private final IndexBuffer indexCache; - private final IndexCollection collections; // new database structure to replace AssortmentCluster and FileCluster + private final IndexBuffer buffer; + private final IndexCollection collections; public BufferedIndexCollection ( File indexPrimaryTextLocation, @@ -73,15 +73,15 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme if (!(textindexcache.exists())) textindexcache.mkdirs(); if (new File(textindexcache, "index.dhtin.blob").exists()) { // migration of the both caches into one - this.indexCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); + this.buffer = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); IndexBuffer dhtInCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtin.blob", log); for (ReferenceContainer c: dhtInCache) { - this.indexCache.add(c); + this.buffer.add(c); } new File(textindexcache, "index.dhtin.blob").delete(); } else { // read in new BLOB - this.indexCache = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); + this.buffer = new IndexBuffer(textindexcache, wordOrdering, payloadrow, entityCacheMaxSize, wCacheMaxChunk, wCacheMaxAge, "index.dhtout.blob", log); } // create collections storage path @@ -103,24 +103,24 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme assert (entries.row().objectsize == ReferenceRow.urlEntryRow.objectsize); // add the entry - indexCache.add(entries); + buffer.add(entries); cacheFlushControl(); } public void add(final String wordHash, final ReferenceRow entry) throws IOException { // add the entry - indexCache.add(wordHash, entry); + buffer.add(wordHash, entry); cacheFlushControl(); } public boolean has(final String wordHash) { - if (indexCache.has(wordHash)) return true; + if (buffer.has(wordHash)) return true; if (collections.has(wordHash)) return true; return false; } public int count(String key) { - return indexCache.count(key) + collections.count(key); + return buffer.count(key) + collections.count(key); } public ReferenceContainer get(final String wordHash, final Set urlselection) { @@ -131,7 +131,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme // get from cache ReferenceContainer container; - container = indexCache.get(wordHash, urlselection); + container = buffer.get(wordHash, urlselection); // get from collection index if (container == null) { @@ -172,22 +172,22 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme final ReferenceContainer c = new ReferenceContainer( wordHash, ReferenceRow.urlEntryRow, - indexCache.count(wordHash)); - c.addAllUnique(indexCache.delete(wordHash)); + buffer.count(wordHash)); + c.addAllUnique(buffer.delete(wordHash)); c.addAllUnique(collections.delete(wordHash)); return c; } public boolean remove(final String wordHash, final String urlHash) { boolean removed = false; - removed = removed | (indexCache.remove(wordHash, urlHash)); + removed = removed | (buffer.remove(wordHash, urlHash)); removed = removed | (collections.remove(wordHash, urlHash)); return removed; } public int remove(final String wordHash, final Set urlHashes) { int removed = 0; - removed += indexCache.remove(wordHash, urlHashes); + removed += buffer.remove(wordHash, urlHashes); removed += collections.remove(wordHash, urlHashes); return removed; } @@ -195,16 +195,16 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme public synchronized CloneableIterator references(final String startHash, final boolean rot, final boolean ram) throws IOException { final CloneableIterator i = wordContainers(startHash, ram); if (rot) { - return new RotateIterator(i, new String(Base64Order.zero(startHash.length())), indexCache.size() + ((ram) ? 0 : collections.size())); + return new RotateIterator(i, new String(Base64Order.zero(startHash.length())), buffer.size() + ((ram) ? 0 : collections.size())); } return i; } private synchronized CloneableIterator wordContainers(final String startWordHash, final boolean ram) throws IOException { - final Order containerOrder = new ReferenceContainerOrder(indexCache.ordering().clone()); + final Order containerOrder = new ReferenceContainerOrder(buffer.ordering().clone()); containerOrder.rotate(ReferenceContainer.emptyContainer(startWordHash, 0)); if (ram) { - return indexCache.references(startWordHash, false); + return buffer.references(startWordHash, false); } return collections.references(startWordHash, false); /* @@ -218,7 +218,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme } public void clear() { - indexCache.clear(); + buffer.clear(); try { collections.clear(); } catch (IOException e) { @@ -227,16 +227,16 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme } public void close() { - indexCache.close(); + buffer.close(); collections.close(); } public int size() { - return java.lang.Math.max(collections.size(), indexCache.size()); + return java.lang.Math.max(collections.size(), buffer.size()); } public int minMem() { - return 1024*1024 /* indexing overhead */ + indexCache.minMem() + collections.minMem(); + return 1024*1024 /* indexing overhead */ + buffer.minMem() + collections.minMem(); } @@ -245,23 +245,23 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme */ public int getBufferMaxReferences() { - return indexCache.getBufferMaxReferences(); + return buffer.getBufferMaxReferences(); } public long getBufferMinAge() { - return indexCache.getBufferMinAge(); + return buffer.getBufferMinAge(); } public long getBufferMaxAge() { - return indexCache.getBufferMaxAge(); + return buffer.getBufferMaxAge(); } public long getBufferSizeBytes() { - return indexCache.getBufferSizeBytes(); + return buffer.getBufferSizeBytes(); } public void setBufferMaxWordCount(final int maxWords) { - indexCache.setMaxWordCount(maxWords); + buffer.setMaxWordCount(maxWords); } private void cacheFlushControl() { @@ -274,14 +274,14 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme serverProfiling.update("wordcache", Long.valueOf(cs), true); // To ensure termination an additional counter is used int l = 0; - while (this.indexCache.size() > 0 && (l++ < 100) && (this.indexCache.getBufferMaxReferences() > wCacheMaxChunk)) { - flushCacheOne(this.indexCache); + while (this.buffer.size() > 0 && (l++ < 100) && (this.buffer.getBufferMaxReferences() > wCacheMaxChunk)) { + flushCacheOne(this.buffer); } // next flush more entries if the size exceeds the maximum size of the cache - while (this.indexCache.size() > 0 && - ((this.indexCache.size() > this.indexCache.getMaxWordCount()) || + while (this.buffer.size() > 0 && + ((this.buffer.size() > this.buffer.getMaxWordCount()) || (MemoryControl.available() < collections.minMem()))) { - flushCacheOne(this.indexCache); + flushCacheOne(this.buffer); } if (getBufferSize() != cs) serverProfiling.update("wordcache", Long.valueOf(getBufferSize()), true); } @@ -292,8 +292,8 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme } private synchronized void flushCacheUntil(long timeout) { - while (System.currentTimeMillis() < timeout && indexCache.size() > 0) { - flushCacheOne(indexCache); + while (System.currentTimeMillis() < timeout && buffer.size() > 0) { + flushCacheOne(buffer); } } @@ -318,7 +318,7 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme } public int getBufferSize() { - return indexCache.size(); + return buffer.size(); } public ByteOrder ordering() { @@ -326,11 +326,11 @@ public final class BufferedIndexCollection extends AbstractBufferedIndex impleme } public CloneableIterator references(String startWordHash, boolean rot) { - final Order containerOrder = new ReferenceContainerOrder(this.indexCache.ordering().clone()); + final Order containerOrder = new ReferenceContainerOrder(this.buffer.ordering().clone()); return new MergeIterator( - this.indexCache.references(startWordHash, rot), + this.buffer.references(startWordHash, rot), new MergeIterator( - this.indexCache.references(startWordHash, false), + this.buffer.references(startWordHash, false), this.collections.references(startWordHash, false), containerOrder, ReferenceContainer.containerMergeMethod, diff --git a/source/de/anomic/kelondro/text/IndexCell.java b/source/de/anomic/kelondro/text/IndexCell.java index ed88a5e3a..0d9c9477c 100644 --- a/source/de/anomic/kelondro/text/IndexCell.java +++ b/source/de/anomic/kelondro/text/IndexCell.java @@ -36,6 +36,7 @@ import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.CloneableIterator; import de.anomic.kelondro.order.MergeIterator; import de.anomic.kelondro.order.Order; +import de.anomic.server.serverProfiling; /* * an index cell is a part of the horizontal index in the new segment-oriented index @@ -48,7 +49,7 @@ import de.anomic.kelondro.order.Order; * another BLOB file in the index array. */ -public final class IndexCell extends AbstractIndex implements Index { +public final class IndexCell extends AbstractBufferedIndex implements BufferedIndex { // class variables private ReferenceContainerArray array; @@ -63,22 +64,14 @@ public final class IndexCell extends AbstractIndex implements Index { ) throws IOException { this.array = new ReferenceContainerArray(cellPath, wordOrder, payloadrow); this.ram = new ReferenceContainerCache(payloadrow, wordOrder); + this.ram.initWriteMode(); this.maxRamEntries = maxRamEntries; } - - private void cacheDump() throws IOException { - // dump the ram - File dumpFile = this.array.newContainerBLOBFile(); - this.ram.dump(dumpFile); - // get a fresh ram cache - this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering()); - // add the dumped indexContainerBLOB to the array - this.array.mountBLOBContainer(dumpFile); - } - public ByteOrder ordering() { - return this.array.ordering(); - } + + /* + * methods to implement Index + */ /** * add entries to the cell: this adds the new entries always to the RAM part, never to BLOBs @@ -87,108 +80,78 @@ public final class IndexCell extends AbstractIndex implements Index { */ public synchronized void add(ReferenceContainer newEntries) throws IOException { this.ram.add(newEntries); + serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true); if (this.ram.size() > this.maxRamEntries) cacheDump(); } public synchronized void add(String hash, ReferenceRow entry) throws IOException { this.ram.add(hash, entry); + serverProfiling.update("wordcache", Long.valueOf(this.ram.size()), true); if (this.ram.size() > this.maxRamEntries) cacheDump(); } /** - * clear the RAM and BLOB part, deletes everything in the cell - * @throws IOException + * checks if there is any container for this wordHash, either in RAM or any BLOB */ - public synchronized void clear() throws IOException { - this.ram.clear(); - this.array.clear(); + public boolean has(String wordHash) { + if (this.ram.has(wordHash)) return true; + return this.array.has(wordHash); } - /** - * when a cell is closed, the current RAM is dumped to a file which will be opened as - * BLOB file the next time a cell is opened. A name for the dump is automatically generated - * and is composed of the current date and the cell salt - */ - public synchronized void close() { - // dump the ram + public int count(String wordHash) { + ReferenceContainer c0 = this.ram.get(wordHash, null); + ReferenceContainer c1; try { - this.ram.dump(this.array.newContainerBLOBFile()); + c1 = this.array.get(wordHash); } catch (IOException e) { - e.printStackTrace(); + c1 = null; } - // close all - this.ram.close(); - this.array.close(); + if (c1 == null) { + if (c0 == null) return 0; + return c0.size(); + } + if (c0 == null) return c1.size(); + return c1.size() + c0.size(); } - + /** - * deleting a container affects the containers in RAM and all the BLOB files - * the deleted containers are merged and returned as result of the method + * all containers in the BLOBs and the RAM are merged and returned * @throws IOException */ - public ReferenceContainer delete(String wordHash) throws IOException { - ReferenceContainer c0 = this.ram.delete(wordHash); + public ReferenceContainer get(String wordHash, Set urlselection) throws IOException { + ReferenceContainer c0 = this.ram.get(wordHash, null); ReferenceContainer c1 = this.array.get(wordHash); if (c1 == null) { if (c0 == null) return null; return c0; } - this.array.delete(wordHash); if (c0 == null) return c1; return c1.merge(c0); } /** - * all containers in the BLOBs and the RAM are merged and returned + * deleting a container affects the containers in RAM and all the BLOB files + * the deleted containers are merged and returned as result of the method * @throws IOException */ - public ReferenceContainer get(String wordHash, Set urlselection) throws IOException { - ReferenceContainer c0 = this.ram.get(wordHash, null); + public ReferenceContainer delete(String wordHash) throws IOException { + ReferenceContainer c0 = this.ram.delete(wordHash); ReferenceContainer c1 = this.array.get(wordHash); if (c1 == null) { if (c0 == null) return null; return c0; } + this.array.delete(wordHash); if (c0 == null) return c1; return c1.merge(c0); } - public int count(String wordHash) { - ReferenceContainer c0 = this.ram.get(wordHash, null); - ReferenceContainer c1; - try { - c1 = this.array.get(wordHash); - } catch (IOException e) { - c1 = null; - } - if (c1 == null) { - if (c0 == null) return 0; - return c0.size(); - } - if (c0 == null) return c1.size(); - return c1.size() + c0.size(); - } - - /** - * checks if there is any container for this wordHash, either in RAM or any BLOB - */ - public boolean has(String wordHash) { - if (this.ram.has(wordHash)) return true; - return this.array.has(wordHash); - } - - public int minMem() { - return 10 * 1024 * 1024; - } - /** * remove url references from a selected word hash. this deletes also in the BLOB * files, which means that there exists new gap entries after the deletion * The gaps are never merged in place, but can be eliminated when BLOBs are merged into * new BLOBs. This returns the sum of all url references that have been removed * @throws IOException - * @throws IOException - * @throws IOException */ public int remove(String wordHash, Set urlHashes) throws IOException { int reduced = this.array.replace(wordHash, new RemoveRewriter(urlHashes)); @@ -200,8 +163,24 @@ public final class IndexCell extends AbstractIndex implements Index { return reduced > 0; } - public int size() { - return this.ram.size() + this.array.size(); + private static class RemoveRewriter implements ReferenceContainerArray.ContainerRewriter { + + Set urlHashes; + + public RemoveRewriter(Set urlHashes) { + this.urlHashes = urlHashes; + } + + public RemoveRewriter(String urlHash) { + this.urlHashes = new HashSet(); + this.urlHashes.add(urlHash); + } + + public ReferenceContainer rewrite(ReferenceContainer container) { + container.removeEntries(urlHashes); + return container; + } + } public CloneableIterator references(String startWordHash, boolean rot) { @@ -234,24 +213,97 @@ public final class IndexCell extends AbstractIndex implements Index { true); } - private static class RemoveRewriter implements ReferenceContainerArray.ContainerRewriter { - - Set urlHashes; - - public RemoveRewriter(Set urlHashes) { - this.urlHashes = urlHashes; - } - - public RemoveRewriter(String urlHash) { - this.urlHashes = new HashSet(); - this.urlHashes.add(urlHash); - } - - public ReferenceContainer rewrite(ReferenceContainer container) { - container.removeEntries(urlHashes); - return container; + /** + * clear the RAM and BLOB part, deletes everything in the cell + * @throws IOException + */ + public synchronized void clear() throws IOException { + this.ram.clear(); + this.array.clear(); + } + + /** + * when a cell is closed, the current RAM is dumped to a file which will be opened as + * BLOB file the next time a cell is opened. A name for the dump is automatically generated + * and is composed of the current date and the cell salt + */ + public synchronized void close() { + // dump the ram + try { + this.ram.dump(this.array.newContainerBLOBFile()); + } catch (IOException e) { + e.printStackTrace(); } - + // close all + this.ram.close(); + this.array.close(); + } + + public int size() { + return this.ram.size() + this.array.size(); + } + + public int minMem() { + return 10 * 1024 * 1024; + } + + public ByteOrder ordering() { + return this.array.ordering(); + } + + + /* + * cache control methods + */ + + private void cacheDump() throws IOException { + // dump the ram + File dumpFile = this.array.newContainerBLOBFile(); + this.ram.dump(dumpFile); + // get a fresh ram cache + this.ram = new ReferenceContainerCache(this.array.rowdef(), this.array.ordering()); + this.ram.initWriteMode(); + // add the dumped indexContainerBLOB to the array + this.array.mountBLOBContainer(dumpFile); } + + public void cleanupBuffer(int time) { + // do nothing + } + + + public int getBackendSize() { + return this.array.size(); + } + + + public long getBufferMaxAge() { + return System.currentTimeMillis(); + } + + + public int getBufferMaxReferences() { + return this.ram.maxReferences(); + } + + + public long getBufferMinAge() { + return System.currentTimeMillis(); + } + + + public int getBufferSize() { + return this.ram.size(); + } + + + public long getBufferSizeBytes() { + return 10000 * this.ram.size(); // guessed; we don't know that exactly because there is no statistics here (expensive, not necessary) + } + + + public void setBufferMaxWordCount(int maxWords) { + this.maxRamEntries = maxWords; + } } diff --git a/source/de/anomic/kelondro/text/ReferenceContainer.java b/source/de/anomic/kelondro/text/ReferenceContainer.java index 319aef078..f5e796b08 100644 --- a/source/de/anomic/kelondro/text/ReferenceContainer.java +++ b/source/de/anomic/kelondro/text/ReferenceContainer.java @@ -120,7 +120,7 @@ public class ReferenceContainer extends RowSet { } public ReferenceContainer merge(final ReferenceContainer c) { - return new ReferenceContainer(this.wordHash, this.merge(c)); + return new ReferenceContainer(this.wordHash, super.merge(c)); } public Reference put(final ReferenceRow entry) { diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 9b732fec1..3ad99c491 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -306,6 +306,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch indexContainerIterator = wordIndex.index().references("AAAAAAAAAAAA", false, false); long urlCounter = 0, wordCounter = 0; @@ -866,7 +866,7 @@ public final class yacy { try { Iterator indexContainerIterator = null; if (resource.equals("all")) { - WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0); + WordIndex = new plasmaWordIndex("freeworld", log, indexPrimaryRoot, indexSecondaryRoot, 10000, false, 1, 0, false); indexContainerIterator = WordIndex.index().references(wordChunkStartHash, false, false); } int counter = 0;