diff --git a/source/dbtest.java b/source/dbtest.java index b76ffb435..1bfc24d94 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -528,7 +528,7 @@ final class dbTable implements kelondroIndex { public synchronized void putMultiple(List rows, Date entryDate) throws IOException { Iterator i = rows.iterator(); - while (i.hasNext()) put ((Entry) i.next(), entryDate); + while (i.hasNext()) put((Entry) i.next(), entryDate); } public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { @@ -571,6 +571,10 @@ final class dbTable implements kelondroIndex { throw new UnsupportedOperationException(); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + throw new UnsupportedOperationException(); + } + public kelondroRow.Entry remove(byte[] key) throws IOException { try { diff --git a/source/de/anomic/kelondro/kelondroCache.java b/source/de/anomic/kelondro/kelondroCache.java index 8dfe83b20..1d36b7b25 100644 --- a/source/de/anomic/kelondro/kelondroCache.java +++ b/source/de/anomic/kelondro/kelondroCache.java @@ -539,6 +539,11 @@ public class kelondroCache implements kelondroIndex { if (dummy == null) this.writeUnique++; else this.writeDouble++; } } + + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) addUnique((Entry) i.next(), entryDate); + } public synchronized Entry remove(byte[] key) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index 6f9a20349..fe84ae3a8 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -278,11 +278,23 @@ public class kelondroCollectionIndex { return 2 * m * this.payloadrow.objectsize; } - private kelondroRow.Entry array_new(byte[] key, kelondroRowCollection collection) throws IOException { + private void array_remove( + int oldPartitionNumber, int serialNumber, int chunkSize, + int oldRownumber) throws IOException { + // we need a new slot, that means we must first delete the old entry + // find array file + kelondroFixedWidthArray array = getArray(oldPartitionNumber, serialNumber, chunkSize); + + // delete old entry + array.remove(oldRownumber, true); + } + + private kelondroRow.Entry array_new( + byte[] key, kelondroRowCollection collection) throws IOException { // the collection is new - int newPartitionNumber = arrayIndex(collection.size()); + int partitionNumber = arrayIndex(collection.size()); kelondroRow.Entry indexrow = index.row().newEntry(); - kelondroFixedWidthArray array = getArray(newPartitionNumber, serialNumber, this.payloadrow.objectsize()); + kelondroFixedWidthArray array = getArray(partitionNumber, serialNumber, this.payloadrow.objectsize()); // define row kelondroRow.Entry arrayEntry = array.row().newEntry(); @@ -296,7 +308,7 @@ public class kelondroCollectionIndex { indexrow.setCol(idx_col_key, key); indexrow.setCol(idx_col_chunksize, this.payloadrow.objectsize()); indexrow.setCol(idx_col_chunkcount, collection.size()); - indexrow.setCol(idx_col_clusteridx, (byte) newPartitionNumber); + indexrow.setCol(idx_col_clusteridx, (byte) partitionNumber); indexrow.setCol(idx_col_flags, (byte) 0); indexrow.setCol(idx_col_indexpos, (long) newRowNumber); indexrow.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); @@ -306,17 +318,6 @@ public class kelondroCollectionIndex { return indexrow; } - private void array_remove( - int oldPartitionNumber, int serialNumber, int chunkSize, - int oldRownumber) throws IOException { - // we need a new slot, that means we must first delete the old entry - // find array file - kelondroFixedWidthArray array = getArray(oldPartitionNumber, serialNumber, chunkSize); - - // delete old entry - array.remove(oldRownumber, true); - } - private void array_add( byte[] key, kelondroRowCollection collection, kelondroRow.Entry indexrow, int partitionNumber, int serialNumber, int chunkSize) throws IOException { @@ -450,7 +451,7 @@ public class kelondroCollectionIndex { // merge existing containers i = existingContainer.iterator(); Object[] record; - ArrayList indexrows = new ArrayList(); + ArrayList indexrows_existing = new ArrayList(); kelondroRowCollection collection; while (i.hasNext()) { record = (Object[]) i.next(); // {byte[], indexContainer, kelondroRow.Entry} @@ -495,21 +496,23 @@ public class kelondroCollectionIndex { newPartitionNumber, oldSerialNumber, this.payloadrow.objectsize()); // modifies indexrow } arrayResolveRemoved(); // remove all to-be-removed marked entries - indexrows.add(indexrow); // indexrows are collected and written later as block + indexrows_existing.add(indexrow); // indexrows are collected and written later as block } // write new containers i = newContainer.iterator(); + ArrayList indexrows_new = new ArrayList(); while (i.hasNext()) { record = (Object[]) i.next(); // {byte[], indexContainer} key = (byte[]) record[0]; collection = (indexContainer) record[1]; indexrow = array_new(key, collection); // modifies indexrow - index.addUnique(indexrow); // write modified indexrow + indexrows_new.add(indexrow); // collect new index rows } // write index entries - index.putMultiple(indexrows, new Date()); // write modified indexrows in optimized manner + index.putMultiple(indexrows_existing, new Date()); // write modified indexrows in optimized manner + index.addUniqueMultiple(indexrows_new, new Date()); // write new indexrows in optimized manner } public synchronized void merge(indexContainer container) throws IOException, kelondroOutOfLimitsException { diff --git a/source/de/anomic/kelondro/kelondroFixedWidthArray.java b/source/de/anomic/kelondro/kelondroFixedWidthArray.java index e07f20007..068a94c38 100644 --- a/source/de/anomic/kelondro/kelondroFixedWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFixedWidthArray.java @@ -49,6 +49,8 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; import java.util.TreeSet; public class kelondroFixedWidthArray extends kelondroRecords implements kelondroArray { @@ -114,6 +116,15 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro // the OHbytes and OHhandles are zero. } + public synchronized void setMultiple(TreeMap /* of Integer/kelondroRow.Entry */ rows) throws IOException { + Iterator i = rows.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + set(((Integer) entry.getKey()).intValue(), (kelondroRow.Entry) entry.getValue()); + } + } + public synchronized kelondroRow.Entry get(int index) throws IOException { return row().newEntry(getNode(new Handle(index), true).getValueRow()); } diff --git a/source/de/anomic/kelondro/kelondroFlexSplitTable.java b/source/de/anomic/kelondro/kelondroFlexSplitTable.java index 2aa280f82..9ad321725 100644 --- a/source/de/anomic/kelondro/kelondroFlexSplitTable.java +++ b/source/de/anomic/kelondro/kelondroFlexSplitTable.java @@ -239,6 +239,11 @@ public class kelondroFlexSplitTable implements kelondroIndex { table.addUnique(row, entryDate); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) addUnique((kelondroRow.Entry) i.next(), entryDate); + } + public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { Iterator i = tables.values().iterator(); kelondroIndex table; diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 4cc4ed455..2957f8e38 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -27,9 +27,11 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.TreeMap; import de.anomic.server.logging.serverLog; @@ -187,20 +189,25 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr // this should save R/W head positioning time Iterator i = rows.iterator(); kelondroRow.Entry row; - TreeMap ordered = new TreeMap(); int pos; byte[] key; + TreeMap old_rows_ordered = new TreeMap(); + ArrayList new_rows_sequential = new ArrayList(); while (i.hasNext()) { row = (kelondroRow.Entry) i.next(); key = row.getColBytes(0); pos = index.geti(key); if (pos < 0) { - index.puti(key, super.add(row)); + new_rows_sequential.add(row); } else { - ordered.put(new Integer(pos), row); + old_rows_ordered.put(new Integer(pos), row); } } - super.setMultiple(ordered); + // overwrite existing entries in index + super.setMultiple(old_rows_ordered); + + // write new entries to index + addUniqueMultiple(new_rows_sequential, entryDate); } public synchronized kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException { @@ -230,6 +237,20 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr index.addi(row.getColBytes(0), super.add(row)); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + // add a list of entries in a ordered way. + // this should save R/W head positioning time + TreeMap indexed_result = super.addMultiple(rows); + // indexed_result is a Integer/byte[] relation + // that is used here to store the index + Iterator i = indexed_result.entrySet().iterator(); + Map.Entry entry; + while (i.hasNext()) { + entry = (Map.Entry) i.next(); + index.puti((byte[]) entry.getValue(), ((Integer) entry.getKey()).intValue()); + } + } + public synchronized kelondroRow.Entry remove(byte[] key) throws IOException { int i = index.removei(key); if (i < 0) return null; diff --git a/source/de/anomic/kelondro/kelondroFlexWidthArray.java b/source/de/anomic/kelondro/kelondroFlexWidthArray.java index 26c0d9005..bac64972f 100644 --- a/source/de/anomic/kelondro/kelondroFlexWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFlexWidthArray.java @@ -29,6 +29,7 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -188,7 +189,7 @@ public class kelondroFlexWidthArray implements kelondroArray { // a R/W head path-optimized option to write a set of entries Iterator i; Map.Entry entry; - kelondroRow.Entry rowentry, e0; + kelondroRow.Entry rowentry, e; int c = 0, index, lastcol; synchronized (col) { // go across each file @@ -201,11 +202,11 @@ public class kelondroFlexWidthArray implements kelondroArray { rowentry = (kelondroRow.Entry) entry.getValue(); assert rowentry.bytes().length == this.rowdef.objectsize; - e0 = col[c].row().newEntry( + e = col[c].row().newEntry( rowentry.bytes(), rowdef.colstart[c], rowdef.colstart[lastcol] - rowdef.colstart[c] + rowdef.width(lastcol)); - col[c].set(index, e0); + col[c].set(index, e); } c = c + col[c].row().columns(); } @@ -215,16 +216,16 @@ public class kelondroFlexWidthArray implements kelondroArray { public void set(int index, kelondroRow.Entry rowentry) throws IOException { assert rowentry.bytes().length == this.rowdef.objectsize; int c = 0; - kelondroRow.Entry e0; + kelondroRow.Entry e; int lastcol; synchronized (col) { while (c < rowdef.columns()) { lastcol = c + col[c].row().columns() - 1; - e0 = col[c].row().newEntry( + e = col[c].row().newEntry( rowentry.bytes(), rowdef.colstart[c], rowdef.colstart[lastcol] - rowdef.colstart[c] + rowdef.width(lastcol)); - col[c].set(index, e0); + col[c].set(index, e); c = c + col[c].row().columns(); } } @@ -246,13 +247,58 @@ public class kelondroFlexWidthArray implements kelondroArray { rowentry.bytes(), rowdef.colstart[c], rowdef.colstart[lastcol] + rowdef.width(lastcol) - rowdef.colstart[c]); - col[c].set(index,e); + col[c].set(index, e); c = c + col[c].row().columns(); } } return index; } + protected TreeMap addMultiple(List rows) throws IOException { + // result is a Integer/byte[] relation + // of newly added rows (index, key) + TreeMap indexref = new TreeMap(); + Iterator i; + kelondroRow.Entry rowentry; + // prepare storage for other columns + TreeMap[] colm = new TreeMap[col.length]; + for (int j = 0; j < col.length; j++) { + if (col[j] == null) colm[j] = null; else colm[j] = new TreeMap(); + } + i = rows.iterator(); + while (i.hasNext()) { + rowentry = (kelondroRow.Entry) i.next(); + assert rowentry.bytes().length == this.rowdef.objectsize; + + kelondroRow.Entry e; + int index = -1; + int lastcol; + synchronized (col) { + e = col[0].row().newEntry(rowentry.bytes(), 0, rowdef.width(0)); + index = col[0].add(e); + int c = col[0].row().columns(); + + while (c < rowdef.columns()) { + lastcol = c + col[c].row().columns() - 1; + e = col[c].row().newEntry( + rowentry.bytes(), + rowdef.colstart[c], + rowdef.colstart[lastcol] + rowdef.width(lastcol) - rowdef.colstart[c]); + // remember write to column, but do not write directly + colm[c].put(new Integer(index), e); // col[c].set(index,e); + c = c + col[c].row().columns(); + } + } + indexref.put(new Integer(index), rowentry.getColBytes(0)); + } + // write the other columns + for (int j = 1; j < col.length; j++) { + if (col[j] != null) col[j].setMultiple(colm[j]); + } + // retrun references to entries with key + return indexref; + } + public kelondroRow.Entry get(int index) throws IOException { int r = 0; kelondroRow.Entry e, p; diff --git a/source/de/anomic/kelondro/kelondroIndex.java b/source/de/anomic/kelondro/kelondroIndex.java index b98aadd41..df884fc5a 100644 --- a/source/de/anomic/kelondro/kelondroIndex.java +++ b/source/de/anomic/kelondro/kelondroIndex.java @@ -67,6 +67,7 @@ public interface kelondroIndex { public void putMultiple(List /* of kelondroRow.Entry*/ rows, Date entryDate) throws IOException; // for R/W head path optimization public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check public void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException; // no double-check + public void addUniqueMultiple(List /* of kelondroRow.Entry*/ rows, Date entryDate) throws IOException; // no double-check public kelondroRow.Entry remove(byte[] key) throws IOException; public kelondroRow.Entry removeOne() throws IOException; public Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException; diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 7e8963737..1016453b7 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -24,8 +24,10 @@ package de.anomic.kelondro; +import java.io.IOException; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Set; import de.anomic.server.logging.serverLog; @@ -216,6 +218,11 @@ public class kelondroRowCollection { addUnique(row); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) addUnique((kelondroRow.Entry) i.next()); + } + public void add(byte[] a) { addUnique(a, 0, a.length); } diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index 8335667a8..69de6e4a3 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -31,7 +31,6 @@ import java.util.List; import java.util.Random; import java.util.TreeMap; -import de.anomic.kelondro.kelondroRow.Entry; import de.anomic.server.logging.serverLog; public class kelondroRowSet extends kelondroRowCollection implements kelondroIndex { @@ -102,9 +101,18 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd } } + public void addUniqueMultiple(List rows, Date entryDate) throws IOException { + if (removeMarker.size() == 0) { + super.addUniqueMultiple(rows, entryDate); + } else { + Iterator i = rows.iterator(); + while (i.hasNext()) addUnique((kelondroRow.Entry) i.next(), entryDate); + } + } + public synchronized void putMultiple(List rows, Date entryDate) throws IOException { Iterator i = rows.iterator(); - while (i.hasNext()) put ((Entry) i.next(), entryDate); + while (i.hasNext()) put((kelondroRow.Entry) i.next(), entryDate); } public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) { diff --git a/source/de/anomic/kelondro/kelondroSplittedTree.java b/source/de/anomic/kelondro/kelondroSplittedTree.java index 4047a63ae..ce2576294 100644 --- a/source/de/anomic/kelondro/kelondroSplittedTree.java +++ b/source/de/anomic/kelondro/kelondroSplittedTree.java @@ -139,6 +139,10 @@ public class kelondroSplittedTree implements kelondroIndex { throw new UnsupportedOperationException(); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + throw new UnsupportedOperationException(); + } + public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException { return ktfs[partition(row.getColBytes(0))].put(row); } @@ -274,4 +278,5 @@ public class kelondroSplittedTree implements kelondroIndex { // a collection of different node cache status values return new int[]{0,0,0,0,0,0,0,0,0,0}; } + } diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 37274a24c..67bee31a3 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -486,6 +486,11 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { this.put(row, entryDate); } + public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) addUnique((kelondroRow.Entry) i.next(), entryDate); + } + private void assignChild(Node parentNode, Node childNode, int childType) throws IOException { parentNode.setOHHandle(childType, childNode.handle()); childNode.setOHHandle(parent, parentNode.handle()); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index d23ad82a0..36b86791a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1018,6 +1018,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser int wordInCacheMaxCount = (int) getConfigLong(INDEX_DIST_DHT_RECEIPT_LIMIT, 1000); wordIndex.setInMaxWordCount(wordInCacheMaxCount); + wordIndex.setWordFlushSize((int) getConfigLong("wordFlushSize", 1000)); // set a minimum amount of memory for the indexer thread setConfig(INDEXER_MEMPREREQ, Math.max(getConfigLong(INDEXER_MEMPREREQ, 0), wordIndex.minMem())); diff --git a/yacy.init b/yacy.init index d620488eb..41ccea82e 100644 --- a/yacy.init +++ b/yacy.init @@ -590,7 +590,7 @@ javastart_priority=0 # flushed to disc; this may last some minutes. wordCacheMaxCount = 20000 wordCacheInitCount = 30000 -wordFlushSize = 2000; +wordFlushSize = 1000; # Specifies if yacy can be used as transparent http proxy. #