diff --git a/source/dbtest.java b/source/dbtest.java index 1e3573a9b..54ff8f927 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -372,6 +372,7 @@ public class dbtest { if (command.equals("stressThreaded")) { // // args: + // example: kelondroFlexTable stressThreaded /Users/admin/dbtest 500 50 0 long writeCount = Long.parseLong(args[3]); long readCount = Long.parseLong(args[4]); long randomstart = Long.parseLong(args[5]); diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 1b99b0f2e..2a6315bf3 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -25,51 +25,31 @@ package de.anomic.kelondro; import java.io.IOException; -import java.util.Iterator; public class kelondroBytesIntMap { private kelondroRow rowdef; - private kelondroIndex index0, index1; + private kelondroIndex index; public kelondroBytesIntMap(kelondroIndex ki) { assert (ki.row().columns() == 2); // must be a key/index relation assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long - this.index0 = null; // not used - this.index1 = ki; + this.index = ki; this.rowdef = ki.row(); } public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) { this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0); - this.index0 = new kelondroRowSet(rowdef, space); - this.index1 = null; // to show that this is the initialization phase + this.index = new kelondroRAMIndex(rowdef, space); } public kelondroRow row() { - return index0.row(); + return index.row(); } public synchronized int geti(byte[] key) throws IOException { assert (key != null); - //assert (!(serverLog.allZero(key))); - if (index0 != null) { - if (index1 == null) { - // finish initialization phase - if (index0 instanceof kelondroRowSet) { - ((kelondroRowSet) index0).sort(); - ((kelondroRowSet) index0).uniq(); - } - index1 = new kelondroRowSet(rowdef, 0); - //System.out.println("finished initialization phase at size = " + index0.size() + " in geti"); - } - kelondroRow.Entry indexentry = index0.get(key); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - if (indexentry != null) return (int) indexentry.getColLong(1); - } - assert (index1 != null); - kelondroRow.Entry indexentry = index1.get(key); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + kelondroRow.Entry indexentry = index.get(key); if (indexentry == null) return -1; return (int) indexentry.getColLong(1); } @@ -77,244 +57,55 @@ public class kelondroBytesIntMap { public synchronized int puti(byte[] key, int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); - //assert (!(serverLog.allZero(key))); - if (index0 != null) { - if (index1 == null) { - // finish initialization phase - if (index0 instanceof kelondroRowSet) { - ((kelondroRowSet) index0).sort(); - ((kelondroRowSet) index0).uniq(); - } - index1 = new kelondroRowSet(rowdef, 0); - //System.out.println("finished initialization phase at size = " + index0.size() + " in puti"); - } - // if the new entry is within the initialization part, just overwrite it - kelondroRow.Entry indexentry = index0.get(key); - if (indexentry != null) { - int oldi = (int) indexentry.getColLong(1); - indexentry.setCol(0, key); - indexentry.setCol(1, i); - index0.put(indexentry); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return oldi; - } - // else place it in the index1 - } - // at this point index1 cannot be null - assert (index1 != null); - kelondroRow.Entry newentry = index1.row().newEntry(); + kelondroRow.Entry newentry = index.row().newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); - kelondroRow.Entry oldentry = index1.put(newentry); + kelondroRow.Entry oldentry = index.put(newentry); if (oldentry == null) return -1; - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return (int) oldentry.getColLong(1); } public synchronized void addi(byte[] key, int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); - assert index0 != null; - //assert index1 == null; - if (index1 != null) { - // the initialization phase is over, put this entry to the secondary index - puti(key, i); - return; - } - //assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = this.rowdef.newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); - index0.addUnique(newentry); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + index.addUnique(newentry); } public synchronized int removei(byte[] key) throws IOException { assert (key != null); - //assert (!(serverLog.allZero(key))); - // returns the integer index of the key, if the key can be found and was removed - // and -1 if the key was not found. - if (index0 != null) { - if (index1 == null) { - // finish initialization phase - if (index0 instanceof kelondroRowSet) { - ((kelondroRowSet) index0).sort(); - ((kelondroRowSet) index0).uniq(); - } - index1 = new kelondroRowSet(rowdef, 0); - //System.out.println("finished initialization phase at size = " + index0.size() + " in removei"); - } - // if the new entry is within the initialization part, just overwrite it - kelondroRow.Entry indexentry = index0.remove(key, true); - if (indexentry != null) { - assert index0.remove(key, true) == null; // check if remove worked - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return (int) indexentry.getColLong(1); - } - // else remove it from the index1 - } - // at this point index1 cannot be null - assert (index1 != null); - if (index1.size() == 0) return -1; - kelondroRow.Entry indexentry = index1.remove(key, true); + kelondroRow.Entry indexentry = index.remove(key, false); if (indexentry == null) return -1; - assert index1.remove(key, true) == null; // check if remove worked - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); return (int) indexentry.getColLong(1); } public synchronized int removeonei() throws IOException { - if ((index1 != null) && (index1.size() != 0)) { - kelondroRow.Entry indexentry = index1.removeOne(); - assert (indexentry != null); - if (indexentry == null) return -1; - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return (int) indexentry.getColLong(1); - } - if ((index0 != null) && (index0.size() != 0)) { - kelondroRow.Entry indexentry = index0.removeOne(); - assert (indexentry != null); - if (indexentry == null) return -1; - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return (int) indexentry.getColLong(1); - } - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return -1; + kelondroRow.Entry indexentry = index.removeOne(); + if (indexentry == null) return -1; + return (int) indexentry.getColLong(1); } public synchronized int size() { - if ((index0 != null) && (index1 == null)) { - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index0.size(); - } - if ((index0 == null) && (index1 != null)) { - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index1.size(); - } - assert ((index0 != null) && (index1 != null)); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index0.size() + index1.size(); + return index.size(); } public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { - // returns the key-iterator of the underlying kelondroIndex - // col[0] = key - // col[1] = integer as {b265} - if ((index0 != null) && (index1 == null)) { - // finish initialization phase - if (index0 instanceof kelondroRowSet) { - ((kelondroRowSet) index0).sort(); - ((kelondroRowSet) index0).uniq(); - } - index1 = new kelondroRowSet(rowdef, 0); - //System.out.println("finished initialization phase at size = " + index0.size() + " in rows"); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index0.keys(up, firstKey); - } - assert (index1 != null); - if (index0 == null) { - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index1.keys(up, firstKey); - } - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return new kelondroMergeIterator( - index0.keys(up, firstKey), - index1.keys(up, firstKey), - rowdef.objectOrder, - kelondroMergeIterator.simpleMerge, - true); + return index.keys(up, firstKey); } public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { - // returns the row-iterator of the underlying kelondroIndex - // col[0] = key - // col[1] = integer as {b265} - if ((index0 != null) && (index1 == null)) { - // finish initialization phase - if (index0 instanceof kelondroRowSet) { - ((kelondroRowSet) index0).sort(); - ((kelondroRowSet) index0).uniq(); - } - index1 = new kelondroRowSet(rowdef, 0); - //System.out.println("finished initialization phase at size = " + index0.size() + " in rows"); - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index0.rows(up, firstKey); - } - assert (index1 != null); - if (index0 == null) { - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return index1.rows(up, firstKey); - } - //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); - return new kelondroMergeIterator( - index0.rows(up, firstKey), - index1.rows(up, firstKey), - rowdef.objectOrder, - kelondroMergeIterator.simpleMerge, - true); + return index.rows(up, firstKey); } public kelondroProfile profile() { - if (index0 == null) return index1.profile(); - if (index1 == null) return index0.profile(); - return kelondroProfile.consolidate(index0.profile(), index1.profile()); + return index.profile(); } public synchronized void close() { - if (index0 != null) index0.close(); - if (index1 != null) index1.close(); - } - - public synchronized String consistencyAnalysis() { - String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + singleConsistency((kelondroRowSet) index0)); - String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + singleConsistency((kelondroRowSet) index1)); - String combined = ""; - if ((index0 == null) && (index1 == null)) return "all null"; - if ((index0 != null) && (index1 != null)) { - Iterator i; - try { - i = index0.rows(true, null); - kelondroRow.Entry entry; - while (i.hasNext()) { - entry = i.next(); - if (index1.has(entry.getColBytes(0))) { - combined = combined + ", common = " + new String(entry.getColBytes(0)); - } - } - } catch (IOException e) {} - } - return s0 + ", " + s1 + combined; + index.close(); + index = null; } - public synchronized boolean consistencyAnalysis0() { - boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0); - boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index1); - if (!(s0 && s1)) return false; - if ((index0 == null) && (index1 == null)) return true; - if ((index0 != null) && (index1 != null)) { - Iterator i; - try { - i = index0.rows(true, null); - kelondroRow.Entry entry; - while (i.hasNext()) { - entry = i.next(); - if (index1.has(entry.getColBytes(0))) return false; - } - } catch (IOException e) {} - } - return true; - } - - private String singleConsistency(kelondroRowSet rs) { - int s = rs.size(); - rs.sort(); - rs.uniq(); - if (rs.size() == s) return "set is sound"; else return "set has " + (rs.size() - s) + " double-entries"; - } - private boolean singleConsistency0(kelondroRowSet rs) { - int s = rs.size(); - rs.sort(); - rs.uniq(); - return rs.size() == s; - } } diff --git a/source/de/anomic/kelondro/kelondroFileRA.java b/source/de/anomic/kelondro/kelondroFileRA.java index efb536c8b..3b4b569cc 100644 --- a/source/de/anomic/kelondro/kelondroFileRA.java +++ b/source/de/anomic/kelondro/kelondroFileRA.java @@ -57,8 +57,8 @@ public final class kelondroFileRA extends kelondroAbstractRA implements kelondro public kelondroFileRA(File file) throws IOException, FileNotFoundException { this.name = file.getName(); - RAFile = new RandomAccessFile(file, "rw"); - } + RAFile = new RandomAccessFile(file, "rwd"); + } public long length() throws IOException { return RAFile.length(); diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 48a4e787e..fb0aefad9 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -40,7 +40,7 @@ import de.anomic.server.logging.serverLog; public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { // static tracker objects - private static TreeMap tableTracker = new TreeMap(); + private static TreeMap tableTracker = new TreeMap(); // class objects protected kelondroBytesIntMap index; @@ -156,12 +156,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr int space = Math.max(super.col[0].size(), initialSpace) + 1; if (space < 0) throw new kelondroException("wrong space: " + space); kelondroBytesIntMap ri = new kelondroBytesIntMap(super.row().column(0).cellwidth, super.rowdef.objectOrder, space); - Iterator content = super.col[0].contentNodes(-1); + Iterator content = super.col[0].contentNodes(-1); kelondroNode node; int i; byte[] key; while (content.hasNext()) { - node = (kelondroNode) content.next(); + node = content.next(); i = node.handle().hashCode(); key = node.getKey(); assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator @@ -181,14 +181,14 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroOrder objectOrder) throws IOException { kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80), true, false); - Iterator content = super.col[0].contentNodes(-1); + Iterator content = super.col[0].contentNodes(-1); kelondroNode node; kelondroRow.Entry indexentry; int i, c = 0, all = super.col[0].size(); long start = System.currentTimeMillis(); long last = start; while (content.hasNext()) { - node = (kelondroNode) content.next(); + node = content.next(); i = node.handle().hashCode(); indexentry = treeindex.row().newEntry(); indexentry.setCol(0, node.getValueRow()); @@ -211,7 +211,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr public synchronized kelondroRow.Entry get(byte[] key) throws IOException { if (index == null) return null; // case may happen during shutdown int pos = index.geti(key); - assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis(); + assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); if (pos < 0) return null; // i may be greater than this.size(), because this table may have deleted entries // the deleted entries are subtracted from the 'real' tablesize, @@ -222,18 +222,18 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return result; } - public synchronized void putMultiple(List rows) throws IOException { + public synchronized void putMultiple(List rows) throws IOException { // put a list of entries in a ordered way. // this should save R/W head positioning time - Iterator i = rows.iterator(); + Iterator i = rows.iterator(); kelondroRow.Entry row; int pos; byte[] key; - TreeMap old_rows_ordered = new TreeMap(); - ArrayList new_rows_sequential = new ArrayList(); + TreeMap old_rows_ordered = new TreeMap(); + ArrayList new_rows_sequential = new ArrayList(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); while (i.hasNext()) { - row = (kelondroRow.Entry) i.next(); + row = i.next(); key = row.getColBytes(0); pos = index.geti(key); if (pos < 0) { @@ -295,17 +295,17 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr index.addi(row.getColBytes(0), super.add(row)); } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized void addUniqueMultiple(List rows) throws IOException { // add a list of entries in a ordered way. // this should save R/W head positioning time - TreeMap indexed_result = super.addMultiple(rows); + TreeMap indexed_result = super.addMultiple(rows); // indexed_result is a Integer/byte[] relation // that is used here to store the index - Iterator i = indexed_result.entrySet().iterator(); - Map.Entry entry; + Iterator> i = indexed_result.entrySet().iterator(); + Map.Entry entry; while (i.hasNext()) { - entry = (Map.Entry) i.next(); - index.puti((byte[]) entry.getValue(), ((Integer) entry.getKey()).intValue()); + entry = i.next(); + index.puti(entry.getValue(), entry.getKey().intValue()); } assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); @@ -406,7 +406,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return index.profile(); } - public static final Iterator filenames() { + public static final Iterator filenames() { // iterates string objects; all file names from record tracker return tableTracker.keySet().iterator(); } diff --git a/source/de/anomic/kelondro/kelondroFlexWidthArray.java b/source/de/anomic/kelondro/kelondroFlexWidthArray.java index c0571ed54..18a6033ed 100644 --- a/source/de/anomic/kelondro/kelondroFlexWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFlexWidthArray.java @@ -101,10 +101,10 @@ public class kelondroFlexWidthArray implements kelondroArray { // save/check property file for this array File propfile = new File(tabledir, "properties"); - Map props = new HashMap(); + Map props = new HashMap(); if (propfile.exists()) { props = serverFileUtils.loadHashMap(propfile); - String stored_rowdef = (String) props.get("rowdef"); + String stored_rowdef = props.get("rowdef"); if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef, rowdef.objectOrder, 0))))) { System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + rowdef + "' for flex table '" + path + "', table " + tablename); @@ -218,19 +218,19 @@ public class kelondroFlexWidthArray implements kelondroArray { return col[0].size(); } - public synchronized void setMultiple(TreeMap /*of {Integer, kelondroRow.Entry}*/ entries) throws IOException { + public synchronized void setMultiple(TreeMap entries) throws IOException { // a R/W head path-optimized option to write a set of entries - Iterator i; - Map.Entry entry; + Iterator> i; + Map.Entry entry; kelondroRow.Entry rowentry, e; int c = 0, index; // go across each file while (c < rowdef.columns()) { i = entries.entrySet().iterator(); while (i.hasNext()) { - entry = (Map.Entry) i.next(); - index = ((Integer) entry.getKey()).intValue(); - rowentry = (kelondroRow.Entry) entry.getValue(); + entry = i.next(); + index = entry.getKey().intValue(); + rowentry = entry.getValue(); assert rowentry.objectsize() == this.rowdef.objectsize; e = col[c].row().newEntry(rowentry.bytes(), rowdef.colstart[c], false); @@ -266,20 +266,20 @@ public class kelondroFlexWidthArray implements kelondroArray { return index; } - protected synchronized TreeMap addMultiple(List rows) throws IOException { + protected synchronized TreeMap addMultiple(List rows) throws IOException { // result is a Integer/byte[] relation // of newly added rows (index, key) - TreeMap indexref = new TreeMap(); - Iterator i; + TreeMap indexref = new TreeMap(); + Iterator i; kelondroRow.Entry rowentry; // prepare storage for other columns - TreeMap[] colm = new TreeMap[col.length]; + TreeMap[] colm = new TreeMap[col.length]; for (int j = 0; j < col.length; j++) { - if (col[j] == null) colm[j] = null; else colm[j] = new TreeMap(); + if (col[j] == null) colm[j] = null; else colm[j] = new TreeMap(); } i = rows.iterator(); while (i.hasNext()) { - rowentry = (kelondroRow.Entry) i.next(); + rowentry = i.next(); assert rowentry.objectsize() == this.rowdef.objectsize; kelondroRow.Entry e; @@ -301,7 +301,7 @@ public class kelondroFlexWidthArray implements kelondroArray { for (int j = 1; j < col.length; j++) { if (col[j] != null) col[j].setMultiple(colm[j]); } - // retrun references to entries with key + // return references to entries with key return indexref; } diff --git a/source/de/anomic/kelondro/kelondroObjectSpace.java b/source/de/anomic/kelondro/kelondroObjectSpace.java index dc9f5dda6..87a73a30e 100644 --- a/source/de/anomic/kelondro/kelondroObjectSpace.java +++ b/source/de/anomic/kelondro/kelondroObjectSpace.java @@ -52,8 +52,8 @@ public class kelondroObjectSpace { private static final int minSize = 10; private static final int maxSize = 256; - private static HashMap objHeap = new HashMap(); - private static TreeMap aliveNow = new TreeMap(); + private static HashMap> objHeap = new HashMap>(); + private static TreeMap aliveNow = new TreeMap(); //private static TreeMap aliveMax = new TreeMap(); private static void incAlive(int size) { @@ -67,7 +67,7 @@ public class kelondroObjectSpace { private static void decAlive(int size) { final Integer s = new Integer(size); synchronized (aliveNow) { - final Integer x = (Integer) aliveNow.get(s); + final Integer x = aliveNow.get(s); if (x == null) aliveNow.put(s, new Integer(-1)); else aliveNow.put(s, new Integer(x.intValue() - 1)); } } @@ -76,9 +76,9 @@ public class kelondroObjectSpace { if ((len < minSize) || (len > maxSize)) return new byte[len]; incAlive(len); synchronized (objHeap) { - ArrayList buf = (ArrayList) objHeap.get(new Integer(len)); + ArrayList buf = objHeap.get(new Integer(len)); if ((buf == null) || (buf.size() == 0)) return new byte[len]; - return (byte[]) buf.remove(buf.size() - 1); + return buf.remove(buf.size() - 1); } } @@ -90,9 +90,9 @@ public class kelondroObjectSpace { decAlive(b.length); synchronized (objHeap) { final Integer i = new Integer(b.length); - ArrayList buf = (ArrayList) objHeap.get(i); + ArrayList buf = objHeap.get(i); if (buf == null) { - buf = new ArrayList(); + buf = new ArrayList(); buf.add(b); objHeap.put(i, buf); } else { @@ -102,21 +102,21 @@ public class kelondroObjectSpace { b = null; } - public static TreeMap statAlive() { + public static TreeMap statAlive() { return aliveNow; } - public static TreeMap statHeap() { + public static TreeMap statHeap() { // creates a statistic output of this object space // the result is a mapping from Integer (chunk size) to Integer (number of counts) // and shows how many Objects are held in this space for usage - TreeMap result = new TreeMap(); + TreeMap result = new TreeMap(); synchronized (objHeap) { - Iterator i = objHeap.entrySet().iterator(); - Map.Entry entry; + Iterator>> i = objHeap.entrySet().iterator(); + Map.Entry> entry; while (i.hasNext()) { - entry = (Map.Entry) i.next(); - result.put(entry.getKey(), new Integer(((ArrayList) entry.getValue()).size())); + entry = i.next(); + result.put(entry.getKey(), new Integer(entry.getValue().size())); } } return result; diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java new file mode 100644 index 000000000..a13c32bc8 --- /dev/null +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -0,0 +1,219 @@ +// kelondroRAMIndex.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 07.01.2008 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.IOException; +import java.util.Date; +import java.util.Iterator; +import java.util.List; + +import de.anomic.kelondro.kelondroRow.Entry; + +public class kelondroRAMIndex implements kelondroIndex { + + private kelondroRow rowdef; + private kelondroRowSet index0, index1; + + public kelondroRAMIndex(kelondroRow rowdef, int initialspace) { + this.rowdef = rowdef; + reset(initialspace); + } + + public void reset() { + reset(0); + } + + public void reset(int initialspace) { + this.index0 = new kelondroRowSet(rowdef, initialspace); + this.index1 = null; // to show that this is the initialization phase + } + + public kelondroRow row() { + return index0.row(); + } + + private final void finishInitialization() { + if (index1 == null) { + // finish initialization phase + index0.sort(); + index0.uniq(); + index1 = new kelondroRowSet(rowdef, 0); + } + } + + public synchronized kelondroRow.Entry get(byte[] key) throws IOException { + assert (key != null); + finishInitialization(); + kelondroRow.Entry indexentry = index0.get(key); + if (indexentry != null) return indexentry; + return index1.get(key); + } + + public boolean has(byte[] key) throws IOException { + assert (key != null); + finishInitialization(); + if (index0.has(key)) return true; + return index1.has(key); + } + + public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) throws IOException { + assert (entry != null); + finishInitialization(); + // if the new entry is within the initialization part, just overwrite it + kelondroRow.Entry indexentry = index0.get(entry.getPrimaryKeyBytes()); + if (indexentry != null) { + index0.put(entry); + return indexentry; + } + // else place it in the index1 + return index1.put(entry); + } + + public Entry put(Entry row, Date entryDate) throws IOException { + return put(row); + } + + public void putMultiple(List rows) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) { + put(i.next()); + } + } + + public synchronized void addUnique(kelondroRow.Entry entry) throws IOException { + assert (entry != null); + if (index1 == null) { + // we are in the initialization phase + index0.addUnique(entry); + } else { + // initialization is over, add to secondary index + index1.addUnique(entry); + } + } + + public void addUniqueMultiple(List rows) throws IOException { + Iterator i = rows.iterator(); + while (i.hasNext()) { + addUnique(i.next()); + } + } + + public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException { + finishInitialization(); + // if the new entry is within the initialization part, just delete it + kelondroRow.Entry indexentry = index0.remove(key, keepOrder); + if (indexentry != null) { + assert index0.remove(key, true) == null; // check if remove worked + return indexentry; + } + // else remove it from the index1 + return index1.remove(key, keepOrder); + } + + public synchronized kelondroRow.Entry removeOne() throws IOException { + if ((index1 != null) && (index1.size() != 0)) { + return index1.removeOne(); + } + if ((index0 != null) && (index0.size() != 0)) { + return index0.removeOne(); + } + return null; + } + + public synchronized int size() { + if ((index0 != null) && (index1 == null)) { + return index0.size(); + } + if ((index0 == null) && (index1 != null)) { + return index1.size(); + } + assert ((index0 != null) && (index1 != null)); + return index0.size() + index1.size(); + } + + @SuppressWarnings("unchecked") + public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { + // returns the key-iterator of the underlying kelondroIndex + if (index1 == null) { + // finish initialization phase + index0.sort(); + index0.uniq(); + index1 = new kelondroRowSet(rowdef, 0); + return index0.keys(up, firstKey); + } + assert (index1 != null); + if (index0 == null) { + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index1.keys(up, firstKey); + } + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return new kelondroMergeIterator( + index0.keys(up, firstKey), + index1.keys(up, firstKey), + rowdef.objectOrder, + kelondroMergeIterator.simpleMerge, + true); + } + + @SuppressWarnings("unchecked") + public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { + // returns the row-iterator of the underlying kelondroIndex + if (index1 == null) { + // finish initialization phase + index0.sort(); + index0.uniq(); + index1 = new kelondroRowSet(rowdef, 0); + return index0.rows(up, firstKey); + } + assert (index1 != null); + if (index0 == null) { + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return index1.rows(up, firstKey); + } + //assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis(); + return new kelondroMergeIterator( + index0.rows(up, firstKey), + index1.rows(up, firstKey), + rowdef.objectOrder, + kelondroMergeIterator.simpleMerge, + true); + } + + public kelondroProfile profile() { + if (index0 == null) return index1.profile(); + if (index1 == null) return index0.profile(); + return kelondroProfile.consolidate(index0.profile(), index1.profile()); + } + + public synchronized void close() { + if (index0 != null) index0.close(); + if (index1 != null) index1.close(); + } + + public String filename() { + return null; // this does not have a file name + } + +} diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index ea377bb3a..8111acac3 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -51,13 +51,13 @@ import java.util.LinkedList; import java.util.Map; import java.util.TreeMap; +import de.anomic.kelondro.kelondroAbstractRecords; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroStack; -import de.anomic.kelondro.kelondroAbstractRecords; import de.anomic.server.logging.serverLog; import de.anomic.yacy.yacySeedDB; @@ -67,27 +67,46 @@ public class plasmaCrawlBalancer { private static final String indexSuffix = "8.db"; // a shared domainAccess map for all balancers - private static final Map domainAccess = Collections.synchronizedMap(new HashMap()); + private static final Map domainAccess = Collections.synchronizedMap(new HashMap()); // definition of payload for fileStack private static final kelondroRow stackrow = new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength, kelondroBase64Order.enhancedCoder, 0); // class variables - private ArrayList urlRAMStack; // a list that is flused first - private kelondroStack urlFileStack; // a file with url hashes - private kelondroIndex urlFileIndex; - private HashMap domainStacks; // a map from domain name part to Lists with url hashs - private File cacheStacksPath; - private String stackname; - private boolean top; // to alternate between top and bottom of the file stack + private ArrayList urlRAMStack; // a list that is flushed first + private kelondroStack urlFileStack; // a file with url hashes + private kelondroIndex urlFileIndex; + private HashMap> domainStacks; // a map from domain name part to Lists with url hashs + private File cacheStacksPath; + private String stackname; + private boolean top; // to alternate between top and bottom of the file stack + + public static class domaccess { + long time; + int count; + public domaccess() { + this.time = System.currentTimeMillis(); + this.count = 0; + } + public void update() { + this.time = System.currentTimeMillis(); + this.count++; + } + public long time() { + return this.time; + } + public int count() { + return this.count; + } + } public plasmaCrawlBalancer(File cachePath, String stackname) { this.cacheStacksPath = cachePath; this.stackname = stackname; File stackFile = new File(cachePath, stackname + stackSuffix); this.urlFileStack = kelondroStack.open(stackFile, stackrow); - this.domainStacks = new HashMap(); - this.urlRAMStack = new ArrayList(); + this.domainStacks = new HashMap>(); + this.urlRAMStack = new ArrayList(); this.top = true; // create a stack for newly entered entries @@ -147,8 +166,8 @@ public class plasmaCrawlBalancer { // returns number of deletions // first find a list of url hashes that shall be deleted - Iterator i = urlFileIndex.rows(true, null); - ArrayList urlHashes = new ArrayList(); + Iterator i = urlFileIndex.rows(true, null); + ArrayList urlHashes = new ArrayList(); kelondroRow.Entry rowEntry; plasmaCrawlEntry crawlEntry; while (i.hasNext()) { @@ -160,15 +179,15 @@ public class plasmaCrawlBalancer { } // then delete all these urls from the queues and the file index - i = urlHashes.iterator(); - while (i.hasNext()) this.remove((String) i.next()); + Iterator j = urlHashes.iterator(); + while (j.hasNext()) this.remove(j.next()); return urlHashes.size(); } public synchronized plasmaCrawlEntry remove(String urlhash) throws IOException { // this method is only here, because so many import/export methods need it // and it was implemented in the previous architecture - // however, usage is not recommendet + // however, usage is not recommended int s = urlFileIndex.size(); kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false); if (entry == null) return null; @@ -177,7 +196,7 @@ public class plasmaCrawlBalancer { // now delete that thing also from the queues // iterate through the RAM stack - Iterator i = urlRAMStack.iterator(); + Iterator i = urlRAMStack.iterator(); String h; while (i.hasNext()) { h = (String) i.next(); @@ -189,11 +208,11 @@ public class plasmaCrawlBalancer { // iterate through the file stack // in general this is a bad idea. But this can only be avoided by avoidance of this method - i = urlFileStack.stackIterator(true); - while (i.hasNext()) { - h = new String(((kelondroRow.Entry) i.next()).getColBytes(0)); + Iterator j = urlFileStack.stackIterator(true); + while (j.hasNext()) { + h = new String(j.next().getColBytes(0)); if (h.equals(urlhash)) { - i.remove(); + j.remove(); return new plasmaCrawlEntry(entry); } } @@ -236,9 +255,9 @@ public class plasmaCrawlBalancer { private boolean domainStacksNotEmpty() { if (domainStacks == null) return false; synchronized (domainStacks) { - Iterator i = domainStacks.values().iterator(); + Iterator> i = domainStacks.values().iterator(); while (i.hasNext()) { - if (((LinkedList) i.next()).size() > 0) return true; + if (i.next().size() > 0) return true; } } return false; @@ -248,8 +267,8 @@ public class plasmaCrawlBalancer { if (domainStacks == null) return 0; int sum = 0; synchronized (domainStacks) { - Iterator i = domainStacks.values().iterator(); - while (i.hasNext()) sum += ((LinkedList) i.next()).size(); + Iterator> i = domainStacks.values().iterator(); + while (i.hasNext()) sum += i.next().size(); } return sum; } @@ -259,12 +278,12 @@ public class plasmaCrawlBalancer { // the minimumleft value is a limit for the number of entries that should be left if (domainStacks.size() == 0) return; synchronized (domainStacks) { - Iterator i = domainStacks.entrySet().iterator(); - Map.Entry entry; - LinkedList list; + Iterator>> i = domainStacks.entrySet().iterator(); + Map.Entry> entry; + LinkedList list; while (i.hasNext()) { - entry = (Map.Entry) i.next(); - list = (LinkedList) entry.getValue(); + entry = i.next(); + list = entry.getValue(); if (list.size() > minimumleft) { if (ram) { urlRAMStack.add(list.removeFirst()); @@ -298,10 +317,10 @@ public class plasmaCrawlBalancer { // extend domain stack String dom = entry.url().hash().substring(6); - LinkedList domainList = (LinkedList) domainStacks.get(dom); + LinkedList domainList = domainStacks.get(dom); if (domainList == null) { // create new list - domainList = new LinkedList(); + domainList = new LinkedList(); synchronized (domainStacks) { domainList.add(entry.url().hash()); domainStacks.put(dom, domainList); @@ -336,19 +355,19 @@ public class plasmaCrawlBalancer { // we select specific domains that have not been used for a long time // i.e. 60 seconds. Latest arrivals that have not yet been crawled // fit also in that scheme - Iterator i = domainStacks.entrySet().iterator(); - Map.Entry entry; + Iterator>> i = domainStacks.entrySet().iterator(); + Map.Entry> entry; String domhash; long delta, maxdelta = 0; String maxhash = null; - LinkedList domlist; + LinkedList domlist; while (i.hasNext()) { - entry = (Map.Entry) i.next(); + entry = i.next(); domhash = (String) entry.getKey(); delta = lastAccessDelta(domhash); if (delta == Integer.MAX_VALUE) { // a brand new domain - we take it - domlist = (LinkedList) entry.getValue(); + domlist = entry.getValue(); result = (String) domlist.removeFirst(); if (domlist.size() == 0) i.remove(); break; @@ -360,7 +379,7 @@ public class plasmaCrawlBalancer { } if (maxdelta > maximumAge) { // success - we found an entry from a domain that has not been used for a long time - domlist = (LinkedList) domainStacks.get(maxhash); + domlist = domainStacks.get(maxhash); result = (String) domlist.removeFirst(); if (domlist.size() == 0) domainStacks.remove(maxhash); } @@ -371,17 +390,17 @@ public class plasmaCrawlBalancer { // we order all domains by the number of entries per domain // then we iterate through these domains in descending entry order // and that that one, that has a delta > minimumDelta - Iterator i = domainStacks.entrySet().iterator(); - Map.Entry entry; + Iterator>> i = domainStacks.entrySet().iterator(); + Map.Entry> entry; String domhash; - LinkedList domlist; - TreeMap hitlist = new TreeMap(); + LinkedList domlist; + TreeMap hitlist = new TreeMap(); int count = 0; // first collect information about sizes of the domain lists while (i.hasNext()) { - entry = (Map.Entry) i.next(); - domhash = (String) entry.getKey(); - domlist = (LinkedList) entry.getValue(); + entry = i.next(); + domhash = entry.getKey(); + domlist = entry.getValue(); hitlist.put(new Integer(domlist.size() * 100 + count++), domhash); } @@ -394,7 +413,7 @@ public class plasmaCrawlBalancer { if (maxhash == null) maxhash = domhash; // remember first entry delta = lastAccessDelta(domhash); if (delta > minimumGlobalDelta) { - domlist = (LinkedList) domainStacks.get(domhash); + domlist = domainStacks.get(domhash); result = (String) domlist.removeFirst(); if (domlist.size() == 0) domainStacks.remove(domhash); break; @@ -403,7 +422,7 @@ public class plasmaCrawlBalancer { // if we did yet not choose any entry, we simply take that one with the most entries if ((result == null) && (maxhash != null)) { - domlist = (LinkedList) domainStacks.get(maxhash); + domlist = domainStacks.get(maxhash); result = (String) domlist.removeFirst(); if (domlist.size() == 0) domainStacks.remove(maxhash); } @@ -467,16 +486,18 @@ public class plasmaCrawlBalancer { } // update statistical data - domainAccess.put(result.substring(6), new Long(System.currentTimeMillis())); + domaccess lastAccess = domainAccess.get(result.substring(6)); + if (lastAccess == null) lastAccess = new domaccess(); else lastAccess.update(); + domainAccess.put(result.substring(6), lastAccess); return crawlEntry; } private long lastAccessDelta(String hash) { assert hash != null; - Long lastAccess = (Long) domainAccess.get((hash.length() > 6) ? hash.substring(6) : hash); + domaccess lastAccess = domainAccess.get((hash.length() > 6) ? hash.substring(6) : hash); if (lastAccess == null) return Long.MAX_VALUE; // never accessed - return System.currentTimeMillis() - lastAccess.longValue(); + return System.currentTimeMillis() - lastAccess.time(); } public synchronized plasmaCrawlEntry top(int dist) throws IOException { @@ -507,13 +528,13 @@ public class plasmaCrawlBalancer { return new plasmaCrawlEntry(entry); } - public synchronized Iterator iterator() throws IOException { + public synchronized Iterator iterator() throws IOException { return new EntryIterator(); } - private class EntryIterator implements Iterator { + private class EntryIterator implements Iterator { - private Iterator rowIterator; + private Iterator rowIterator; public EntryIterator() throws IOException { rowIterator = urlFileIndex.rows(true, null); @@ -523,7 +544,7 @@ public class plasmaCrawlBalancer { return (rowIterator == null) ? false : rowIterator.hasNext(); } - public Object next() { + public plasmaCrawlEntry next() { kelondroRow.Entry entry = (kelondroRow.Entry) rowIterator.next(); try { return (entry == null) ? null : new plasmaCrawlEntry(entry);