- some refactoring and redesign of kelondroBytesIntMap (created new class kelondroRAMIndex)

- more generics
- preparation to extend the balancer for flexible forced delay times
- set a different random-access mode; this should now omit the update of metadata in the file and could be a bit faster (let's see)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4309 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 89f48bae55
commit 3e3d2e39a4

@ -372,6 +372,7 @@ public class dbtest {
if (command.equals("stressThreaded")) { if (command.equals("stressThreaded")) {
// //
// args: <number-of-writes> <number-of-reads-per-write> <random-startpoint> // args: <number-of-writes> <number-of-reads-per-write> <random-startpoint>
// example: kelondroFlexTable stressThreaded /Users/admin/dbtest 500 50 0
long writeCount = Long.parseLong(args[3]); long writeCount = Long.parseLong(args[3]);
long readCount = Long.parseLong(args[4]); long readCount = Long.parseLong(args[4]);
long randomstart = Long.parseLong(args[5]); long randomstart = Long.parseLong(args[5]);

@ -25,51 +25,31 @@
package de.anomic.kelondro; package de.anomic.kelondro;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
public class kelondroBytesIntMap { public class kelondroBytesIntMap {
private kelondroRow rowdef; private kelondroRow rowdef;
private kelondroIndex index0, index1; private kelondroIndex index;
public kelondroBytesIntMap(kelondroIndex ki) { public kelondroBytesIntMap(kelondroIndex ki) {
assert (ki.row().columns() == 2); // must be a key/index relation assert (ki.row().columns() == 2); // must be a key/index relation
assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long assert (ki.row().width(1) == 4); // the value must be a b256-encoded int, 4 bytes long
this.index0 = null; // not used this.index = ki;
this.index1 = ki;
this.rowdef = ki.row(); this.rowdef = ki.row();
} }
public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) { public kelondroBytesIntMap(int keylength, kelondroOrder objectOrder, int space) {
this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0); this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-4 {b256}")}, objectOrder, 0);
this.index0 = new kelondroRowSet(rowdef, space); this.index = new kelondroRAMIndex(rowdef, space);
this.index1 = null; // to show that this is the initialization phase
} }
public kelondroRow row() { public kelondroRow row() {
return index0.row(); return index.row();
} }
public synchronized int geti(byte[] key) throws IOException { public synchronized int geti(byte[] key) throws IOException {
assert (key != null); assert (key != null);
//assert (!(serverLog.allZero(key))); kelondroRow.Entry indexentry = index.get(key);
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq();
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in geti");
}
kelondroRow.Entry indexentry = index0.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry != null) return (int) indexentry.getColLong(1);
}
assert (index1 != null);
kelondroRow.Entry indexentry = index1.get(key);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
if (indexentry == null) return -1; if (indexentry == null) return -1;
return (int) indexentry.getColLong(1); return (int) indexentry.getColLong(1);
} }
@ -77,244 +57,55 @@ public class kelondroBytesIntMap {
public synchronized int puti(byte[] key, int i) throws IOException { public synchronized int puti(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i; assert i >= 0 : "i = " + i;
assert (key != null); assert (key != null);
//assert (!(serverLog.allZero(key))); kelondroRow.Entry newentry = index.row().newEntry();
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq();
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in puti");
}
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.get(key);
if (indexentry != null) {
int oldi = (int) indexentry.getColLong(1);
indexentry.setCol(0, key);
indexentry.setCol(1, i);
index0.put(indexentry);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return oldi;
}
// else place it in the index1
}
// at this point index1 cannot be null
assert (index1 != null);
kelondroRow.Entry newentry = index1.row().newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, i); newentry.setCol(1, i);
kelondroRow.Entry oldentry = index1.put(newentry); kelondroRow.Entry oldentry = index.put(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) oldentry.getColLong(1); return (int) oldentry.getColLong(1);
} }
public synchronized void addi(byte[] key, int i) throws IOException { public synchronized void addi(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i; assert i >= 0 : "i = " + i;
assert (key != null); assert (key != null);
assert index0 != null;
//assert index1 == null;
if (index1 != null) {
// the initialization phase is over, put this entry to the secondary index
puti(key, i);
return;
}
//assert (!(serverLog.allZero(key)));
kelondroRow.Entry newentry = this.rowdef.newEntry(); kelondroRow.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setCol(1, i); newentry.setCol(1, i);
index0.addUnique(newentry); index.addUnique(newentry);
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
} }
public synchronized int removei(byte[] key) throws IOException { public synchronized int removei(byte[] key) throws IOException {
assert (key != null); assert (key != null);
//assert (!(serverLog.allZero(key))); kelondroRow.Entry indexentry = index.remove(key, false);
// returns the integer index of the key, if the key can be found and was removed
// and -1 if the key was not found.
if (index0 != null) {
if (index1 == null) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq();
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in removei");
}
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.remove(key, true);
if (indexentry != null) {
assert index0.remove(key, true) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
// else remove it from the index1
}
// at this point index1 cannot be null
assert (index1 != null);
if (index1.size() == 0) return -1;
kelondroRow.Entry indexentry = index1.remove(key, true);
if (indexentry == null) return -1; if (indexentry == null) return -1;
assert index1.remove(key, true) == null; // check if remove worked
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1); return (int) indexentry.getColLong(1);
} }
public synchronized int removeonei() throws IOException { public synchronized int removeonei() throws IOException {
if ((index1 != null) && (index1.size() != 0)) { kelondroRow.Entry indexentry = index.removeOne();
kelondroRow.Entry indexentry = index1.removeOne();
assert (indexentry != null);
if (indexentry == null) return -1; if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1); return (int) indexentry.getColLong(1);
} }
if ((index0 != null) && (index0.size() != 0)) {
kelondroRow.Entry indexentry = index0.removeOne();
assert (indexentry != null);
if (indexentry == null) return -1;
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return (int) indexentry.getColLong(1);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return -1;
}
public synchronized int size() { public synchronized int size() {
if ((index0 != null) && (index1 == null)) { return index.size();
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size();
}
if ((index0 == null) && (index1 != null)) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.size();
}
assert ((index0 != null) && (index1 != null));
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.size() + index1.size();
} }
public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException {
// returns the key-iterator of the underlying kelondroIndex return index.keys(up, firstKey);
// col[0] = key
// col[1] = integer as {b265}
if ((index0 != null) && (index1 == null)) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq();
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.keys(up, firstKey);
}
assert (index1 != null);
if (index0 == null) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.keys(up, firstKey);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(
index0.keys(up, firstKey),
index1.keys(up, firstKey),
rowdef.objectOrder,
kelondroMergeIterator.simpleMerge,
true);
} }
public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex return index.rows(up, firstKey);
// col[0] = key
// col[1] = integer as {b265}
if ((index0 != null) && (index1 == null)) {
// finish initialization phase
if (index0 instanceof kelondroRowSet) {
((kelondroRowSet) index0).sort();
((kelondroRowSet) index0).uniq();
}
index1 = new kelondroRowSet(rowdef, 0);
//System.out.println("finished initialization phase at size = " + index0.size() + " in rows");
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index0.rows(up, firstKey);
}
assert (index1 != null);
if (index0 == null) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.rows(up, firstKey);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(
index0.rows(up, firstKey),
index1.rows(up, firstKey),
rowdef.objectOrder,
kelondroMergeIterator.simpleMerge,
true);
} }
public kelondroProfile profile() { public kelondroProfile profile() {
if (index0 == null) return index1.profile(); return index.profile();
if (index1 == null) return index0.profile();
return kelondroProfile.consolidate(index0.profile(), index1.profile());
} }
public synchronized void close() { public synchronized void close() {
if (index0 != null) index0.close(); index.close();
if (index1 != null) index1.close(); index = null;
}
public synchronized String consistencyAnalysis() {
String s0 = (index0 == null) ? "index0: is NULL" : ("index0: " + singleConsistency((kelondroRowSet) index0));
String s1 = (index1 == null) ? "index1: is NULL" : ("index1: " + singleConsistency((kelondroRowSet) index1));
String combined = "";
if ((index0 == null) && (index1 == null)) return "all null";
if ((index0 != null) && (index1 != null)) {
Iterator<kelondroRow.Entry> i;
try {
i = index0.rows(true, null);
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = i.next();
if (index1.has(entry.getColBytes(0))) {
combined = combined + ", common = " + new String(entry.getColBytes(0));
}
}
} catch (IOException e) {}
}
return s0 + ", " + s1 + combined;
} }
public synchronized boolean consistencyAnalysis0() {
boolean s0 = ((index0 == null) || (!(index0 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index0);
boolean s1 = ((index1 == null) || (!(index1 instanceof kelondroRowSet))) ? true : singleConsistency0((kelondroRowSet) index1);
if (!(s0 && s1)) return false;
if ((index0 == null) && (index1 == null)) return true;
if ((index0 != null) && (index1 != null)) {
Iterator<kelondroRow.Entry> i;
try {
i = index0.rows(true, null);
kelondroRow.Entry entry;
while (i.hasNext()) {
entry = i.next();
if (index1.has(entry.getColBytes(0))) return false;
}
} catch (IOException e) {}
}
return true;
}
private String singleConsistency(kelondroRowSet rs) {
int s = rs.size();
rs.sort();
rs.uniq();
if (rs.size() == s) return "set is sound"; else return "set has " + (rs.size() - s) + " double-entries";
}
private boolean singleConsistency0(kelondroRowSet rs) {
int s = rs.size();
rs.sort();
rs.uniq();
return rs.size() == s;
}
} }

@ -57,7 +57,7 @@ public final class kelondroFileRA extends kelondroAbstractRA implements kelondro
public kelondroFileRA(File file) throws IOException, FileNotFoundException { public kelondroFileRA(File file) throws IOException, FileNotFoundException {
this.name = file.getName(); this.name = file.getName();
RAFile = new RandomAccessFile(file, "rw"); RAFile = new RandomAccessFile(file, "rwd");
} }
public long length() throws IOException { public long length() throws IOException {

@ -40,7 +40,7 @@ import de.anomic.server.logging.serverLog;
public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex { public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondroIndex {
// static tracker objects // static tracker objects
private static TreeMap tableTracker = new TreeMap(); private static TreeMap<String, kelondroFlexTable> tableTracker = new TreeMap<String, kelondroFlexTable>();
// class objects // class objects
protected kelondroBytesIntMap index; protected kelondroBytesIntMap index;
@ -156,12 +156,12 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
int space = Math.max(super.col[0].size(), initialSpace) + 1; int space = Math.max(super.col[0].size(), initialSpace) + 1;
if (space < 0) throw new kelondroException("wrong space: " + space); if (space < 0) throw new kelondroException("wrong space: " + space);
kelondroBytesIntMap ri = new kelondroBytesIntMap(super.row().column(0).cellwidth, super.rowdef.objectOrder, space); kelondroBytesIntMap ri = new kelondroBytesIntMap(super.row().column(0).cellwidth, super.rowdef.objectOrder, space);
Iterator content = super.col[0].contentNodes(-1); Iterator<kelondroNode> content = super.col[0].contentNodes(-1);
kelondroNode node; kelondroNode node;
int i; int i;
byte[] key; byte[] key;
while (content.hasNext()) { while (content.hasNext()) {
node = (kelondroNode) content.next(); node = content.next();
i = node.handle().hashCode(); i = node.handle().hashCode();
key = node.getKey(); key = node.getKey();
assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator assert (key != null) : "DEBUG: empty key in initializeRamIndex"; // should not happen; if it does, it is an error of the condentNodes iterator
@ -181,14 +181,14 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroOrder objectOrder) throws IOException { private kelondroIndex initializeTreeIndex(File indexfile, long preloadTime, kelondroOrder objectOrder) throws IOException {
kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80), true, false); kelondroIndex treeindex = new kelondroCache(new kelondroTree(indexfile, true, preloadTime, treeIndexRow(rowdef.primaryKeyLength, objectOrder), 2, 80), true, false);
Iterator content = super.col[0].contentNodes(-1); Iterator<kelondroNode> content = super.col[0].contentNodes(-1);
kelondroNode node; kelondroNode node;
kelondroRow.Entry indexentry; kelondroRow.Entry indexentry;
int i, c = 0, all = super.col[0].size(); int i, c = 0, all = super.col[0].size();
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
long last = start; long last = start;
while (content.hasNext()) { while (content.hasNext()) {
node = (kelondroNode) content.next(); node = content.next();
i = node.handle().hashCode(); i = node.handle().hashCode();
indexentry = treeindex.row().newEntry(); indexentry = treeindex.row().newEntry();
indexentry.setCol(0, node.getValueRow()); indexentry.setCol(0, node.getValueRow());
@ -211,7 +211,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
public synchronized kelondroRow.Entry get(byte[] key) throws IOException { public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
if (index == null) return null; // case may happen during shutdown if (index == null) return null; // case may happen during shutdown
int pos = index.geti(key); int pos = index.geti(key);
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size() + ", analysis: " + index.consistencyAnalysis(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
if (pos < 0) return null; if (pos < 0) return null;
// i may be greater than this.size(), because this table may have deleted entries // i may be greater than this.size(), because this table may have deleted entries
// the deleted entries are subtracted from the 'real' tablesize, // the deleted entries are subtracted from the 'real' tablesize,
@ -222,18 +222,18 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return result; return result;
} }
public synchronized void putMultiple(List rows) throws IOException { public synchronized void putMultiple(List<kelondroRow.Entry> rows) throws IOException {
// put a list of entries in a ordered way. // put a list of entries in a ordered way.
// this should save R/W head positioning time // this should save R/W head positioning time
Iterator i = rows.iterator(); Iterator<kelondroRow.Entry> i = rows.iterator();
kelondroRow.Entry row; kelondroRow.Entry row;
int pos; int pos;
byte[] key; byte[] key;
TreeMap old_rows_ordered = new TreeMap(); TreeMap<Integer, kelondroRow.Entry> old_rows_ordered = new TreeMap<Integer, kelondroRow.Entry>();
ArrayList new_rows_sequential = new ArrayList(); ArrayList<kelondroRow.Entry> new_rows_sequential = new ArrayList<kelondroRow.Entry>();
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
while (i.hasNext()) { while (i.hasNext()) {
row = (kelondroRow.Entry) i.next(); row = i.next();
key = row.getColBytes(0); key = row.getColBytes(0);
pos = index.geti(key); pos = index.geti(key);
if (pos < 0) { if (pos < 0) {
@ -295,17 +295,17 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
index.addi(row.getColBytes(0), super.add(row)); index.addi(row.getColBytes(0), super.add(row));
} }
public synchronized void addUniqueMultiple(List rows) throws IOException { public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
// add a list of entries in a ordered way. // add a list of entries in a ordered way.
// this should save R/W head positioning time // this should save R/W head positioning time
TreeMap indexed_result = super.addMultiple(rows); TreeMap<Integer, byte[]> indexed_result = super.addMultiple(rows);
// indexed_result is a Integer/byte[] relation // indexed_result is a Integer/byte[] relation
// that is used here to store the index // that is used here to store the index
Iterator i = indexed_result.entrySet().iterator(); Iterator<Map.Entry<Integer, byte[]>> i = indexed_result.entrySet().iterator();
Map.Entry entry; Map.Entry<Integer, byte[]> entry;
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
index.puti((byte[]) entry.getValue(), ((Integer) entry.getKey()).intValue()); index.puti(entry.getValue(), entry.getKey().intValue());
} }
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
@ -406,7 +406,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return index.profile(); return index.profile();
} }
public static final Iterator filenames() { public static final Iterator<String> filenames() {
// iterates string objects; all file names from record tracker // iterates string objects; all file names from record tracker
return tableTracker.keySet().iterator(); return tableTracker.keySet().iterator();
} }

@ -101,10 +101,10 @@ public class kelondroFlexWidthArray implements kelondroArray {
// save/check property file for this array // save/check property file for this array
File propfile = new File(tabledir, "properties"); File propfile = new File(tabledir, "properties");
Map props = new HashMap(); Map<String, String> props = new HashMap<String, String>();
if (propfile.exists()) { if (propfile.exists()) {
props = serverFileUtils.loadHashMap(propfile); props = serverFileUtils.loadHashMap(propfile);
String stored_rowdef = (String) props.get("rowdef"); String stored_rowdef = props.get("rowdef");
if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef, rowdef.objectOrder, 0))))) { if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef, rowdef.objectOrder, 0))))) {
System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" +
rowdef + "' for flex table '" + path + "', table " + tablename); rowdef + "' for flex table '" + path + "', table " + tablename);
@ -218,19 +218,19 @@ public class kelondroFlexWidthArray implements kelondroArray {
return col[0].size(); return col[0].size();
} }
public synchronized void setMultiple(TreeMap /*of {Integer, kelondroRow.Entry}*/ entries) throws IOException { public synchronized void setMultiple(TreeMap<Integer, kelondroRow.Entry> entries) throws IOException {
// a R/W head path-optimized option to write a set of entries // a R/W head path-optimized option to write a set of entries
Iterator i; Iterator<Map.Entry<Integer, kelondroRow.Entry>> i;
Map.Entry entry; Map.Entry<Integer, kelondroRow.Entry> entry;
kelondroRow.Entry rowentry, e; kelondroRow.Entry rowentry, e;
int c = 0, index; int c = 0, index;
// go across each file // go across each file
while (c < rowdef.columns()) { while (c < rowdef.columns()) {
i = entries.entrySet().iterator(); i = entries.entrySet().iterator();
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
index = ((Integer) entry.getKey()).intValue(); index = entry.getKey().intValue();
rowentry = (kelondroRow.Entry) entry.getValue(); rowentry = entry.getValue();
assert rowentry.objectsize() == this.rowdef.objectsize; assert rowentry.objectsize() == this.rowdef.objectsize;
e = col[c].row().newEntry(rowentry.bytes(), rowdef.colstart[c], false); e = col[c].row().newEntry(rowentry.bytes(), rowdef.colstart[c], false);
@ -266,20 +266,20 @@ public class kelondroFlexWidthArray implements kelondroArray {
return index; return index;
} }
protected synchronized TreeMap addMultiple(List rows) throws IOException { protected synchronized TreeMap<Integer, byte[]> addMultiple(List<kelondroRow.Entry> rows) throws IOException {
// result is a Integer/byte[] relation // result is a Integer/byte[] relation
// of newly added rows (index, key) // of newly added rows (index, key)
TreeMap indexref = new TreeMap(); TreeMap<Integer, byte[]> indexref = new TreeMap<Integer, byte[]>();
Iterator i; Iterator<kelondroRow.Entry> i;
kelondroRow.Entry rowentry; kelondroRow.Entry rowentry;
// prepare storage for other columns // prepare storage for other columns
TreeMap[] colm = new TreeMap[col.length]; TreeMap<Integer, kelondroRow.Entry>[] colm = new TreeMap[col.length];
for (int j = 0; j < col.length; j++) { for (int j = 0; j < col.length; j++) {
if (col[j] == null) colm[j] = null; else colm[j] = new TreeMap(); if (col[j] == null) colm[j] = null; else colm[j] = new TreeMap<Integer, kelondroRow.Entry>();
} }
i = rows.iterator(); i = rows.iterator();
while (i.hasNext()) { while (i.hasNext()) {
rowentry = (kelondroRow.Entry) i.next(); rowentry = i.next();
assert rowentry.objectsize() == this.rowdef.objectsize; assert rowentry.objectsize() == this.rowdef.objectsize;
kelondroRow.Entry e; kelondroRow.Entry e;
@ -301,7 +301,7 @@ public class kelondroFlexWidthArray implements kelondroArray {
for (int j = 1; j < col.length; j++) { for (int j = 1; j < col.length; j++) {
if (col[j] != null) col[j].setMultiple(colm[j]); if (col[j] != null) col[j].setMultiple(colm[j]);
} }
// retrun references to entries with key // return references to entries with key
return indexref; return indexref;
} }

@ -52,8 +52,8 @@ public class kelondroObjectSpace {
private static final int minSize = 10; private static final int minSize = 10;
private static final int maxSize = 256; private static final int maxSize = 256;
private static HashMap objHeap = new HashMap(); private static HashMap<Integer, ArrayList<byte[]>> objHeap = new HashMap<Integer, ArrayList<byte[]>>();
private static TreeMap aliveNow = new TreeMap(); private static TreeMap<Integer, Integer> aliveNow = new TreeMap<Integer, Integer>();
//private static TreeMap aliveMax = new TreeMap(); //private static TreeMap aliveMax = new TreeMap();
private static void incAlive(int size) { private static void incAlive(int size) {
@ -67,7 +67,7 @@ public class kelondroObjectSpace {
private static void decAlive(int size) { private static void decAlive(int size) {
final Integer s = new Integer(size); final Integer s = new Integer(size);
synchronized (aliveNow) { synchronized (aliveNow) {
final Integer x = (Integer) aliveNow.get(s); final Integer x = aliveNow.get(s);
if (x == null) aliveNow.put(s, new Integer(-1)); else aliveNow.put(s, new Integer(x.intValue() - 1)); if (x == null) aliveNow.put(s, new Integer(-1)); else aliveNow.put(s, new Integer(x.intValue() - 1));
} }
} }
@ -76,9 +76,9 @@ public class kelondroObjectSpace {
if ((len < minSize) || (len > maxSize)) return new byte[len]; if ((len < minSize) || (len > maxSize)) return new byte[len];
incAlive(len); incAlive(len);
synchronized (objHeap) { synchronized (objHeap) {
ArrayList buf = (ArrayList) objHeap.get(new Integer(len)); ArrayList<byte[]> buf = objHeap.get(new Integer(len));
if ((buf == null) || (buf.size() == 0)) return new byte[len]; if ((buf == null) || (buf.size() == 0)) return new byte[len];
return (byte[]) buf.remove(buf.size() - 1); return buf.remove(buf.size() - 1);
} }
} }
@ -90,9 +90,9 @@ public class kelondroObjectSpace {
decAlive(b.length); decAlive(b.length);
synchronized (objHeap) { synchronized (objHeap) {
final Integer i = new Integer(b.length); final Integer i = new Integer(b.length);
ArrayList buf = (ArrayList) objHeap.get(i); ArrayList<byte[]> buf = objHeap.get(i);
if (buf == null) { if (buf == null) {
buf = new ArrayList(); buf = new ArrayList<byte[]>();
buf.add(b); buf.add(b);
objHeap.put(i, buf); objHeap.put(i, buf);
} else { } else {
@ -102,21 +102,21 @@ public class kelondroObjectSpace {
b = null; b = null;
} }
public static TreeMap statAlive() { public static TreeMap<Integer, Integer> statAlive() {
return aliveNow; return aliveNow;
} }
public static TreeMap statHeap() { public static TreeMap<Integer, Integer> statHeap() {
// creates a statistic output of this object space // creates a statistic output of this object space
// the result is a mapping from Integer (chunk size) to Integer (number of counts) // the result is a mapping from Integer (chunk size) to Integer (number of counts)
// and shows how many Objects are held in this space for usage // and shows how many Objects are held in this space for usage
TreeMap result = new TreeMap(); TreeMap<Integer, Integer> result = new TreeMap<Integer, Integer>();
synchronized (objHeap) { synchronized (objHeap) {
Iterator i = objHeap.entrySet().iterator(); Iterator<Map.Entry<Integer, ArrayList<byte[]>>> i = objHeap.entrySet().iterator();
Map.Entry entry; Map.Entry<Integer, ArrayList<byte[]>> entry;
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
result.put(entry.getKey(), new Integer(((ArrayList) entry.getValue()).size())); result.put(entry.getKey(), new Integer(entry.getValue().size()));
} }
} }
return result; return result;

@ -0,0 +1,219 @@
// kelondroRAMIndex.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 07.01.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.IOException;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import de.anomic.kelondro.kelondroRow.Entry;
public class kelondroRAMIndex implements kelondroIndex {
private kelondroRow rowdef;
private kelondroRowSet index0, index1;
/**
 * Creates a RAM index for rows described by the given row definition.
 *
 * @param rowdef       row definition handed to every row set this index creates
 * @param initialspace forwarded to {@code reset(int)} as the space argument of the first row set
 */
public kelondroRAMIndex(kelondroRow rowdef, int initialspace) {
    this.rowdef = rowdef;
    // NOTE(review): reset(int) is public and non-final, so an overriding subclass
    // would be invoked from this constructor.
    this.reset(initialspace);
}
/**
 * Re-initializes this index; shorthand for {@code reset(0)}.
 */
public void reset() {
    this.reset(0);
}
/**
 * Replaces the primary row set with a newly created one and puts the index
 * back into its initialization phase.
 *
 * @param initialspace space argument passed to the new {@code kelondroRowSet}
 */
public void reset(int initialspace) {
    final kelondroRowSet fresh = new kelondroRowSet(this.rowdef, initialspace);
    this.index0 = fresh;
    // a null secondary set is the marker for "still in the initialization phase"
    this.index1 = null;
}
/**
 * Returns the row definition as reported by the primary row set.
 *
 * @return the result of {@code index0.row()}
 */
public kelondroRow row() {
    return this.index0.row();
}
/**
 * Ends the initialization phase if it is still active: calls {@code sort()} and
 * {@code uniq()} on the primary set and creates the secondary set.
 * Does nothing when the secondary set already exists.
 */
private final void finishInitialization() {
    if (this.index1 != null) {
        return; // initialization phase was already finished
    }
    this.index0.sort();
    this.index0.uniq();
    this.index1 = new kelondroRowSet(this.rowdef, 0);
}
/**
 * Looks up the row stored under the given key.
 * The primary set is consulted first; the secondary set is only asked
 * when the primary set has no entry for the key.
 *
 * @param key the key to look up; must not be null (checked by assertion only)
 * @return the entry from the primary set if present, otherwise whatever the
 *         secondary set returns for the key
 * @throws IOException declared for the underlying lookups
 */
public synchronized kelondroRow.Entry get(byte[] key) throws IOException {
    assert (key != null);
    finishInitialization();
    final kelondroRow.Entry primaryHit = this.index0.get(key);
    return (primaryHit == null) ? this.index1.get(key) : primaryHit;
}
public boolean has(byte[] key) throws IOException {
assert (key != null);
finishInitialization();
if (index0.has(key)) return true;
return index1.has(key);
}
public synchronized kelondroRow.Entry put(kelondroRow.Entry entry) throws IOException {
assert (entry != null);
finishInitialization();
// if the new entry is within the initialization part, just overwrite it
kelondroRow.Entry indexentry = index0.get(entry.getPrimaryKeyBytes());
if (indexentry != null) {
index0.put(entry);
return indexentry;
}
// else place it in the index1
return index1.put(entry);
}
public Entry put(Entry row, Date entryDate) throws IOException {
return put(row);
}
public void putMultiple(List<Entry> rows) throws IOException {
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) {
put(i.next());
}
}
public synchronized void addUnique(kelondroRow.Entry entry) throws IOException {
assert (entry != null);
if (index1 == null) {
// we are in the initialization phase
index0.addUnique(entry);
} else {
// initialization is over, add to secondary index
index1.addUnique(entry);
}
}
public void addUniqueMultiple(List<Entry> rows) throws IOException {
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) {
addUnique(i.next());
}
}
public synchronized kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException {
finishInitialization();
// if the new entry is within the initialization part, just delete it
kelondroRow.Entry indexentry = index0.remove(key, keepOrder);
if (indexentry != null) {
assert index0.remove(key, true) == null; // check if remove worked
return indexentry;
}
// else remove it from the index1
return index1.remove(key, keepOrder);
}
public synchronized kelondroRow.Entry removeOne() throws IOException {
if ((index1 != null) && (index1.size() != 0)) {
return index1.removeOne();
}
if ((index0 != null) && (index0.size() != 0)) {
return index0.removeOne();
}
return null;
}
public synchronized int size() {
if ((index0 != null) && (index1 == null)) {
return index0.size();
}
if ((index0 == null) && (index1 != null)) {
return index1.size();
}
assert ((index0 != null) && (index1 != null));
return index0.size() + index1.size();
}
@SuppressWarnings("unchecked")
public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
// returns the key-iterator of the underlying kelondroIndex
if (index1 == null) {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new kelondroRowSet(rowdef, 0);
return index0.keys(up, firstKey);
}
assert (index1 != null);
if (index0 == null) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.keys(up, firstKey);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(
index0.keys(up, firstKey),
index1.keys(up, firstKey),
rowdef.objectOrder,
kelondroMergeIterator.simpleMerge,
true);
}
@SuppressWarnings("unchecked")
public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(boolean up, byte[] firstKey) throws IOException {
// returns the row-iterator of the underlying kelondroIndex
if (index1 == null) {
// finish initialization phase
index0.sort();
index0.uniq();
index1 = new kelondroRowSet(rowdef, 0);
return index0.rows(up, firstKey);
}
assert (index1 != null);
if (index0 == null) {
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return index1.rows(up, firstKey);
}
//assert consistencyAnalysis0() : "consistency problem: " + consistencyAnalysis();
return new kelondroMergeIterator(
index0.rows(up, firstKey),
index1.rows(up, firstKey),
rowdef.objectOrder,
kelondroMergeIterator.simpleMerge,
true);
}
public kelondroProfile profile() {
if (index0 == null) return index1.profile();
if (index1 == null) return index0.profile();
return kelondroProfile.consolidate(index0.profile(), index1.profile());
}
public synchronized void close() {
if (index0 != null) index0.close();
if (index1 != null) index1.close();
}
public String filename() {
return null; // this does not have a file name
}
}

@ -51,13 +51,13 @@ import java.util.LinkedList;
import java.util.Map; import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import de.anomic.kelondro.kelondroAbstractRecords;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache; import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexTable; import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroStack; import de.anomic.kelondro.kelondroStack;
import de.anomic.kelondro.kelondroAbstractRecords;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
@ -67,27 +67,46 @@ public class plasmaCrawlBalancer {
private static final String indexSuffix = "8.db"; private static final String indexSuffix = "8.db";
// a shared domainAccess map for all balancers // a shared domainAccess map for all balancers
private static final Map domainAccess = Collections.synchronizedMap(new HashMap()); private static final Map<String, domaccess> domainAccess = Collections.synchronizedMap(new HashMap<String, domaccess>());
// definition of payload for fileStack // definition of payload for fileStack
private static final kelondroRow stackrow = new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength, kelondroBase64Order.enhancedCoder, 0); private static final kelondroRow stackrow = new kelondroRow("byte[] urlhash-" + yacySeedDB.commonHashLength, kelondroBase64Order.enhancedCoder, 0);
// class variables // class variables
private ArrayList urlRAMStack; // a list that is flused first private ArrayList<String> urlRAMStack; // a list that is flushed first
private kelondroStack urlFileStack; // a file with url hashes private kelondroStack urlFileStack; // a file with url hashes
private kelondroIndex urlFileIndex; private kelondroIndex urlFileIndex;
private HashMap domainStacks; // a map from domain name part to Lists with url hashs private HashMap<String, LinkedList<String>> domainStacks; // a map from domain name part to Lists with url hashs
private File cacheStacksPath; private File cacheStacksPath;
private String stackname; private String stackname;
private boolean top; // to alternate between top and bottom of the file stack private boolean top; // to alternate between top and bottom of the file stack
public static class domaccess {
    // access record: time stamp (milliseconds, from System.currentTimeMillis())
    // of the most recent construction/update, and the number of update() calls
    long time;
    int count;

    // a new record starts at the current time with a counter of zero
    public domaccess() {
        count = 0;
        time = System.currentTimeMillis();
    }

    // number of update() calls made on this record
    public int count() {
        return count;
    }

    // time stamp set by the constructor or by the latest update()
    public long time() {
        return time;
    }

    // registers one more access: counter goes up by one, time stamp is refreshed
    public void update() {
        count += 1;
        time = System.currentTimeMillis();
    }
}
public plasmaCrawlBalancer(File cachePath, String stackname) { public plasmaCrawlBalancer(File cachePath, String stackname) {
this.cacheStacksPath = cachePath; this.cacheStacksPath = cachePath;
this.stackname = stackname; this.stackname = stackname;
File stackFile = new File(cachePath, stackname + stackSuffix); File stackFile = new File(cachePath, stackname + stackSuffix);
this.urlFileStack = kelondroStack.open(stackFile, stackrow); this.urlFileStack = kelondroStack.open(stackFile, stackrow);
this.domainStacks = new HashMap(); this.domainStacks = new HashMap<String, LinkedList<String>>();
this.urlRAMStack = new ArrayList(); this.urlRAMStack = new ArrayList<String>();
this.top = true; this.top = true;
// create a stack for newly entered entries // create a stack for newly entered entries
@ -147,8 +166,8 @@ public class plasmaCrawlBalancer {
// returns number of deletions // returns number of deletions
// first find a list of url hashes that shall be deleted // first find a list of url hashes that shall be deleted
Iterator i = urlFileIndex.rows(true, null); Iterator<kelondroRow.Entry> i = urlFileIndex.rows(true, null);
ArrayList urlHashes = new ArrayList(); ArrayList<String> urlHashes = new ArrayList<String>();
kelondroRow.Entry rowEntry; kelondroRow.Entry rowEntry;
plasmaCrawlEntry crawlEntry; plasmaCrawlEntry crawlEntry;
while (i.hasNext()) { while (i.hasNext()) {
@ -160,15 +179,15 @@ public class plasmaCrawlBalancer {
} }
// then delete all these urls from the queues and the file index // then delete all these urls from the queues and the file index
i = urlHashes.iterator(); Iterator<String> j = urlHashes.iterator();
while (i.hasNext()) this.remove((String) i.next()); while (j.hasNext()) this.remove(j.next());
return urlHashes.size(); return urlHashes.size();
} }
public synchronized plasmaCrawlEntry remove(String urlhash) throws IOException { public synchronized plasmaCrawlEntry remove(String urlhash) throws IOException {
// this method is only here, because so many import/export methods need it // this method is only here, because so many import/export methods need it
// and it was implemented in the previous architecture // and it was implemented in the previous architecture
// however, usage is not recommendet // however, usage is not recommended
int s = urlFileIndex.size(); int s = urlFileIndex.size();
kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false); kelondroRow.Entry entry = urlFileIndex.remove(urlhash.getBytes(), false);
if (entry == null) return null; if (entry == null) return null;
@ -177,7 +196,7 @@ public class plasmaCrawlBalancer {
// now delete that thing also from the queues // now delete that thing also from the queues
// iterate through the RAM stack // iterate through the RAM stack
Iterator i = urlRAMStack.iterator(); Iterator<String> i = urlRAMStack.iterator();
String h; String h;
while (i.hasNext()) { while (i.hasNext()) {
h = (String) i.next(); h = (String) i.next();
@ -189,11 +208,11 @@ public class plasmaCrawlBalancer {
// iterate through the file stack // iterate through the file stack
// in general this is a bad idea. But this can only be avoided by avoidance of this method // in general this is a bad idea. But this can only be avoided by avoidance of this method
i = urlFileStack.stackIterator(true); Iterator<kelondroRow.Entry> j = urlFileStack.stackIterator(true);
while (i.hasNext()) { while (j.hasNext()) {
h = new String(((kelondroRow.Entry) i.next()).getColBytes(0)); h = new String(j.next().getColBytes(0));
if (h.equals(urlhash)) { if (h.equals(urlhash)) {
i.remove(); j.remove();
return new plasmaCrawlEntry(entry); return new plasmaCrawlEntry(entry);
} }
} }
@ -236,9 +255,9 @@ public class plasmaCrawlBalancer {
private boolean domainStacksNotEmpty() { private boolean domainStacksNotEmpty() {
if (domainStacks == null) return false; if (domainStacks == null) return false;
synchronized (domainStacks) { synchronized (domainStacks) {
Iterator i = domainStacks.values().iterator(); Iterator<LinkedList<String>> i = domainStacks.values().iterator();
while (i.hasNext()) { while (i.hasNext()) {
if (((LinkedList) i.next()).size() > 0) return true; if (i.next().size() > 0) return true;
} }
} }
return false; return false;
@ -248,8 +267,8 @@ public class plasmaCrawlBalancer {
if (domainStacks == null) return 0; if (domainStacks == null) return 0;
int sum = 0; int sum = 0;
synchronized (domainStacks) { synchronized (domainStacks) {
Iterator i = domainStacks.values().iterator(); Iterator<LinkedList<String>> i = domainStacks.values().iterator();
while (i.hasNext()) sum += ((LinkedList) i.next()).size(); while (i.hasNext()) sum += i.next().size();
} }
return sum; return sum;
} }
@ -259,12 +278,12 @@ public class plasmaCrawlBalancer {
// the minimumleft value is a limit for the number of entries that should be left // the minimumleft value is a limit for the number of entries that should be left
if (domainStacks.size() == 0) return; if (domainStacks.size() == 0) return;
synchronized (domainStacks) { synchronized (domainStacks) {
Iterator i = domainStacks.entrySet().iterator(); Iterator<Map.Entry<String, LinkedList<String>>> i = domainStacks.entrySet().iterator();
Map.Entry entry; Map.Entry<String, LinkedList<String>> entry;
LinkedList list; LinkedList<String> list;
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
list = (LinkedList) entry.getValue(); list = entry.getValue();
if (list.size() > minimumleft) { if (list.size() > minimumleft) {
if (ram) { if (ram) {
urlRAMStack.add(list.removeFirst()); urlRAMStack.add(list.removeFirst());
@ -298,10 +317,10 @@ public class plasmaCrawlBalancer {
// extend domain stack // extend domain stack
String dom = entry.url().hash().substring(6); String dom = entry.url().hash().substring(6);
LinkedList domainList = (LinkedList) domainStacks.get(dom); LinkedList<String> domainList = domainStacks.get(dom);
if (domainList == null) { if (domainList == null) {
// create new list // create new list
domainList = new LinkedList(); domainList = new LinkedList<String>();
synchronized (domainStacks) { synchronized (domainStacks) {
domainList.add(entry.url().hash()); domainList.add(entry.url().hash());
domainStacks.put(dom, domainList); domainStacks.put(dom, domainList);
@ -336,19 +355,19 @@ public class plasmaCrawlBalancer {
// we select specific domains that have not been used for a long time // we select specific domains that have not been used for a long time
// i.e. 60 seconds. Latest arrivals that have not yet been crawled // i.e. 60 seconds. Latest arrivals that have not yet been crawled
// fit also in that scheme // fit also in that scheme
Iterator i = domainStacks.entrySet().iterator(); Iterator<Map.Entry<String, LinkedList<String>>> i = domainStacks.entrySet().iterator();
Map.Entry entry; Map.Entry<String, LinkedList<String>> entry;
String domhash; String domhash;
long delta, maxdelta = 0; long delta, maxdelta = 0;
String maxhash = null; String maxhash = null;
LinkedList domlist; LinkedList<String> domlist;
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
domhash = (String) entry.getKey(); domhash = (String) entry.getKey();
delta = lastAccessDelta(domhash); delta = lastAccessDelta(domhash);
if (delta == Integer.MAX_VALUE) { if (delta == Integer.MAX_VALUE) {
// a brand new domain - we take it // a brand new domain - we take it
domlist = (LinkedList) entry.getValue(); domlist = entry.getValue();
result = (String) domlist.removeFirst(); result = (String) domlist.removeFirst();
if (domlist.size() == 0) i.remove(); if (domlist.size() == 0) i.remove();
break; break;
@ -360,7 +379,7 @@ public class plasmaCrawlBalancer {
} }
if (maxdelta > maximumAge) { if (maxdelta > maximumAge) {
// success - we found an entry from a domain that has not been used for a long time // success - we found an entry from a domain that has not been used for a long time
domlist = (LinkedList) domainStacks.get(maxhash); domlist = domainStacks.get(maxhash);
result = (String) domlist.removeFirst(); result = (String) domlist.removeFirst();
if (domlist.size() == 0) domainStacks.remove(maxhash); if (domlist.size() == 0) domainStacks.remove(maxhash);
} }
@ -371,17 +390,17 @@ public class plasmaCrawlBalancer {
// we order all domains by the number of entries per domain // we order all domains by the number of entries per domain
// then we iterate through these domains in descending entry order // then we iterate through these domains in descending entry order
// and that that one, that has a delta > minimumDelta // and that that one, that has a delta > minimumDelta
Iterator i = domainStacks.entrySet().iterator(); Iterator<Map.Entry<String, LinkedList<String>>> i = domainStacks.entrySet().iterator();
Map.Entry entry; Map.Entry<String, LinkedList<String>> entry;
String domhash; String domhash;
LinkedList domlist; LinkedList<String> domlist;
TreeMap hitlist = new TreeMap(); TreeMap<Integer, String> hitlist = new TreeMap<Integer, String>();
int count = 0; int count = 0;
// first collect information about sizes of the domain lists // first collect information about sizes of the domain lists
while (i.hasNext()) { while (i.hasNext()) {
entry = (Map.Entry) i.next(); entry = i.next();
domhash = (String) entry.getKey(); domhash = entry.getKey();
domlist = (LinkedList) entry.getValue(); domlist = entry.getValue();
hitlist.put(new Integer(domlist.size() * 100 + count++), domhash); hitlist.put(new Integer(domlist.size() * 100 + count++), domhash);
} }
@ -394,7 +413,7 @@ public class plasmaCrawlBalancer {
if (maxhash == null) maxhash = domhash; // remember first entry if (maxhash == null) maxhash = domhash; // remember first entry
delta = lastAccessDelta(domhash); delta = lastAccessDelta(domhash);
if (delta > minimumGlobalDelta) { if (delta > minimumGlobalDelta) {
domlist = (LinkedList) domainStacks.get(domhash); domlist = domainStacks.get(domhash);
result = (String) domlist.removeFirst(); result = (String) domlist.removeFirst();
if (domlist.size() == 0) domainStacks.remove(domhash); if (domlist.size() == 0) domainStacks.remove(domhash);
break; break;
@ -403,7 +422,7 @@ public class plasmaCrawlBalancer {
// if we did yet not choose any entry, we simply take that one with the most entries // if we did yet not choose any entry, we simply take that one with the most entries
if ((result == null) && (maxhash != null)) { if ((result == null) && (maxhash != null)) {
domlist = (LinkedList) domainStacks.get(maxhash); domlist = domainStacks.get(maxhash);
result = (String) domlist.removeFirst(); result = (String) domlist.removeFirst();
if (domlist.size() == 0) domainStacks.remove(maxhash); if (domlist.size() == 0) domainStacks.remove(maxhash);
} }
@ -467,16 +486,18 @@ public class plasmaCrawlBalancer {
} }
// update statistical data // update statistical data
domainAccess.put(result.substring(6), new Long(System.currentTimeMillis())); domaccess lastAccess = domainAccess.get(result.substring(6));
if (lastAccess == null) lastAccess = new domaccess(); else lastAccess.update();
domainAccess.put(result.substring(6), lastAccess);
return crawlEntry; return crawlEntry;
} }
private long lastAccessDelta(String hash) { private long lastAccessDelta(String hash) {
assert hash != null; assert hash != null;
Long lastAccess = (Long) domainAccess.get((hash.length() > 6) ? hash.substring(6) : hash); domaccess lastAccess = domainAccess.get((hash.length() > 6) ? hash.substring(6) : hash);
if (lastAccess == null) return Long.MAX_VALUE; // never accessed if (lastAccess == null) return Long.MAX_VALUE; // never accessed
return System.currentTimeMillis() - lastAccess.longValue(); return System.currentTimeMillis() - lastAccess.time();
} }
public synchronized plasmaCrawlEntry top(int dist) throws IOException { public synchronized plasmaCrawlEntry top(int dist) throws IOException {
@ -507,13 +528,13 @@ public class plasmaCrawlBalancer {
return new plasmaCrawlEntry(entry); return new plasmaCrawlEntry(entry);
} }
public synchronized Iterator iterator() throws IOException { public synchronized Iterator<plasmaCrawlEntry> iterator() throws IOException {
return new EntryIterator(); return new EntryIterator();
} }
private class EntryIterator implements Iterator { private class EntryIterator implements Iterator<plasmaCrawlEntry> {
private Iterator rowIterator; private Iterator<kelondroRow.Entry> rowIterator;
public EntryIterator() throws IOException { public EntryIterator() throws IOException {
rowIterator = urlFileIndex.rows(true, null); rowIterator = urlFileIndex.rows(true, null);
@ -523,7 +544,7 @@ public class plasmaCrawlBalancer {
return (rowIterator == null) ? false : rowIterator.hasNext(); return (rowIterator == null) ? false : rowIterator.hasNext();
} }
public Object next() { public plasmaCrawlEntry next() {
kelondroRow.Entry entry = (kelondroRow.Entry) rowIterator.next(); kelondroRow.Entry entry = (kelondroRow.Entry) rowIterator.next();
try { try {
return (entry == null) ? null : new plasmaCrawlEntry(entry); return (entry == null) ? null : new plasmaCrawlEntry(entry);

Loading…
Cancel
Save