- removed all InputStream.available() because this does not work for files > 2GB
- iterators terminate when an IOException occurs
- added handling of non-executing index.add methods to enhance assert usage
- added index for file indexes > 2GB, to be used in new indexHeap

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4666 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent 94d3d3a86f
commit 696b8ee3f5

@ -45,7 +45,7 @@ import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBufferedRA;
import de.anomic.kelondro.kelondroByteOrder;
import de.anomic.kelondro.kelondroBytesIntMap;
import de.anomic.kelondro.kelondroBytesLongMap;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFixedWidthArray;
@ -60,7 +60,7 @@ public final class indexContainerHeap {
private kelondroRow payloadrow;
private serverLog log;
private kelondroBytesIntMap index;
private kelondroBytesLongMap index;
private SortedMap<String, indexContainer> cache;
private File backupFile;
private boolean readOnlyMode;
@ -145,27 +145,36 @@ public final class indexContainerHeap {
if (log != null) log.logInfo("creating index for rwi heap '" + heapFile.getName() + "'");
long start = System.currentTimeMillis();
this.index = new kelondroBytesIntMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0);
this.index = new kelondroBytesLongMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0);
DataInputStream is = null;
long urlCount = 0;
String wordHash;
byte[] word = new byte[payloadrow.primaryKeyLength];
int seek = 0, seek0;
long seek = 0, seek0;
synchronized (index) {
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
while (is.available() > 0) {
// dont test available() here because this does not work for files > 2GB
loop: while (true) {
// remember seek position
seek0 = seek;
// read word
is.readFully(word);
try {
is.readFully(word);
} catch (IOException e) {
break loop; // terminate loop
}
wordHash = new String(word);
seek += wordHash.length();
seek += (long) wordHash.length();
// read collection
seek += kelondroRowSet.skipNextRowSet(is, payloadrow);
index.addi(word, seek0);
try {
seek += (long) kelondroRowSet.skipNextRowSet(is, payloadrow);
} catch (IOException e) {
break loop; // terminate loop
}
index.addl(word, seek0);
}
}
is.close();
@ -222,23 +231,21 @@ public final class indexContainerHeap {
DataInputStream is;
byte[] word;
kelondroRow payloadrow;
indexContainer nextContainer;
public heapFileEntries(File heapFile, kelondroRow payloadrow) throws IOException {
if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist");
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
word = new byte[payloadrow.primaryKeyLength];
this.payloadrow = payloadrow;
this.nextContainer = next0();
}
public boolean hasNext() {
try {
return is.available() > 0;
} catch (IOException e) {
return false;
}
return this.nextContainer != null;
}
public indexContainer next() {
private indexContainer next0() {
try {
is.readFully(word);
return new indexContainer(new String(word), kelondroRowSet.importRowSet(is, payloadrow));
@ -246,6 +253,12 @@ public final class indexContainerHeap {
return null;
}
}
public indexContainer next() {
indexContainer n = this.nextContainer;
this.nextContainer = next0();
return n;
}
public void remove() {
throw new UnsupportedOperationException("heap dumps are read-only");
@ -340,7 +353,7 @@ public final class indexContainerHeap {
// check if the index contains the key
try {
return index.geti(key.getBytes()) >= 0;
return index.getl(key.getBytes()) >= 0;
} catch (IOException e) {
e.printStackTrace();
return false;
@ -361,7 +374,7 @@ public final class indexContainerHeap {
assert index.row().primaryKeyLength == key.length();
// check if the index contains the key
int pos = index.geti(key.getBytes());
long pos = index.getl(key.getBytes());
if (pos < 0) return null;
// access the file and read the container

@ -67,13 +67,13 @@ public class kelondroBytesIntMap {
return (int) oldentry.getColLong(1);
}
public synchronized void addi(byte[] key, int i) throws IOException {
public synchronized boolean addi(byte[] key, int i) throws IOException {
assert i >= 0 : "i = " + i;
assert (key != null);
kelondroRow.Entry newentry = this.rowdef.newEntry();
newentry.setCol(0, key);
newentry.setCol(1, i);
index.addUnique(newentry);
return index.addUnique(newentry);
}
public synchronized ArrayList<Integer[]> removeDoubles() throws IOException {

@ -0,0 +1,131 @@
// kelondroBytesLongMap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 08.04.2008 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Maps byte[] keys to non-negative long values on top of a two-column
 * {@link kelondroIndex} (column 0 = key, column 1 = 8-byte value).
 * The long-valued counterpart of kelondroBytesIntMap, intended for
 * positions that do not fit into an int (e.g. offsets in files > 2GB).
 *
 * All lookups and removals return -1 when the key is not present.
 */
public class kelondroBytesLongMap {

    private final kelondroRow rowdef;
    private kelondroIndex index; // set to null by close()

    /**
     * Wraps an existing index.
     *
     * @param ki a key/value index with exactly two columns, the second one 8 bytes wide
     */
    public kelondroBytesLongMap(kelondroIndex ki) {
        assert (ki.row().columns() == 2); // must be a key/index relation
        assert (ki.row().width(1) == 8); // the value must be a b256-encoded long, 8 bytes long
        this.index = ki;
        this.rowdef = ki.row();
    }

    /**
     * Creates a new map backed by a {@link kelondroRAMIndex}.
     *
     * @param keylength   length of the keys in bytes
     * @param objectOrder ordering used for the keys
     * @param space       passed through to the kelondroRAMIndex constructor
     */
    public kelondroBytesLongMap(int keylength, kelondroByteOrder objectOrder, int space) {
        // NOTE(review): the value column is declared as "int c-8"; presumably the explicit
        // width of 8 is what matters here - confirm against kelondroColumn's parser
        this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-8 {b256}")}, objectOrder, 0);
        this.index = new kelondroRAMIndex(rowdef, space);
    }

    /**
     * @return the row definition of the underlying index
     */
    public kelondroRow row() {
        return index.row();
    }

    /**
     * Looks up the value stored for a key.
     *
     * @param key the key, must not be null
     * @return the stored value, or -1 if the key is not in the index
     * @throws IOException if the underlying index fails
     */
    public synchronized long getl(byte[] key) throws IOException {
        assert (key != null);
        kelondroRow.Entry indexentry = index.get(key);
        if (indexentry == null) return -1;
        return indexentry.getColLong(1);
    }

    /**
     * Stores a value for a key using the put operation of the underlying index.
     *
     * @param key the key, must not be null
     * @param l   the value, must be non-negative
     * @return the value of the entry returned by the index' put, or -1 if it returned none
     * @throws IOException if the underlying index fails
     */
    public synchronized long putl(byte[] key, long l) throws IOException {
        assert l >= 0 : "l = " + l;
        assert (key != null);
        kelondroRow.Entry newentry = index.row().newEntry();
        newentry.setCol(0, key);
        newentry.setCol(1, l);
        kelondroRow.Entry oldentry = index.put(newentry);
        if (oldentry == null) return -1;
        return oldentry.getColLong(1);
    }

    /**
     * Adds a key/value pair using the addUnique operation of the underlying index.
     *
     * @param key the key, must not be null
     * @param l   the value, must be non-negative
     * @return the result of the underlying addUnique call
     * @throws IOException if the underlying index fails
     */
    public synchronized boolean addl(byte[] key, long l) throws IOException {
        assert l >= 0 : "l = " + l;
        assert (key != null);
        kelondroRow.Entry newentry = this.rowdef.newEntry();
        newentry.setCol(0, key);
        newentry.setCol(1, l);
        return index.addUnique(newentry);
    }

    /**
     * Runs removeDoubles on the underlying index and reports the values
     * (column 1) of the rows in every row set that the index returned.
     *
     * @return one Long[] per reported row set, holding the values of its rows
     * @throws IOException if the underlying index fails
     */
    public synchronized ArrayList<Long[]> removeDoubles() throws IOException {
        ArrayList<kelondroRowSet> indexreport = index.removeDoubles();
        ArrayList<Long[]> report = new ArrayList<Long[]>(indexreport.size());
        for (kelondroRowSet rowset: indexreport) {
            Long[] values = new Long[rowset.size()];
            Iterator<kelondroRow.Entry> ei = rowset.rows();
            int c = 0;
            while (ei.hasNext()) {
                values[c++] = Long.valueOf(ei.next().getColLong(1));
            }
            report.add(values);
        }
        return report;
    }

    /**
     * Removes the entry for a key.
     *
     * @param key the key, must not be null
     * @return the value of the removed entry, or -1 if the index returned no entry
     * @throws IOException if the underlying index fails
     */
    public synchronized long removel(byte[] key) throws IOException {
        assert (key != null);
        kelondroRow.Entry indexentry = index.remove(key, true); // keeping the order will prevent multiple re-sorts
        if (indexentry == null) return -1;
        return indexentry.getColLong(1);
    }

    /**
     * Removes one entry chosen by the underlying index (removeOne).
     *
     * @return the value of the removed entry, or -1 if the index returned no entry
     * @throws IOException if the underlying index fails
     */
    public synchronized long removeonel() throws IOException {
        kelondroRow.Entry indexentry = index.removeOne();
        if (indexentry == null) return -1;
        return indexentry.getColLong(1);
    }

    /**
     * @return the number of entries in the underlying index
     */
    public synchronized int size() {
        return index.size();
    }

    /**
     * Delegates to the key iterator of the underlying index.
     */
    public synchronized kelondroCloneableIterator<byte[]> keys(boolean up, byte[] firstKey) throws IOException {
        return index.keys(up, firstKey);
    }

    /**
     * Delegates to the row iterator of the underlying index.
     */
    public synchronized kelondroCloneableIterator<kelondroRow.Entry> rows(boolean up, byte[] firstKey) throws IOException {
        return index.rows(up, firstKey);
    }

    /**
     * @return the profile of the underlying index
     */
    public kelondroProfile profile() {
        return index.profile();
    }

    /**
     * Closes the underlying index and drops the reference to it.
     * Calling close() more than once is a no-op; any other method
     * must not be used after close().
     */
    public synchronized void close() {
        if (index == null) return; // already closed
        index.close();
        index = null;
    }

}

@ -294,7 +294,7 @@ public class kelondroCache implements kelondroIndex {
throw new UnsupportedOperationException("put with date is inefficient in kelondroCache");
}
public synchronized void addUnique(Entry row) throws IOException {
public synchronized boolean addUnique(Entry row) throws IOException {
assert (row != null);
assert (row.columns() == row().columns());
//assert (!(serverLog.allZero(row.getColBytes(index.primarykey()))));
@ -307,20 +307,21 @@ public class kelondroCache implements kelondroIndex {
this.readMissCache.remove(key, true);
this.hasnotDelete++;
// the entry does not exist before
index.addUnique(row); // write to backend
if (readHitCache != null) {
boolean added = index.addUnique(row); // write to backend
if (added && (readHitCache != null)) {
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
if (dummy == null) this.writeUnique++; else this.writeDouble++;
}
return;
return added;
}
// the worst case: we must write to the back-end directly
index.addUnique(row);
if (readHitCache != null) {
boolean added = index.addUnique(row);
if (added && (readHitCache != null)) {
kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry
if (dummy == null) this.writeUnique++; else this.writeDouble++;
}
return added;
}
public synchronized void addUnique(Entry row, Date entryDate) throws IOException {
@ -349,9 +350,13 @@ public class kelondroCache implements kelondroIndex {
}
}
public synchronized void addUniqueMultiple(List<Entry> rows) throws IOException {
public synchronized int addUniqueMultiple(List<Entry> rows) throws IOException {
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) addUnique((Entry) i.next());
int c = 0;
while (i.hasNext()) {
if (addUnique((Entry) i.next())) c++;
}
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {

@ -34,8 +34,6 @@ import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Iterator;
import de.anomic.server.logging.serverLog;
/**
* The EcoFS is a flat file with records of fixed length. The file does not contain
* any meta information and the first record starts right at file position 0
@ -530,10 +528,9 @@ public class kelondroEcoFS {
public static class ChunkIterator implements Iterator<byte[]> {
private int recordsize, chunksize, chunkcounter;
private int recordsize, chunksize;
private DataInputStream stream;
private serverLog log;
private File file;
private byte[] nextBytes;
/**
* create a ChunkIterator
@ -544,27 +541,20 @@ public class kelondroEcoFS {
* @param chunksize: the size of the chunks that are returned by next(). remaining bytes until the length of recordsize are skipped
* @throws FileNotFoundException
*/
public ChunkIterator(File file, int recordsize, int chunksize, serverLog log) throws FileNotFoundException {
public ChunkIterator(File file, int recordsize, int chunksize) throws FileNotFoundException {
assert (file.exists());
assert file.length() % recordsize == 0;
this.recordsize = recordsize;
this.chunksize = chunksize;
this.chunkcounter = 0; // only for logging
this.stream = new DataInputStream(new BufferedInputStream(new FileInputStream(file), 64 * 1024));
this.log = log;
this.file = file;
this.nextBytes = next0();
}
public boolean hasNext() {
try {
return stream != null && stream.available() > 0;
} catch (IOException e) {
e.printStackTrace();
return false;
}
return nextBytes != null;
}
public byte[] next() {
public byte[] next0() {
byte[] chunk = new byte[chunksize];
int r, s;
try {
@ -579,16 +569,16 @@ public class kelondroEcoFS {
}
return chunk;
} catch (IOException e) {
if (log == null) {
serverLog.logWarning("kelondroEcoFS", "ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e);
} else {
log.logWarning("ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e);
}
this.stream = null;
return null;
}
}
public byte[] next() {
byte[] n = this.nextBytes;
this.nextBytes = next0();
return n;
}
public void remove() {
throw new UnsupportedOperationException();
}

@ -65,6 +65,7 @@ public class kelondroEcoTable implements kelondroIndex {
kelondroBytesIntMap index;
kelondroBufferedEcoFS file;
kelondroRow rowdef;
int fail;
kelondroRow taildef;
private int buffersize;
@ -72,6 +73,7 @@ public class kelondroEcoTable implements kelondroIndex {
public kelondroEcoTable(File tablefile, kelondroRow rowdef, int useTailCache, int buffersize, int initialSpace) {
this.rowdef = rowdef;
this.buffersize = buffersize;
this.fail = 0;
assert rowdef.primaryKeyIndex == 0;
// define the taildef, a row like the rowdef but without the first column
kelondroColumn[] cols = new kelondroColumn[rowdef.columns() - 1];
@ -129,8 +131,9 @@ public class kelondroEcoTable implements kelondroIndex {
// write the key into the index table
assert key != null;
if (key == null) {i++; continue;}
index.addi(key, i++);
if (!index.addi(key, i++)) fail++;
assert index.size() + fail == i : "index.size() = " + index.size() + ", i = " + i + ", fail = " + fail + ", key = '" + new String(key) + "'";
if ((i % 10000) == 0) {
System.out.print('.');
System.out.flush();
@ -139,7 +142,7 @@ public class kelondroEcoTable implements kelondroIndex {
} else {
byte[] record;
key = new byte[rowdef.primaryKeyLength];
Iterator<byte[]> ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize, null);
Iterator<byte[]> ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize);
while (ri.hasNext()) {
record = ri.next();
assert record != null;
@ -147,8 +150,8 @@ public class kelondroEcoTable implements kelondroIndex {
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
// write the key into the index table
index.addi(key, i++);
if (!index.addi(key, i++)) fail++;
// write the tail into the table
table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true));
@ -164,6 +167,7 @@ public class kelondroEcoTable implements kelondroIndex {
System.out.flush();
this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize);
ArrayList<Integer[]> doubles = index.removeDoubles();
assert index.size() + doubles.size() + fail == i;
System.out.println(" -removed " + doubles.size() + " doubles- done.");
if (doubles.size() > 0) {
System.out.println("DEBUG " + tablefile + ": WARNING - EcoTable " + tablefile + " has " + doubles.size() + " doubles");
@ -174,7 +178,7 @@ public class kelondroEcoTable implements kelondroIndex {
for (Integer[] ds: doubles) {
file.get(ds[0].longValue(), record, 0);
System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength);
index.addi(key, ds[0].intValue());
if (!index.addi(key, ds[0].intValue())) fail++;
}
// then remove the other doubles by removing them from the table, but do a re-indexing while doing that
// first aggregate all the delete positions because the elements from the top positions must be removed first
@ -190,6 +194,12 @@ public class kelondroEcoTable implements kelondroIndex {
removeInFile(top.intValue());
}
}
try {
assert file.size() == index.size() + doubles.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i;
} catch (IOException e) {
e.printStackTrace();
}
} catch (FileNotFoundException e) {
// should never happen
e.printStackTrace();
@ -198,11 +208,6 @@ public class kelondroEcoTable implements kelondroIndex {
e.printStackTrace();
throw new kelondroException(e.getMessage());
}
try {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
} catch (IOException e) {
e.printStackTrace();
}
// track this table
tableTracker.put(tablefile.toString(), this);
@ -217,7 +222,7 @@ public class kelondroEcoTable implements kelondroIndex {
*/
public Iterator<byte[]> keyIterator(File file, kelondroRow rowdef) throws FileNotFoundException {
assert rowdef.primaryKeyIndex == 0;
return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength, null);
return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength);
}
public static long tableSize(File tablefile, int recordsize) {
@ -254,26 +259,30 @@ public class kelondroEcoTable implements kelondroIndex {
return (int) ((rowdef.primaryKeyLength + 4) * tableSize(f, rowdef.objectsize) * kelondroRowCollection.growfactor);
}
public synchronized void addUnique(Entry row) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
public synchronized boolean addUnique(Entry row) throws IOException {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
int i = (int) file.size();
index.addi(row.getPrimaryKeyBytes(), i);
boolean added = index.addi(row.getPrimaryKeyBytes(), i);
if (!added) return false;
if (table != null) {
assert table.size() == i;
table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
}
file.put(i, row.bytes(), 0);
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
return true;
}
public synchronized void addUniqueMultiple(List<Entry> rows) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
public synchronized int addUniqueMultiple(List<Entry> rows) throws IOException {
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
Iterator<Entry> i = rows.iterator();
int c = 0;
while (i.hasNext()) {
addUnique(i.next());
if (addUnique(i.next())) c++;
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {
@ -318,7 +327,7 @@ public class kelondroEcoTable implements kelondroIndex {
}
public synchronized Entry get(byte[] key) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
int i = index.geti(key);
if (i == -1) return null;
@ -334,13 +343,13 @@ public class kelondroEcoTable implements kelondroIndex {
System.arraycopy(key, 0, b, 0, key.length);
System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength);
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
return rowdef.newEntry(b);
}
public synchronized boolean has(byte[] key) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
return index.geti(key) >= 0;
}
@ -354,7 +363,7 @@ public class kelondroEcoTable implements kelondroIndex {
}
public synchronized Entry put(Entry row) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
assert row != null;
assert row.bytes() != null;
@ -381,7 +390,7 @@ public class kelondroEcoTable implements kelondroIndex {
table.set(i, taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true));
file.put(i, row.bytes(), 0);
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
// return old value
return rowdef.newEntry(b);
@ -392,12 +401,12 @@ public class kelondroEcoTable implements kelondroIndex {
}
public synchronized void putMultiple(List<Entry> rows) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
Iterator<Entry> i = rows.iterator();
while (i.hasNext()) {
put(i.next());
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
}
private void removeInFile(int i) throws IOException {
@ -433,7 +442,7 @@ public class kelondroEcoTable implements kelondroIndex {
}
public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
assert keepOrder == false; // this class cannot keep the order during a remove
assert key.length == rowdef.primaryKeyLength;
@ -462,7 +471,7 @@ public class kelondroEcoTable implements kelondroIndex {
System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength);
index.puti(k, i);
}
assert (file.size() == index.size());
assert (file.size() == index.size() + fail);
} else {
// get result value from the table copy, so we don't need to read it from the file
kelondroRow.Entry v = table.get(i);
@ -488,17 +497,17 @@ public class kelondroEcoTable implements kelondroIndex {
kelondroRow.Entry lr = rowdef.newEntry(p);
index.puti(lr.getPrimaryKeyBytes(), i);
}
assert (file.size() == index.size());
assert (file.size() == index.size() + fail);
assert (table.size() == index.size()) : "table.size() = " + table.size() + ", index.size() = " + index.size();
}
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
assert index.size() + 1 == sb : "index.size() = " + index.size() + ", sb = " + sb;
return rowdef.newEntry(b);
}
public synchronized Entry removeOne() throws IOException {
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert ((table == null) || (table.size() == index.size()));
byte[] le = new byte[rowdef.objectsize];
file.cleanLast(le, 0);
@ -506,7 +515,7 @@ public class kelondroEcoTable implements kelondroIndex {
int i = index.removei(lr.getPrimaryKeyBytes());
assert i >= 0;
if (table != null) table.removeOne();
assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size();
assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size();
return lr;
}

@ -299,13 +299,13 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
return oldentry;
}
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
assert row.objectsize() == this.rowdef.objectsize;
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
index.addi(row.getColBytes(0), super.add(row));
return index.addi(row.getColBytes(0), super.add(row));
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
// add a list of entries in a ordered way.
// this should save R/W head positioning time
TreeMap<Integer, byte[]> indexed_result = super.addMultiple(rows);
@ -318,7 +318,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
index.puti(entry.getValue(), entry.getKey().intValue());
}
assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size();
return indexed_result.size();
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() throws IOException {

@ -66,8 +66,8 @@ public interface kelondroIndex {
public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException;
public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException;
public void putMultiple(List<kelondroRow.Entry> rows) throws IOException; // for R/W head path optimization
public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check
public void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException; // no double-check
public boolean addUnique(kelondroRow.Entry row) throws IOException; // no double-check
public int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException; // no double-check
public ArrayList<kelondroRowSet> removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique)
public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException;
public kelondroRow.Entry removeOne() throws IOException;

@ -105,22 +105,24 @@ public class kelondroRAMIndex implements kelondroIndex {
}
}
public synchronized void addUnique(kelondroRow.Entry entry) {
public synchronized boolean addUnique(kelondroRow.Entry entry) {
assert (entry != null);
if (index1 == null) {
// we are in the initialization phase
index0.addUnique(entry);
return index0.addUnique(entry);
} else {
// initialization is over, add to secondary index
index1.addUnique(entry);
return index1.addUnique(entry);
}
}
public void addUniqueMultiple(List<Entry> rows) {
public int addUniqueMultiple(List<Entry> rows) {
Iterator<Entry> i = rows.iterator();
int c = 0;
while (i.hasNext()) {
addUnique(i.next());
if (addUnique(i.next())) c++;
}
return c;
}
public synchronized ArrayList<kelondroRowSet> removeDoubles() {

@ -296,22 +296,26 @@ public class kelondroRowCollection {
set(index, a);
}
public synchronized void addUnique(kelondroRow.Entry row) {
public synchronized boolean addUnique(kelondroRow.Entry row) {
byte[] r = row.bytes();
addUnique(r, 0, r.length);
return addUnique(r, 0, r.length);
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) {
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) {
assert this.sortBound == 0 : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount;
Iterator<kelondroRow.Entry> i = rows.iterator();
while (i.hasNext()) addUnique(i.next());
int c = 0;
while (i.hasNext()) {
if (addUnique(i.next())) c++;
}
return c;
}
public synchronized void add(byte[] a) {
addUnique(a, 0, a.length);
}
private final void addUnique(byte[] a, int astart, int alength) {
private final boolean addUnique(byte[] a, int astart, int alength) {
assert (a != null);
assert (astart >= 0) && (astart < a.length) : " astart = " + a;
assert (!(serverLog.allZero(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength);
@ -319,7 +323,7 @@ public class kelondroRowCollection {
assert (astart + alength <= a.length);
if (bugappearance(a, astart, alength)) {
System.out.println("*** DEBUG: patched wrong a = " + serverLog.arrayList(a, astart, alength));
return; // TODO: this is temporary; remote peers may still submit bad entries
return false; // TODO: this is temporary; remote peers may still submit bad entries
}
assert (!(bugappearance(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength);
int l = Math.min(rowdef.objectsize, Math.min(alength, a.length - astart));
@ -327,6 +331,7 @@ public class kelondroRowCollection {
System.arraycopy(a, astart, chunkcache, rowdef.objectsize * chunkcount, l);
chunkcount++;
this.lastTimeWrote = System.currentTimeMillis();
return true;
}
private static boolean bugappearance(byte[] a, int astart, int alength) {

@ -209,7 +209,7 @@ public class kelondroSQLTable implements kelondroIndex {
}
}
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
throw new UnsupportedOperationException();
}
@ -217,7 +217,7 @@ public class kelondroSQLTable implements kelondroIndex {
throw new UnsupportedOperationException();
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
throw new UnsupportedOperationException();
}

@ -308,15 +308,15 @@ public class kelondroSplitTable implements kelondroIndex {
return null;
}
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
addUnique(row, null);
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
return addUnique(row, null);
}
public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
public synchronized boolean addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
assert row.objectsize() <= this.rowdef.objectsize;
if ((entryDate == null) || (entryDate.after(new Date()))) entryDate = new Date(); // fix date
String suffix = dateSuffix(entryDate);
if (suffix == null) return;
if (suffix == null) return false;
kelondroIndex table = (kelondroIndex) tables.get(suffix);
if (table == null) {
// make new table
@ -329,12 +329,16 @@ public class kelondroSplitTable implements kelondroIndex {
}
tables.put(suffix, table);
}
table.addUnique(row);
return table.addUnique(row);
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
Iterator<kelondroRow.Entry> i = rows.iterator();
while (i.hasNext()) addUnique(i.next());
int c = 0;
while (i.hasNext()) {
if (addUnique(i.next())) c++;
}
return c;
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows, Date entryDate) throws IOException {

@ -489,17 +489,23 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex
return result;
}
public synchronized void addUnique(kelondroRow.Entry row) throws IOException {
public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException {
int s = this.size();
this.put(row);
return this.size() > s;
}
public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException {
this.put(row, entryDate);
}
public synchronized void addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
public synchronized int addUniqueMultiple(List<kelondroRow.Entry> rows) throws IOException {
Iterator<kelondroRow.Entry> i = rows.iterator();
while (i.hasNext()) addUnique(i.next());
int c = 0;
while (i.hasNext()) {
if (addUnique(i.next())) c++;
}
return c;
}
private void assignChild(kelondroNode parentNode, kelondroNode childNode, int childType) throws IOException {

@ -52,7 +52,6 @@ import java.net.MalformedURLException;
import java.net.NoRouteToHostException;
import java.net.SocketException;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Date;
import de.anomic.http.HttpClient;

@ -181,7 +181,7 @@ public final class plasmaCrawlStacker extends Thread {
public void close() {
if (this.dbtype == QUEUE_DB_TYPE_RAM) {
this.log.logFine("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait.");
this.log.logInfo("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait.");
while (size() > 0) {
if (!job()) break;
}

@ -480,9 +480,7 @@ public final class serverCore extends serverAbstractBusyThread implements server
Thread.interrupted();
// shut down all busySessions
for (Session session: this.busySessions) {
try {session.notify();} catch (IllegalMonitorStateException e) {e.printStackTrace();}
try {session.notifyAll();} catch (IllegalMonitorStateException e) {e.printStackTrace();}
if (this.busySessions != null) for (Session session: this.busySessions) {
try {session.interrupt();} catch (SecurityException e ) {e.printStackTrace();}
}

@ -21,7 +21,7 @@ WORDMIGRATION.level = FINE
FILEHANDLER.level = INFO
SESSION-POOL.level = INFO
CRAWLER-POOL.level = INFO
STACKCRAWL-POOL.level = INFO
STACKCRAWL.level = INFO
MEMORY.level = INFO
# List of global handlers

Loading…
Cancel
Save