From 696b8ee3f5bf90e5d837bd166b83520431101e51 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 8 Apr 2008 11:55:59 +0000 Subject: [PATCH] fix for http://forum.yacy-websuche.de/viewtopic.php?p=6806#p6806 - removed all InputStream.available() because this does not work for files > 2GB - iterator terminate when a IOException occurs - added handling of non-executing index.add methods to enhance assert usage - added index for file indexes > 2GB, to be used in new indexHeap git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4666 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/index/indexContainerHeap.java | 47 ++++--- .../anomic/kelondro/kelondroBytesIntMap.java | 4 +- .../anomic/kelondro/kelondroBytesLongMap.java | 131 ++++++++++++++++++ source/de/anomic/kelondro/kelondroCache.java | 21 +-- source/de/anomic/kelondro/kelondroEcoFS.java | 34 ++--- .../de/anomic/kelondro/kelondroEcoTable.java | 75 +++++----- .../de/anomic/kelondro/kelondroFlexTable.java | 8 +- source/de/anomic/kelondro/kelondroIndex.java | 4 +- .../de/anomic/kelondro/kelondroRAMIndex.java | 12 +- .../kelondro/kelondroRowCollection.java | 17 ++- .../de/anomic/kelondro/kelondroSQLTable.java | 4 +- .../anomic/kelondro/kelondroSplitTable.java | 18 ++- source/de/anomic/kelondro/kelondroTree.java | 12 +- .../plasma/crawler/plasmaHTTPLoader.java | 1 - .../de/anomic/plasma/plasmaCrawlStacker.java | 2 +- source/de/anomic/server/serverCore.java | 4 +- yacy.logging | 2 +- 17 files changed, 279 insertions(+), 117 deletions(-) create mode 100644 source/de/anomic/kelondro/kelondroBytesLongMap.java diff --git a/source/de/anomic/index/indexContainerHeap.java b/source/de/anomic/index/indexContainerHeap.java index 5f7d5f953..4019cbb1a 100755 --- a/source/de/anomic/index/indexContainerHeap.java +++ b/source/de/anomic/index/indexContainerHeap.java @@ -45,7 +45,7 @@ import java.util.TreeMap; import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBufferedRA; import de.anomic.kelondro.kelondroByteOrder; -import de.anomic.kelondro.kelondroBytesIntMap; +import de.anomic.kelondro.kelondroBytesLongMap; import de.anomic.kelondro.kelondroCloneableIterator; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroFixedWidthArray; @@ -60,7 +60,7 @@ public final class indexContainerHeap { private kelondroRow payloadrow; private serverLog log; - private kelondroBytesIntMap index; + private kelondroBytesLongMap index; private SortedMap cache; private File backupFile; private boolean readOnlyMode; @@ -145,27 +145,36 @@ public final class indexContainerHeap { if (log != null) log.logInfo("creating index for rwi heap '" + heapFile.getName() + "'"); long start = System.currentTimeMillis(); - this.index = new kelondroBytesIntMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0); + this.index = new kelondroBytesLongMap(payloadrow.primaryKeyLength, (kelondroByteOrder) payloadrow.getOrdering(), 0); DataInputStream is = null; long urlCount = 0; String wordHash; byte[] word = new byte[payloadrow.primaryKeyLength]; - int seek = 0, seek0; + long seek = 0, seek0; synchronized (index) { is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024)); - while (is.available() > 0) { + // dont test available() here because this does not work for files > 2GB + loop: while (true) { // remember seek position seek0 = seek; // read word - is.readFully(word); + try { + is.readFully(word); + } catch (IOException e) { + break loop; // terminate loop + } wordHash = new String(word); - seek += wordHash.length(); + seek += (long) wordHash.length(); // read collection - seek += kelondroRowSet.skipNextRowSet(is, payloadrow); - index.addi(word, seek0); + try { + seek += (long) kelondroRowSet.skipNextRowSet(is, payloadrow); + } catch (IOException e) { + break loop; // terminate loop + } + index.addl(word, seek0); } } is.close(); @@ -222,23 +231,21 @@ public final class indexContainerHeap { DataInputStream is; byte[] word; kelondroRow payloadrow; + indexContainer nextContainer; public heapFileEntries(File heapFile, kelondroRow payloadrow) throws IOException { if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist"); is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024)); word = new byte[payloadrow.primaryKeyLength]; this.payloadrow = payloadrow; + this.nextContainer = next0(); } public boolean hasNext() { - try { - return is.available() > 0; - } catch (IOException e) { - return false; - } + return this.nextContainer != null; } - public indexContainer next() { + private indexContainer next0() { try { is.readFully(word); return new indexContainer(new String(word), kelondroRowSet.importRowSet(is, payloadrow)); @@ -246,6 +253,12 @@ public final class indexContainerHeap { return null; } } + + public indexContainer next() { + indexContainer n = this.nextContainer; + this.nextContainer = next0(); + return n; + } public void remove() { throw new UnsupportedOperationException("heap dumps are read-only"); @@ -340,7 +353,7 @@ public final class indexContainerHeap { // check if the index contains the key try { - return index.geti(key.getBytes()) >= 0; + return index.getl(key.getBytes()) >= 0; } catch (IOException e) { e.printStackTrace(); return false; @@ -361,7 +374,7 @@ public final class indexContainerHeap { assert index.row().primaryKeyLength == key.length(); // check if the index contains the key - int pos = index.geti(key.getBytes()); + long pos = index.getl(key.getBytes()); if (pos < 0) return null; // access the file and read the container diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index 6b3845be1..a08ac06bc 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -67,13 +67,13 @@ public class kelondroBytesIntMap { return (int) oldentry.getColLong(1); } - public synchronized void addi(byte[] key, int i) throws IOException { + public synchronized boolean addi(byte[] key, int i) throws IOException { assert i >= 0 : "i = " + i; assert (key != null); kelondroRow.Entry newentry = this.rowdef.newEntry(); newentry.setCol(0, key); newentry.setCol(1, i); - index.addUnique(newentry); + return index.addUnique(newentry); } public synchronized ArrayList removeDoubles() throws IOException { diff --git a/source/de/anomic/kelondro/kelondroBytesLongMap.java b/source/de/anomic/kelondro/kelondroBytesLongMap.java new file mode 100644 index 000000000..39d2f334b --- /dev/null +++ b/source/de/anomic/kelondro/kelondroBytesLongMap.java @@ -0,0 +1,131 @@ +// kelondroBytesLongMap.java +// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 08.04.2008 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; + +public class kelondroBytesLongMap { + + private kelondroRow rowdef; + private kelondroIndex index; + + public kelondroBytesLongMap(kelondroIndex ki) { + assert (ki.row().columns() == 2); // must be a key/index relation + assert (ki.row().width(1) == 8); // the value must be a b256-encoded int, 4 bytes long + this.index = ki; + this.rowdef = ki.row(); + } + + public kelondroBytesLongMap(int keylength, kelondroByteOrder objectOrder, int space) { + this.rowdef = new kelondroRow(new kelondroColumn[]{new kelondroColumn("key", kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, keylength, "key"), new kelondroColumn("int c-8 {b256}")}, objectOrder, 0); + this.index = new kelondroRAMIndex(rowdef, space); + } + + public kelondroRow row() { + return index.row(); + } + + public synchronized long getl(byte[] key) throws IOException { + assert (key != null); + kelondroRow.Entry indexentry = index.get(key); + if (indexentry == null) return -1; + return indexentry.getColLong(1); + } + + public synchronized long putl(byte[] key, long l) throws IOException { + assert l >= 0 : "l = " + l; + assert (key != null); + kelondroRow.Entry newentry = index.row().newEntry(); + newentry.setCol(0, key); + newentry.setCol(1, l); + kelondroRow.Entry oldentry = index.put(newentry); + if (oldentry == null) return -1; + return oldentry.getColLong(1); + } + + public synchronized boolean addl(byte[] key, long l) throws IOException { + assert l >= 0 : "l = " + l; + assert (key != null); + kelondroRow.Entry newentry = this.rowdef.newEntry(); + newentry.setCol(0, key); + newentry.setCol(1, l); + return index.addUnique(newentry); + } + + public synchronized ArrayList removeDoubles() throws IOException { + ArrayList indexreport = index.removeDoubles(); + ArrayList report = new ArrayList(); + Long[] is; + Iterator ei; + int c; + for (kelondroRowSet rowset: indexreport) { + is = new Long[rowset.size()]; + ei = rowset.rows(); + c = 0; + while (ei.hasNext()) { + is[c++] = new Long(ei.next().getColLong(1)); + } + report.add(is); + } + return report; + } + + public synchronized long removel(byte[] key) throws IOException { + assert (key != null); + kelondroRow.Entry indexentry = index.remove(key, true); // keeping the order will prevent multiple re-sorts + if (indexentry == null) return -1; + return indexentry.getColLong(1); + } + + public synchronized long removeonel() throws IOException { + kelondroRow.Entry indexentry = index.removeOne(); + if (indexentry == null) return -1; + return indexentry.getColLong(1); + } + + public synchronized int size() { + return index.size(); + } + + public synchronized kelondroCloneableIterator keys(boolean up, byte[] firstKey) throws IOException { + return index.keys(up, firstKey); + } + + public synchronized kelondroCloneableIterator rows(boolean up, byte[] firstKey) throws IOException { + return index.rows(up, firstKey); + } + + public kelondroProfile profile() { + return index.profile(); + } + + public synchronized void close() { + index.close(); + index = null; + } + +} diff --git a/source/de/anomic/kelondro/kelondroCache.java b/source/de/anomic/kelondro/kelondroCache.java index 955ef8cf4..64c222474 100644 --- a/source/de/anomic/kelondro/kelondroCache.java +++ b/source/de/anomic/kelondro/kelondroCache.java @@ -294,7 +294,7 @@ public class kelondroCache implements kelondroIndex { throw new UnsupportedOperationException("put with date is inefficient in kelondroCache"); } - public synchronized void addUnique(Entry row) throws IOException { + public synchronized boolean addUnique(Entry row) throws IOException { assert (row != null); assert (row.columns() == row().columns()); //assert (!(serverLog.allZero(row.getColBytes(index.primarykey())))); @@ -307,20 +307,21 @@ public class kelondroCache implements kelondroIndex { this.readMissCache.remove(key, true); this.hasnotDelete++; // the entry does not exist before - index.addUnique(row); // write to backend - if (readHitCache != null) { + boolean added = index.addUnique(row); // write to backend + if (added && (readHitCache != null)) { kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry if (dummy == null) this.writeUnique++; else this.writeDouble++; } - return; + return added; } // the worst case: we must write to the back-end directly - index.addUnique(row); - if (readHitCache != null) { + boolean added = index.addUnique(row); + if (added && (readHitCache != null)) { kelondroRow.Entry dummy = readHitCache.put(row); // learn that entry if (dummy == null) this.writeUnique++; else this.writeDouble++; } + return added; } public synchronized void addUnique(Entry row, Date entryDate) throws IOException { @@ -349,9 +350,13 @@ public class kelondroCache implements kelondroIndex { } } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized int addUniqueMultiple(List rows) throws IOException { Iterator i = rows.iterator(); - while (i.hasNext()) addUnique((Entry) i.next()); + int c = 0; + while (i.hasNext()) { + if (addUnique((Entry) i.next())) c++; + } + return c; } public synchronized ArrayList removeDoubles() throws IOException { diff --git a/source/de/anomic/kelondro/kelondroEcoFS.java b/source/de/anomic/kelondro/kelondroEcoFS.java index a0e85f2b7..161128a24 100644 --- a/source/de/anomic/kelondro/kelondroEcoFS.java +++ b/source/de/anomic/kelondro/kelondroEcoFS.java @@ -34,8 +34,6 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.util.Iterator; -import de.anomic.server.logging.serverLog; - /** * The EcoFS is a flat file with records of fixed length. The file does not contain * any meta information and the first record starts right at file position 0 @@ -530,10 +528,9 @@ public class kelondroEcoFS { public static class ChunkIterator implements Iterator { - private int recordsize, chunksize, chunkcounter; + private int recordsize, chunksize; private DataInputStream stream; - private serverLog log; - private File file; + private byte[] nextBytes; /** * create a ChunkIterator @@ -544,27 +541,20 @@ public class kelondroEcoFS { * @param chunksize: the size of the chunks that are returned by next(). remaining bytes until the lenght of recordsize are skipped * @throws FileNotFoundException */ - public ChunkIterator(File file, int recordsize, int chunksize, serverLog log) throws FileNotFoundException { + public ChunkIterator(File file, int recordsize, int chunksize) throws FileNotFoundException { assert (file.exists()); assert file.length() % recordsize == 0; this.recordsize = recordsize; this.chunksize = chunksize; - this.chunkcounter = 0; // only for logging this.stream = new DataInputStream(new BufferedInputStream(new FileInputStream(file), 64 * 1024)); - this.log = log; - this.file = file; + this.nextBytes = next0(); } public boolean hasNext() { - try { - return stream != null && stream.available() > 0; - } catch (IOException e) { - e.printStackTrace(); - return false; - } + return nextBytes != null; } - public byte[] next() { + public byte[] next0() { byte[] chunk = new byte[chunksize]; int r, s; try { @@ -579,16 +569,16 @@ public class kelondroEcoFS { } return chunk; } catch (IOException e) { - if (log == null) { - serverLog.logWarning("kelondroEcoFS", "ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e); - } else { - log.logWarning("ChunkIterator for file " + file.toString() + " ended with " + e.getCause().getMessage() + " at chunk " + this.chunkcounter, e); - } - this.stream = null; return null; } } + public byte[] next() { + byte[] n = this.nextBytes; + this.nextBytes = next0(); + return n; + } + public void remove() { throw new UnsupportedOperationException(); } diff --git a/source/de/anomic/kelondro/kelondroEcoTable.java b/source/de/anomic/kelondro/kelondroEcoTable.java index b450e7800..faf2932ec 100644 --- a/source/de/anomic/kelondro/kelondroEcoTable.java +++ b/source/de/anomic/kelondro/kelondroEcoTable.java @@ -65,6 +65,7 @@ public class kelondroEcoTable implements kelondroIndex { kelondroBytesIntMap index; kelondroBufferedEcoFS file; kelondroRow rowdef; + int fail; kelondroRow taildef; private int buffersize; @@ -72,6 +73,7 @@ public class kelondroEcoTable implements kelondroIndex { public kelondroEcoTable(File tablefile, kelondroRow rowdef, int useTailCache, int buffersize, int initialSpace) { this.rowdef = rowdef; this.buffersize = buffersize; + this.fail = 0; assert rowdef.primaryKeyIndex == 0; // define the taildef, a row like the rowdef but without the first column kelondroColumn[] cols = new kelondroColumn[rowdef.columns() - 1]; @@ -129,8 +131,9 @@ public class kelondroEcoTable implements kelondroIndex { // write the key into the index table assert key != null; if (key == null) {i++; continue;} - index.addi(key, i++); - + if (!index.addi(key, i++)) fail++; + assert index.size() + fail == i : "index.size() = " + index.size() + ", i = " + i + ", fail = " + fail + ", key = '" + new String(key) + "'"; + if ((i % 10000) == 0) { System.out.print('.'); System.out.flush(); @@ -139,7 +142,7 @@ public class kelondroEcoTable implements kelondroIndex { } else { byte[] record; key = new byte[rowdef.primaryKeyLength]; - Iterator ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize, null); + Iterator ri = new kelondroEcoFS.ChunkIterator(tablefile, rowdef.objectsize, rowdef.objectsize); while (ri.hasNext()) { record = ri.next(); assert record != null; @@ -147,8 +150,8 @@ public class kelondroEcoTable implements kelondroIndex { System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); // write the key into the index table - index.addi(key, i++); - + if (!index.addi(key, i++)) fail++; + // write the tail into the table table.addUnique(taildef.newEntry(record, rowdef.primaryKeyLength, true)); @@ -164,6 +167,7 @@ public class kelondroEcoTable implements kelondroIndex { System.out.flush(); this.file = new kelondroBufferedEcoFS(new kelondroEcoFS(tablefile, rowdef.objectsize), this.buffersize); ArrayList doubles = index.removeDoubles(); + assert index.size() + doubles.size() + fail == i; System.out.println(" -removed " + doubles.size() + " doubles- done."); if (doubles.size() > 0) { System.out.println("DEBUG " + tablefile + ": WARNING - EcoTable " + tablefile + " has " + doubles.size() + " doubles"); @@ -174,7 +178,7 @@ public class kelondroEcoTable implements kelondroIndex { for (Integer[] ds: doubles) { file.get(ds[0].longValue(), record, 0); System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); - index.addi(key, ds[0].intValue()); + if (!index.addi(key, ds[0].intValue())) fail++; } // then remove the other doubles by removing them from the table, but do a re-indexing while doing that // first aggregate all the delete positions because the elements from the top positions must be removed first @@ -190,6 +194,12 @@ public class kelondroEcoTable implements kelondroIndex { removeInFile(top.intValue()); } } + + try { + assert file.size() == index.size() + doubles.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size() + ", doubles.size() = " + doubles.size() + ", fail = " + fail + ", i = " + i; + } catch (IOException e) { + e.printStackTrace(); + } } catch (FileNotFoundException e) { // should never happen e.printStackTrace(); @@ -198,11 +208,6 @@ public class kelondroEcoTable implements kelondroIndex { e.printStackTrace(); throw new kelondroException(e.getMessage()); } - try { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); - } catch (IOException e) { - e.printStackTrace(); - } // track this table tableTracker.put(tablefile.toString(), this); @@ -217,7 +222,7 @@ public class kelondroEcoTable implements kelondroIndex { */ public Iterator keyIterator(File file, kelondroRow rowdef) throws FileNotFoundException { assert rowdef.primaryKeyIndex == 0; - return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength, null); + return new kelondroEcoFS.ChunkIterator(file, rowdef.objectsize, rowdef.primaryKeyLength); } public static long tableSize(File tablefile, int recordsize) { @@ -254,26 +259,30 @@ public class kelondroEcoTable implements kelondroIndex { return (int) ((rowdef.primaryKeyLength + 4) * tableSize(f, rowdef.objectsize) * kelondroRowCollection.growfactor); } - public synchronized void addUnique(Entry row) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + public synchronized boolean addUnique(Entry row) throws IOException { + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); int i = (int) file.size(); - index.addi(row.getPrimaryKeyBytes(), i); + boolean added = index.addi(row.getPrimaryKeyBytes(), i); + if (!added) return false; if (table != null) { assert table.size() == i; table.addUnique(taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true)); } file.put(i, row.bytes(), 0); - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); + return true; } - public synchronized void addUniqueMultiple(List rows) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + public synchronized int addUniqueMultiple(List rows) throws IOException { + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); Iterator i = rows.iterator(); + int c = 0; while (i.hasNext()) { - addUnique(i.next()); + if (addUnique(i.next())) c++; } - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); + return c; } public synchronized ArrayList removeDoubles() throws IOException { @@ -318,7 +327,7 @@ public class kelondroEcoTable implements kelondroIndex { } public synchronized Entry get(byte[] key) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); int i = index.geti(key); if (i == -1) return null; @@ -334,13 +343,13 @@ public class kelondroEcoTable implements kelondroIndex { System.arraycopy(key, 0, b, 0, key.length); System.arraycopy(v.bytes(), 0, b, rowdef.primaryKeyLength, rowdef.objectsize - rowdef.primaryKeyLength); } - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); return rowdef.newEntry(b); } public synchronized boolean has(byte[] key) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); return index.geti(key) >= 0; } @@ -354,7 +363,7 @@ public class kelondroEcoTable implements kelondroIndex { } public synchronized Entry put(Entry row) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); assert row != null; assert row.bytes() != null; @@ -381,7 +390,7 @@ public class kelondroEcoTable implements kelondroIndex { table.set(i, taildef.newEntry(row.bytes(), rowdef.primaryKeyLength, true)); file.put(i, row.bytes(), 0); } - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); // return old value return rowdef.newEntry(b); @@ -392,12 +401,12 @@ public class kelondroEcoTable implements kelondroIndex { } public synchronized void putMultiple(List rows) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); Iterator i = rows.iterator(); while (i.hasNext()) { put(i.next()); } - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); } private void removeInFile(int i) throws IOException { @@ -433,7 +442,7 @@ public class kelondroEcoTable implements kelondroIndex { } public synchronized Entry remove(byte[] key, boolean keepOrder) throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); assert keepOrder == false; // this class cannot keep the order during a remove assert key.length == rowdef.primaryKeyLength; @@ -462,7 +471,7 @@ public class kelondroEcoTable implements kelondroIndex { System.arraycopy(p, 0, k, 0, rowdef.primaryKeyLength); index.puti(k, i); } - assert (file.size() == index.size()); + assert (file.size() == index.size() + fail); } else { // get result value from the table copy, so we don't need to read it from the file kelondroRow.Entry v = table.get(i); @@ -488,17 +497,17 @@ public class kelondroEcoTable implements kelondroIndex { kelondroRow.Entry lr = rowdef.newEntry(p); index.puti(lr.getPrimaryKeyBytes(), i); } - assert (file.size() == index.size()); + assert (file.size() == index.size() + fail); assert (table.size() == index.size()) : "table.size() = " + table.size() + ", index.size() = " + index.size(); } - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); assert index.size() + 1 == sb : "index.size() = " + index.size() + ", sb = " + sb; return rowdef.newEntry(b); } public synchronized Entry removeOne() throws IOException { - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); assert ((table == null) || (table.size() == index.size())); byte[] le = new byte[rowdef.objectsize]; file.cleanLast(le, 0); @@ -506,7 +515,7 @@ public class kelondroEcoTable implements kelondroIndex { int i = index.removei(lr.getPrimaryKeyBytes()); assert i >= 0; if (table != null) table.removeOne(); - assert file.size() == index.size() : "file.size() = " + file.size() + ", index.size() = " + index.size(); + assert file.size() == index.size() + fail : "file.size() = " + file.size() + ", index.size() = " + index.size(); return lr; } diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 4dd4db5e7..851745834 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -299,13 +299,13 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr return oldentry; } - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { + public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException { assert row.objectsize() == this.rowdef.objectsize; assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - index.addi(row.getColBytes(0), super.add(row)); + return index.addi(row.getColBytes(0), super.add(row)); } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized int addUniqueMultiple(List rows) throws IOException { // add a list of entries in a ordered way. // this should save R/W head positioning time TreeMap indexed_result = super.addMultiple(rows); @@ -318,7 +318,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr index.puti(entry.getValue(), entry.getKey().intValue()); } assert this.size() == index.size() : "content.size() = " + this.size() + ", index.size() = " + index.size(); - + return indexed_result.size(); } public synchronized ArrayList removeDoubles() throws IOException { diff --git a/source/de/anomic/kelondro/kelondroIndex.java b/source/de/anomic/kelondro/kelondroIndex.java index 8e806dab7..d7b8982a8 100644 --- a/source/de/anomic/kelondro/kelondroIndex.java +++ b/source/de/anomic/kelondro/kelondroIndex.java @@ -66,8 +66,8 @@ public interface kelondroIndex { public kelondroRow.Entry put(kelondroRow.Entry row) throws IOException; public kelondroRow.Entry put(kelondroRow.Entry row, Date entryDate) throws IOException; public void putMultiple(List rows) throws IOException; // for R/W head path optimization - public void addUnique(kelondroRow.Entry row) throws IOException; // no double-check - public void addUniqueMultiple(List rows) throws IOException; // no double-check + public boolean addUnique(kelondroRow.Entry row) throws IOException; // no double-check + public int addUniqueMultiple(List rows) throws IOException; // no double-check public ArrayList removeDoubles() throws IOException; // removes all elements that are double (to be used after all addUnique) public kelondroRow.Entry remove(byte[] key, boolean keepOrder) throws IOException; public kelondroRow.Entry removeOne() throws IOException; diff --git a/source/de/anomic/kelondro/kelondroRAMIndex.java b/source/de/anomic/kelondro/kelondroRAMIndex.java index 93cb1d786..335cd11ed 100644 --- a/source/de/anomic/kelondro/kelondroRAMIndex.java +++ b/source/de/anomic/kelondro/kelondroRAMIndex.java @@ -105,22 +105,24 @@ public class kelondroRAMIndex implements kelondroIndex { } } - public synchronized void addUnique(kelondroRow.Entry entry) { + public synchronized boolean addUnique(kelondroRow.Entry entry) { assert (entry != null); if (index1 == null) { // we are in the initialization phase - index0.addUnique(entry); + return index0.addUnique(entry); } else { // initialization is over, add to secondary index - index1.addUnique(entry); + return index1.addUnique(entry); } } - public void addUniqueMultiple(List rows) { + public int addUniqueMultiple(List rows) { Iterator i = rows.iterator(); + int c = 0; while (i.hasNext()) { - addUnique(i.next()); + if (addUnique(i.next())) c++; } + return c; } public synchronized ArrayList removeDoubles() { diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 2a84a0cc7..a6bac0988 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -296,22 +296,26 @@ public class kelondroRowCollection { set(index, a); } - public synchronized void addUnique(kelondroRow.Entry row) { + public synchronized boolean addUnique(kelondroRow.Entry row) { byte[] r = row.bytes(); - addUnique(r, 0, r.length); + return addUnique(r, 0, r.length); } - public synchronized void addUniqueMultiple(List rows) { + public synchronized int addUniqueMultiple(List rows) { assert this.sortBound == 0 : "sortBound = " + this.sortBound + ", chunkcount = " + this.chunkcount; Iterator i = rows.iterator(); - while (i.hasNext()) addUnique(i.next()); + int c = 0; + while (i.hasNext()) { + if (addUnique(i.next())) c++; + } + return c; } public synchronized void add(byte[] a) { addUnique(a, 0, a.length); } - private final void addUnique(byte[] a, int astart, int alength) { + private final boolean addUnique(byte[] a, int astart, int alength) { assert (a != null); assert (astart >= 0) && (astart < a.length) : " astart = " + a; assert (!(serverLog.allZero(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength); @@ -319,7 +323,7 @@ public class kelondroRowCollection { assert (astart + alength <= a.length); if (bugappearance(a, astart, alength)) { System.out.println("*** DEBUG: patched wrong a = " + serverLog.arrayList(a, astart, alength)); - return; // TODO: this is temporary; remote peers may still submit bad entries + return false; // TODO: this is temporary; remote peers may still submit bad entries } assert (!(bugappearance(a, astart, alength))) : "a = " + serverLog.arrayList(a, astart, alength); int l = Math.min(rowdef.objectsize, Math.min(alength, a.length - astart)); @@ -327,6 +331,7 @@ public class kelondroRowCollection { System.arraycopy(a, astart, chunkcache, rowdef.objectsize * chunkcount, l); chunkcount++; this.lastTimeWrote = System.currentTimeMillis(); + return true; } private static boolean bugappearance(byte[] a, int astart, int alength) { diff --git a/source/de/anomic/kelondro/kelondroSQLTable.java b/source/de/anomic/kelondro/kelondroSQLTable.java index 0ddde077c..b486f4511 100644 --- a/source/de/anomic/kelondro/kelondroSQLTable.java +++ b/source/de/anomic/kelondro/kelondroSQLTable.java @@ -209,7 +209,7 @@ public class kelondroSQLTable implements kelondroIndex { } } - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { + public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException { throw new UnsupportedOperationException(); } @@ -217,7 +217,7 @@ public class kelondroSQLTable implements kelondroIndex { throw new UnsupportedOperationException(); } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized int addUniqueMultiple(List rows) throws IOException { throw new UnsupportedOperationException(); } diff --git a/source/de/anomic/kelondro/kelondroSplitTable.java b/source/de/anomic/kelondro/kelondroSplitTable.java index d6d9966f6..ff74ac176 100644 --- a/source/de/anomic/kelondro/kelondroSplitTable.java +++ b/source/de/anomic/kelondro/kelondroSplitTable.java @@ -308,15 +308,15 @@ public class kelondroSplitTable implements kelondroIndex { return null; } - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { - addUnique(row, null); + public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException { + return addUnique(row, null); } - public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { + public synchronized boolean addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { assert row.objectsize() <= this.rowdef.objectsize; if ((entryDate == null) || (entryDate.after(new Date()))) entryDate = new Date(); // fix date String suffix = dateSuffix(entryDate); - if (suffix == null) return; + if (suffix == null) return false; kelondroIndex table = (kelondroIndex) tables.get(suffix); if (table == null) { // make new table @@ -329,12 +329,16 @@ public class kelondroSplitTable implements kelondroIndex { } tables.put(suffix, table); } - table.addUnique(row); + return table.addUnique(row); } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized int addUniqueMultiple(List rows) throws IOException { Iterator i = rows.iterator(); - while (i.hasNext()) addUnique(i.next()); + int c = 0; + while (i.hasNext()) { + if (addUnique(i.next())) c++; + } + return c; } public synchronized void addUniqueMultiple(List rows, Date entryDate) throws IOException { diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index f1f76e59b..389502ff3 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -489,17 +489,23 @@ public class kelondroTree extends kelondroCachedRecords implements kelondroIndex return result; } - public synchronized void addUnique(kelondroRow.Entry row) throws IOException { + public synchronized boolean addUnique(kelondroRow.Entry row) throws IOException { + int s = this.size(); this.put(row); + return this.size() > s; } public synchronized void addUnique(kelondroRow.Entry row, Date entryDate) throws IOException { this.put(row, entryDate); } - public synchronized void addUniqueMultiple(List rows) throws IOException { + public synchronized int addUniqueMultiple(List rows) throws IOException { Iterator i = rows.iterator(); - while (i.hasNext()) addUnique(i.next()); + int c = 0; + while (i.hasNext()) { + if (addUnique(i.next())) c++; + } + return c; } private void assignChild(kelondroNode parentNode, kelondroNode childNode, int childType) throws IOException { diff --git a/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java b/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java index f96050b80..634a0ab8a 100644 --- a/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java +++ b/source/de/anomic/plasma/crawler/plasmaHTTPLoader.java @@ -52,7 +52,6 @@ import java.net.MalformedURLException; import java.net.NoRouteToHostException; import java.net.SocketException; import java.net.UnknownHostException; -import java.util.Arrays; import java.util.Date; import de.anomic.http.HttpClient; diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index bc668c38c..3d5fe89c7 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -181,7 +181,7 @@ public final class plasmaCrawlStacker extends Thread { public void close() { if (this.dbtype == QUEUE_DB_TYPE_RAM) { - this.log.logFine("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait."); + this.log.logInfo("Shutdown. Flushing remaining " + size() + " crawl stacker job entries. please wait."); while (size() > 0) { if (!job()) break; } diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index faef71dc8..e0d1e3687 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -480,9 +480,7 @@ public final class serverCore extends serverAbstractBusyThread implements server Thread.interrupted(); // shut down all busySessions - for (Session session: this.busySessions) { - try {session.notify();} catch (IllegalMonitorStateException e) {e.printStackTrace();} - try {session.notifyAll();} catch (IllegalMonitorStateException e) {e.printStackTrace();} + if (this.busySessions != null) for (Session session: this.busySessions) { try {session.interrupt();} catch (SecurityException e ) {e.printStackTrace();} } diff --git a/yacy.logging b/yacy.logging index f3907c9ad..855c7b199 100644 --- a/yacy.logging +++ b/yacy.logging @@ -21,7 +21,7 @@ WORDMIGRATION.level = FINE FILEHANDLER.level = INFO SESSION-POOL.level = INFO CRAWLER-POOL.level = INFO -STACKCRAWL-POOL.level = INFO +STACKCRAWL.level = INFO MEMORY.level = INFO # List of global handlers