From dad5b586a4b1db86e6773eb5bbda155e6607cdde Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 15 Sep 2011 10:01:21 +0000 Subject: [PATCH] added a concurrent warming-up of Table data structures. That should speed up the start-up process but may also cause stronger CPU load at that time. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7956 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/Balancer.java | 4 +- source/de/anomic/crawler/ZURL.java | 4 +- source/de/anomic/yacy/yacyNewsDB.java | 8 +- source/de/anomic/yacy/yacyNewsQueue.java | 72 ++++++++--------- source/net/yacy/dbtest.java | 2 +- source/net/yacy/kelondro/table/Relations.java | 12 +-- .../net/yacy/kelondro/table/SplitTable.java | 21 +++-- source/net/yacy/kelondro/table/Table.java | 79 +++++++++++-------- 8 files changed, 113 insertions(+), 89 deletions(-) diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index 0f86767b2..a8b5a6c92 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -103,10 +103,10 @@ public class Balancer { this.cacheStacksPath.mkdirs(); final File f = new File(this.cacheStacksPath, stackname + indexSuffix); try { - this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727), objectIndexBufferSize); + this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727, true), objectIndexBufferSize); } catch (final RowSpaceExceededException e) { try { - this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, 0, 0, false, exceed134217727), objectIndexBufferSize); + this.urlFileIndex = new BufferedObjectIndex(new Table(f, Request.rowdef, 0, 0, false, exceed134217727, true), objectIndexBufferSize); } catch (final RowSpaceExceededException e1) { Log.logException(e1); } diff --git a/source/de/anomic/crawler/ZURL.java 
b/source/de/anomic/crawler/ZURL.java index 7ebc1b223..352686fd5 100755 --- a/source/de/anomic/crawler/ZURL.java +++ b/source/de/anomic/crawler/ZURL.java @@ -96,10 +96,10 @@ public class ZURL implements Iterable { } } try { - this.urlIndex = new Table(f, rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727); + this.urlIndex = new Table(f, rowdef, EcoFSBufferSize, 0, useTailCache, exceed134217727, true); } catch (final RowSpaceExceededException e) { try { - this.urlIndex = new Table(f, rowdef, 0, 0, false, exceed134217727); + this.urlIndex = new Table(f, rowdef, 0, 0, false, exceed134217727, true); } catch (final RowSpaceExceededException e1) { Log.logException(e1); } diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index 041ee4263..259fe5659 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -98,10 +98,10 @@ public class yacyNewsDB { NaturalOrder.naturalOrder ); try { - this.news = new Table(path, this.rowdef, 10, 0, useTailCache, exceed134217727); + this.news = new Table(path, this.rowdef, 10, 0, useTailCache, exceed134217727, true); } catch (final RowSpaceExceededException e) { try { - this.news = new Table(path, this.rowdef, 0, 0, false, exceed134217727); + this.news = new Table(path, this.rowdef, 0, 0, false, exceed134217727, true); } catch (final RowSpaceExceededException e1) { Log.logException(e1); } @@ -112,10 +112,10 @@ public class yacyNewsDB { try {close();} catch (final Exception e) {} if (this.path.exists()) FileUtils.deletedelete(this.path); try { - this.news = new Table(this.path, this.rowdef, 10, 0, false, false); + this.news = new Table(this.path, this.rowdef, 10, 0, false, false, true); } catch (final RowSpaceExceededException e) { try { - this.news = new Table(this.path, this.rowdef, 0, 0, false, false); + this.news = new Table(this.path, this.rowdef, 0, 0, false, false, true); } catch (final RowSpaceExceededException e1) { Log.logException(e1); } diff 
--git a/source/de/anomic/yacy/yacyNewsQueue.java b/source/de/anomic/yacy/yacyNewsQueue.java index 2c605378e..e59660968 100644 --- a/source/de/anomic/yacy/yacyNewsQueue.java +++ b/source/de/anomic/yacy/yacyNewsQueue.java @@ -65,7 +65,7 @@ public class yacyNewsQueue { private final File path; private Table queueStack; private final yacyNewsDB newsDB; - + private static final Row rowdef = new Row(new Column[]{ new Column("newsid", Column.celltype_string, Column.encoder_bytes, yacyNewsDB.idLength, "id = created + originator"), new Column("last touched", Column.celltype_string, Column.encoder_bytes, GenericFormatter.PATTERN_SHORT_SECOND.length(), "") @@ -77,22 +77,22 @@ public class yacyNewsQueue { this.path = path; this.newsDB = newsDB; try { - this.queueStack = new Table(path, rowdef, 10, 0, false, false); - } catch (RowSpaceExceededException e) { + this.queueStack = new Table(path, rowdef, 10, 0, false, false, true); + } catch (final RowSpaceExceededException e) { Log.logException(e); this.queueStack = null; } } - + public void clear() { try { this.queueStack.clear(); - } catch (IOException e) { + } catch (final IOException e) { try {close();} catch (final Exception ee) {} - if (path.exists()) FileUtils.deletedelete(path); + if (this.path.exists()) FileUtils.deletedelete(this.path); try { - this.queueStack = new Table(path, rowdef, 10, 0, false, false); - } catch (RowSpaceExceededException ee) { + this.queueStack = new Table(this.path, rowdef, 10, 0, false, false, true); + } catch (final RowSpaceExceededException ee) { Log.logException(e); this.queueStack = null; } @@ -100,8 +100,8 @@ public class yacyNewsQueue { } public void close() { - if (queueStack != null) queueStack.close(); - queueStack = null; + if (this.queueStack != null) this.queueStack.close(); + this.queueStack = null; } @Override @@ -110,24 +110,24 @@ public class yacyNewsQueue { } public int size() { - return queueStack.size(); + return this.queueStack.size(); } - + public boolean isEmpty() { - return 
queueStack.isEmpty(); + return this.queueStack.isEmpty(); } public synchronized void push(final yacyNewsDB.Record entry) throws IOException, RowSpaceExceededException { - if (!queueStack.consistencyCheck()) { + if (!this.queueStack.consistencyCheck()) { Log.logSevere("yacyNewsQueue", "reset of table " + this.path); - queueStack.clear(); + this.queueStack.clear(); } - queueStack.addUnique(r2b(entry)); + this.queueStack.addUnique(r2b(entry)); } public synchronized yacyNewsDB.Record pop() throws IOException { - if (queueStack.isEmpty()) return null; - return b2r(queueStack.removeOne()); + if (this.queueStack.isEmpty()) return null; + return b2r(this.queueStack.removeOne()); } public synchronized yacyNewsDB.Record get(final String id) { @@ -148,7 +148,7 @@ public class yacyNewsQueue { if ((record != null) && (record.id().equals(id))) { try { this.queueStack.remove(UTF8.getBytes(id)); - } catch (IOException e) { + } catch (final IOException e) { Log.logException(e); } return record; @@ -161,45 +161,45 @@ public class yacyNewsQueue { if (b == null) return null; final String id = b.getColString(0); //Date touched = yacyCore.parseUniversalDate(UTF8.String(b[1])); - return newsDB.get(id); + return this.newsDB.get(id); } private Row.Entry r2b(final yacyNewsDB.Record r) throws IOException, RowSpaceExceededException { if (r == null) return null; - newsDB.put(r); - final Row.Entry b = queueStack.row().newEntry(new byte[][]{ + this.newsDB.put(r); + final Row.Entry b = this.queueStack.row().newEntry(new byte[][]{ UTF8.getBytes(r.id()), UTF8.getBytes(GenericFormatter.SHORT_SECOND_FORMATTER.format())}); return b; } - + public Iterator records(final boolean up) { // iterates yacyNewsRecord-type objects - if (queueStack == null) return new HashSet().iterator(); + if (this.queueStack == null) return new HashSet().iterator(); return new newsIterator(up); } - + private class newsIterator implements Iterator { // iterates yacyNewsRecord-type objects - + Iterator stackNodeIterator; - + 
private newsIterator(final boolean up) { try { - stackNodeIterator = queueStack.rows(); - } catch (IOException e) { + this.stackNodeIterator = yacyNewsQueue.this.queueStack.rows(); + } catch (final IOException e) { Log.logException(e); - stackNodeIterator = null; + this.stackNodeIterator = null; } } - + public boolean hasNext() { - return stackNodeIterator != null && stackNodeIterator.hasNext(); + return this.stackNodeIterator != null && this.stackNodeIterator.hasNext(); } public yacyNewsDB.Record next() { - if (stackNodeIterator == null) return null; - final Row.Entry row = stackNodeIterator.next(); + if (this.stackNodeIterator == null) return null; + final Row.Entry row = this.stackNodeIterator.next(); try { return b2r(row); } catch (final IOException e) { @@ -208,9 +208,9 @@ public class yacyNewsQueue { } public void remove() { - if (stackNodeIterator != null) stackNodeIterator.remove(); + if (this.stackNodeIterator != null) this.stackNodeIterator.remove(); } - + } } \ No newline at end of file diff --git a/source/net/yacy/dbtest.java b/source/net/yacy/dbtest.java index 6d55117e9..b93affde6 100644 --- a/source/net/yacy/dbtest.java +++ b/source/net/yacy/dbtest.java @@ -206,7 +206,7 @@ public class dbtest { return new SplitTable(tablepath, new File(tablename).getName(), testRow, true, true); } if (dbe.equals("kelondroEcoTable")) { - return new Table(new File(tablename), testRow, 1000, 0, true, true); + return new Table(new File(tablename), testRow, 1000, 0, true, true, true); } if (dbe.equals("mysql")) { return new SQLTable("mysql", testRow); diff --git a/source/net/yacy/kelondro/table/Relations.java b/source/net/yacy/kelondro/table/Relations.java index 3c9618441..a9f549bcd 100755 --- a/source/net/yacy/kelondro/table/Relations.java +++ b/source/net/yacy/kelondro/table/Relations.java @@ -94,9 +94,9 @@ public class Relations { if (row.primaryKeyLength != keysize || row.column(1).cellwidth != payloadsize) continue; // a wrong table Index table; try { - table = new 
Table(new File(this.baseDir, list[i]), row, 1024*1024, 0, this.useTailCache, this.exceed134217727); + table = new Table(new File(this.baseDir, list[i]), row, 1024*1024, 0, this.useTailCache, this.exceed134217727, true); } catch (final RowSpaceExceededException e) { - table = new Table(new File(this.baseDir, list[i]), row, 0, 0, false, this.exceed134217727); + table = new Table(new File(this.baseDir, list[i]), row, 0, 0, false, this.exceed134217727, true); } this.relations.put(name, table); return; @@ -106,9 +106,9 @@ public class Relations { final Row row = rowdef(keysize, payloadsize); Index table; try { - table = new Table(new File(this.baseDir, targetfilename), row, 1024*1024, 0, this.useTailCache, this.exceed134217727); + table = new Table(new File(this.baseDir, targetfilename), row, 1024*1024, 0, this.useTailCache, this.exceed134217727, true); } catch (final RowSpaceExceededException e) { - table = new Table(new File(this.baseDir, targetfilename), row, 0, 0, false, this.exceed134217727); + table = new Table(new File(this.baseDir, targetfilename), row, 0, 0, false, this.exceed134217727, true); } this.relations.put(name, table); } @@ -124,9 +124,9 @@ public class Relations { final Row row = rowdef(element); Index table; try { - table = new Table(new File(this.baseDir, element), row, 1024*1024, 0, this.useTailCache, this.exceed134217727); + table = new Table(new File(this.baseDir, element), row, 1024*1024, 0, this.useTailCache, this.exceed134217727, true); } catch (final RowSpaceExceededException e) { - table = new Table(new File(this.baseDir, element), row, 0, 0, false, this.exceed134217727); + table = new Table(new File(this.baseDir, element), row, 0, 0, false, this.exceed134217727, true); } this.relations.put(name, table); return table; diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java index 008f4c12e..8fc687803 100644 --- a/source/net/yacy/kelondro/table/SplitTable.java +++ 
b/source/net/yacy/kelondro/table/SplitTable.java @@ -195,7 +195,7 @@ public class SplitTable implements Index, Iterable { Map.Entry entry; String maxf; long maxram; - Index table; + final List warmingUp = new ArrayList(); // for concurrent warming up while (!t.isEmpty()) { // find maximum table maxram = 0; @@ -214,13 +214,24 @@ public class SplitTable implements Index, Iterable { t.remove(maxf); f = new File(this.path, maxf); Log.logInfo("kelondroSplitTable", "opening partial eco table " + f); + Table table; try { - table = new Table(f, this.rowdef, EcoFSBufferSize, 0, this.useTailCache, this.exceed134217727); + table = new Table(f, this.rowdef, EcoFSBufferSize, 0, this.useTailCache, this.exceed134217727, false); } catch (final RowSpaceExceededException e) { - table = new Table(f, this.rowdef, 0, 0, false, this.exceed134217727); + table = new Table(f, this.rowdef, 0, 0, false, this.exceed134217727, false); } + final Table a = table; + final Thread p = new Thread() { + public void run() { + a.warmUp(); + } + }; + p.start(); + warmingUp.add(p); this.tables.put(maxf, table); } + // collect warming up threads + for (final Thread p: warmingUp) try {p.join();} catch (final InterruptedException e) {} assert this.current == null || this.tables.get(this.current) != null : "this.current = " + this.current; // init the thread pool for the keeperOf executor service @@ -323,10 +334,10 @@ public class SplitTable implements Index, Iterable { final File f = new File(this.path, this.current); Table table = null; try { - table = new Table(f, this.rowdef, EcoFSBufferSize, 0, this.useTailCache, this.exceed134217727); + table = new Table(f, this.rowdef, EcoFSBufferSize, 0, this.useTailCache, this.exceed134217727, true); } catch (final RowSpaceExceededException e) { try { - table = new Table(f, this.rowdef, 0, 0, false, this.exceed134217727); + table = new Table(f, this.rowdef, 0, 0, false, this.exceed134217727, true); } catch (final RowSpaceExceededException e1) { Log.logException(e1); 
} diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index eab9c1054..1ae57a373 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -87,7 +87,8 @@ public class Table implements Index, Iterable { final int buffersize, final int initialSpace, boolean useTailCache, - final boolean exceed134217727) throws RowSpaceExceededException { + final boolean exceed134217727, + final boolean warmUp) throws RowSpaceExceededException { useTailCache = true; // fixed for testing this.rowdef = rowdef; @@ -204,37 +205,8 @@ public class Table implements Index, Iterable { errors.close(); assert this.file.size() == this.index.size() : "file.size() = " + this.file.size() + ", index.size() = " + this.index.size() + ", file = " + filename(); - // remove doubles - if (!freshFile) { - final ArrayList doubles = this.index.removeDoubles(); - //assert index.size() + doubles.size() + fail == i; - //System.out.println(" -removed " + doubles.size() + " doubles- done."); - if (!doubles.isEmpty()) { - Log.logInfo("TABLE", tablefile + ": WARNING - TABLE " + tablefile + " has " + doubles.size() + " doubles"); - // from all the doubles take one, put it back to the index and remove the others from the file - // first put back one element each - final byte[] record = new byte[rowdef.objectsize]; - key = new byte[rowdef.primaryKeyLength]; - for (final long[] ds: doubles) { - this.file.get((int) ds[0], record, 0); - System.arraycopy(record, 0, key, 0, rowdef.primaryKeyLength); - this.index.putUnique(key, (int) ds[0]); - } - // then remove the other doubles by removing them from the table, but do a re-indexing while doing that - // first aggregate all the delete positions because the elements from the top positions must be removed first - final TreeSet delpos = new TreeSet(); - for (final long[] ds: doubles) { - for (int j = 1; j < ds.length; j++) delpos.add(ds[j]); - } - // now remove the entries in a sorted 
way (top-down) - Long top; - while (!delpos.isEmpty()) { - top = delpos.last(); - delpos.remove(top); - removeInFile(top.intValue()); - } - } - } + // warm up + if (!freshFile && warmUp) {warmUp0();} } catch (final FileNotFoundException e) { // should never happen Log.logSevere("Table", "", e); @@ -248,6 +220,47 @@ public class Table implements Index, Iterable { tableTracker.put(tablefile.toString(), this); } + public synchronized void warmUp() { + warmUp0(); + } + + private void warmUp0() { + // remove doubles + try { + final ArrayList doubles = this.index.removeDoubles(); + //assert index.size() + doubles.size() == i; + //System.out.println(" -removed " + doubles.size() + " doubles- done."); + if (doubles.isEmpty()) return; + Log.logInfo("TABLE", filename() + ": WARNING - TABLE " + filename() + " has " + doubles.size() + " doubles"); + // from all the doubles take one, put it back to the index and remove the others from the file + // first put back one element each + final byte[] record = new byte[this.rowdef.objectsize]; + final byte[] key = new byte[this.rowdef.primaryKeyLength]; + for (final long[] ds: doubles) { + this.file.get((int) ds[0], record, 0); + System.arraycopy(record, 0, key, 0, this.rowdef.primaryKeyLength); + this.index.putUnique(key, (int) ds[0]); + } + // then remove the other doubles by removing them from the table, but do a re-indexing while doing that + // first aggregate all the delete positions because the elements from the top positions must be removed first + final TreeSet delpos = new TreeSet(); + for (final long[] ds: doubles) { + for (int j = 1; j < ds.length; j++) delpos.add(ds[j]); + } + // now remove the entries in a sorted way (top-down) + Long top; + while (!delpos.isEmpty()) { + top = delpos.last(); + delpos.remove(top); + removeInFile(top.intValue()); + } + } catch (final RowSpaceExceededException e) { + Log.logSevere("Table", "", e); + } catch (final IOException e) { + Log.logSevere("Table", "", e); + } + } + public long mem() 
{ return this.index.mem() + ((this.table == null) ? 0 : this.table.mem()); } @@ -964,7 +977,7 @@ public class Table implements Index, Iterable { private static Table testTable(final File f, final String testentities, final boolean useTailCache, final boolean exceed134217727) throws IOException, RowSpaceExceededException { if (f.exists()) FileUtils.deletedelete(f); final Row rowdef = new Row("byte[] a-4, byte[] b-4", NaturalOrder.naturalOrder); - final Table tt = new Table(f, rowdef, 100, 0, useTailCache, exceed134217727); + final Table tt = new Table(f, rowdef, 100, 0, useTailCache, exceed134217727, true); byte[] b; final Row.Entry row = rowdef.newEntry(); for (int i = 0; i < testentities.length(); i++) {