From 5e182a566f09d427f228cb357e50386d1462b50e Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 23 Nov 2012 13:58:39 +0100 Subject: [PATCH] - added another enumeration method in kelondro data structure to get a more random access to data for the balancer - added random access inside the balancer --- source/net/yacy/crawler/Balancer.java | 10 +++------- .../kelondro/index/BufferedObjectIndex.java | 12 +++++++++++ source/net/yacy/kelondro/index/Cache.java | 5 +++++ source/net/yacy/kelondro/index/Index.java | 1 + source/net/yacy/kelondro/index/RAMIndex.java | 10 ++++++++++ .../yacy/kelondro/index/RAMIndexCluster.java | 19 ++++++++++++++++++ .../yacy/kelondro/index/RowCollection.java | 19 +++++++++++++++++- source/net/yacy/kelondro/table/SQLTable.java | 5 +++++ .../net/yacy/kelondro/table/SplitTable.java | 20 +++++++++++++++++++ source/net/yacy/kelondro/table/Table.java | 20 ++++++++++++++++++- 10 files changed, 112 insertions(+), 9 deletions(-) diff --git a/source/net/yacy/crawler/Balancer.java b/source/net/yacy/crawler/Balancer.java index 4d80b6296..4d8682789 100644 --- a/source/net/yacy/crawler/Balancer.java +++ b/source/net/yacy/crawler/Balancer.java @@ -43,7 +43,6 @@ import net.yacy.cora.document.ASCII; import net.yacy.cora.document.UTF8; import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.order.Base64Order; -import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.protocol.Domains; import net.yacy.cora.sorting.OrderedScoreMap; import net.yacy.cora.storage.HandleSet; @@ -481,6 +480,7 @@ public class Balancer { rest = rest + 1000 * loops; loops = 0; } + Thread.currentThread().setName("Balancer waiting for " +crawlEntry.url().getHost() + ": " + sleeptime + " milliseconds"); synchronized(this) { // must be synchronized here to avoid 'takeover' moves from other threads which then idle the same time which would not be enough if (rest > 0) {try {this.wait(rest);} catch (final InterruptedException e) {}} @@ -618,20 +618,16 @@ public class Balancer { this.lastDomainStackFill = System.currentTimeMillis(); //final HandleSet handles = this.urlFileIndex.keysFromBuffer(objectIndexBufferSize / 2); //final CloneableIterator i = handles.keys(true, null); - final CloneableIterator i = this.urlFileIndex.keys(true, null); - byte[] handle; String host; Request request; int count = 0; long timeout = System.currentTimeMillis() + 5000; - while (i.hasNext()) { - handle = i.next(); - final Row.Entry entry = this.urlFileIndex.get(handle, false); + for (Row.Entry entry: this.urlFileIndex.random(10000)) { if (entry == null) continue; request = new Request(entry); host = request.url().getHost(); try { - pushHashToDomainStacks(host, request.url().hosthash(), handle); + pushHashToDomainStacks(host, request.url().hosthash(), entry.getPrimaryKeyBytes()); } catch (final SpaceExceededException e) { break; } diff --git a/source/net/yacy/kelondro/index/BufferedObjectIndex.java b/source/net/yacy/kelondro/index/BufferedObjectIndex.java index deeef495f..3d9fc3884 100644 --- a/source/net/yacy/kelondro/index/BufferedObjectIndex.java +++ b/source/net/yacy/kelondro/index/BufferedObjectIndex.java @@ -230,6 +230,18 @@ public class BufferedObjectIndex implements Index, Iterable { return list; } + @Override + public List random(final int count) throws IOException { + final List list = new ArrayList(); + synchronized (this.backend) { + List list0 = this.buffer.random(count); + list.addAll(list0); + list0 = this.backend.random(count - list.size()); + list.addAll(list0); + } + return list; + } + @Override public Entry removeOne() throws IOException { synchronized (this.backend) { diff --git a/source/net/yacy/kelondro/index/Cache.java b/source/net/yacy/kelondro/index/Cache.java index fd82ffb72..1f551bd91 100644 --- a/source/net/yacy/kelondro/index/Cache.java +++ b/source/net/yacy/kelondro/index/Cache.java @@ -585,6 +585,11 @@ public final class Cache implements Index, Iterable { return this.index.top(count); } + @Override + public synchronized List random(final int count) throws IOException { + return this.index.random(count); + } + @Override public final synchronized Row row() { return this.index.row(); diff --git a/source/net/yacy/kelondro/index/Index.java b/source/net/yacy/kelondro/index/Index.java index fb27734d0..a0e265210 100644 --- a/source/net/yacy/kelondro/index/Index.java +++ b/source/net/yacy/kelondro/index/Index.java @@ -62,6 +62,7 @@ public interface Index extends Iterable { public Row.Entry remove(byte[] key) throws IOException; public Row.Entry removeOne() throws IOException; public List top(int count) throws IOException; + public List random(int count) throws IOException; public CloneableIterator keys(boolean up, byte[] firstKey) throws IOException; // iterates only the key public CloneableIterator rows(boolean up, byte[] firstKey) throws IOException; // iterates the whole row using the order of the keys public CloneableIterator rows() throws IOException; // iterates the whole row without any order diff --git a/source/net/yacy/kelondro/index/RAMIndex.java b/source/net/yacy/kelondro/index/RAMIndex.java index 7c6528888..51f979766 100644 --- a/source/net/yacy/kelondro/index/RAMIndex.java +++ b/source/net/yacy/kelondro/index/RAMIndex.java @@ -301,6 +301,16 @@ public final class RAMIndex implements Index, Iterable { list.addAll(list0); return list; } + + @Override + public synchronized List random(final int count) throws IOException { + final List list = new ArrayList(); + List list0 = this.index1.random(count); + list.addAll(list0); + list0 = this.index0.random(count - list.size()); + list.addAll(list0); + return list; + } @Override public long mem() { diff --git a/source/net/yacy/kelondro/index/RAMIndexCluster.java b/source/net/yacy/kelondro/index/RAMIndexCluster.java index 6e95c8a83..95c4579e6 100644 --- a/source/net/yacy/kelondro/index/RAMIndexCluster.java +++ b/source/net/yacy/kelondro/index/RAMIndexCluster.java @@ -283,6 +283,25 @@ public final class RAMIndexCluster implements Index, Iterable, Clonea return list; } + @Override + public List random(final int count) { + final List list = new ArrayList(); + synchronized (this.cluster) { + for (final RAMIndex element : this.cluster) { + if (element != null) { + try { + final List list0 = element.random(count - list.size()); + list.addAll(list0); + } catch (final IOException e) { + continue; + } + } + if (list.size() >= count) return list; + } + } + return list; + } + @Override public final Entry replace(final Entry row) throws SpaceExceededException { final int i = indexFor(row); diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index c9ab458c7..523bad0e4 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -504,8 +504,9 @@ public class RowCollection implements Sortable, Iterable, } public synchronized List top(int count) { + if (count > this.chunkcount) count = this.chunkcount; final ArrayList list = new ArrayList(); - if (this.chunkcount == 0) return list; + if (this.chunkcount == 0 || count == 0) return list; Row.Entry entry; int cursor = this.chunkcount - 1; while (count > 0 && cursor >= 0) { @@ -516,6 +517,22 @@ public class RowCollection implements Sortable, Iterable, } return list; } + + public synchronized List random(int count) { + if (count > this.chunkcount) count = this.chunkcount; + final ArrayList list = new ArrayList(); + if (this.chunkcount == 0 || count == 0) return list; + Row.Entry entry; + int cursor = 0; + int stepsize = this.chunkcount / count; + while (count > 0 && cursor < this.chunkcount) { + entry = get(cursor, true); + list.add(entry); + count--; + cursor += stepsize; + } + return list; + } public synchronized byte[] smallestKey() { if (this.chunkcount == 0) return null; diff --git a/source/net/yacy/kelondro/table/SQLTable.java b/source/net/yacy/kelondro/table/SQLTable.java index 51bbf7b5a..c46897e7e 100644 --- a/source/net/yacy/kelondro/table/SQLTable.java +++ b/source/net/yacy/kelondro/table/SQLTable.java @@ -311,6 +311,11 @@ public class SQLTable implements Index, Iterable { return null; } + @Override + public List random(final int count) throws IOException { + return null; + } + @Override public CloneableIterator rows(final boolean up, final byte[] startKey) throws IOException { // Objects are of type kelondroRow.Entry diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java index fc885794c..fcd2f42ed 100644 --- a/source/net/yacy/kelondro/table/SplitTable.java +++ b/source/net/yacy/kelondro/table/SplitTable.java @@ -507,6 +507,26 @@ public class SplitTable implements Index, Iterable { } } + @Override + public List random(final int count) throws IOException { + final Iterator i = this.tables.values().iterator(); + Index table, maxtable = null; + int maxcount = -1; + while (i.hasNext()) { + table = i.next(); + if (table.size() > maxcount) { + maxtable = table; + maxcount = table.size(); + } + } + if (maxtable == null) { + return null; + } + synchronized (this) { // avoid concurrent IO from different methods + return maxtable.random(count); + } + } + @Override public CloneableIterator keys(final boolean up, final byte[] firstKey) throws IOException { final List> c = new ArrayList>(this.tables.size()); diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index 8c0af0c6b..f29366c09 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -833,8 +833,9 @@ public class Table implements Index, Iterable { @Override public List top(int count) throws IOException { + if (count > this.size()) count = this.size(); final ArrayList list = new ArrayList(); - if ((this.file == null) || (this.index == null)) return list; + if (this.file == null || this.index == null || this.size() == 0 || count == 0) return list; long i = this.file.size() - 1; while (count > 0 && i >= 0) { final byte[] b = new byte[this.rowdef.objectsize]; @@ -846,6 +847,23 @@ public class Table implements Index, Iterable { return list; } + @Override + public List random(int count) throws IOException { + if (count > this.size()) count = this.size(); + final ArrayList list = new ArrayList(); + if (this.file == null || this.index == null || this.size() == 0 || count == 0) return list; + long cursor = 0; + int stepsize = this.size() / count; + while (count > 0 && cursor < this.size()) { + final byte[] b = new byte[this.rowdef.objectsize]; + this.file.get(cursor, b, 0); + list.add(this.rowdef.newEntry(b)); + count--; + cursor += stepsize; + } + return list; + } + @Override public synchronized void clear() throws IOException { final File f = this.file.filename();