From 9cde05418ff64d52fa380242763b91b90e121df5 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 31 May 2010 00:27:00 +0000 Subject: [PATCH] fixed url crawl list display git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6908 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/Balancer.java | 68 ++++++++++--------- .../kelondro/index/BufferedObjectIndex.java | 12 ++++ source/net/yacy/kelondro/index/Cache.java | 4 ++ source/net/yacy/kelondro/index/HandleMap.java | 10 +++ .../net/yacy/kelondro/index/ObjectIndex.java | 2 + .../yacy/kelondro/index/ObjectIndexCache.java | 10 +++ .../yacy/kelondro/index/RowCollection.java | 14 ++++ .../net/yacy/kelondro/index/RowSetArray.java | 19 ++++++ source/net/yacy/kelondro/table/SQLTable.java | 4 ++ .../net/yacy/kelondro/table/SplitTable.java | 17 +++++ source/net/yacy/kelondro/table/Table.java | 14 ++++ 11 files changed, 143 insertions(+), 31 deletions(-) diff --git a/source/de/anomic/crawler/Balancer.java b/source/de/anomic/crawler/Balancer.java index bab6b6873..40e4ec100 100644 --- a/source/de/anomic/crawler/Balancer.java +++ b/source/de/anomic/crawler/Balancer.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; +import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; @@ -515,41 +516,46 @@ public class Balancer { } public ArrayList top(int count) { - count = Math.min(count, top.size()); final ArrayList cel = new ArrayList(); if (count == 0) return cel; - byte[][] ta = new byte[count][]; + byte[][] ta = new byte[Math.min(count, top.size())][]; ta = top.toArray(ta); - synchronized (this) { - for (byte[] n: ta) { - try { - final Row.Entry rowEntry = urlFileIndex.get(n); - if (rowEntry == null) continue; - final Request crawlEntry = new Request(rowEntry); - cel.add(crawlEntry); - count--; - if (count <= 0) break; - } catch (IOException e) {} - } - - int depth = 0; - loop: while (count > 0) { - // iterate over the domain stacks - for (LinkedList list: this.domainStacks.values()) { - if (list.size() <= depth) continue loop; - byte[] n = list.get(depth); - try { - Row.Entry rowEntry = urlFileIndex.get(n); - if (rowEntry == null) continue; - final Request crawlEntry = new Request(rowEntry); - cel.add(crawlEntry); - count--; - if (count <= 0) break loop; - } catch (IOException e) {} - } - } - + for (byte[] n: ta) { + if (n == null) break; + try { + final Row.Entry rowEntry = urlFileIndex.get(n); + if (rowEntry == null) continue; + final Request crawlEntry = new Request(rowEntry); + cel.add(crawlEntry); + count--; + if (count <= 0) break; + } catch (IOException e) {} } + + int depth = 0; + loop: while (count > 0) { + // iterate over the domain stacks + int celsize = cel.size(); + ll: for (LinkedList list: this.domainStacks.values()) { + if (list.size() <= depth) continue ll; + byte[] n = list.get(depth); + try { + Row.Entry rowEntry = urlFileIndex.get(n); + if (rowEntry == null) continue; + final Request crawlEntry = new Request(rowEntry); + cel.add(crawlEntry); + count--; + if (count <= 0) break loop; + } catch (IOException e) {} + } + if (cel.size() == celsize) break loop; + depth++; + } + + if (cel.size() < count) try { + List list = urlFileIndex.top(count - cel.size()); + for (Row.Entry entry: list) cel.add(new Request(entry)); + } catch (IOException e) { } return cel; } diff --git a/source/net/yacy/kelondro/index/BufferedObjectIndex.java b/source/net/yacy/kelondro/index/BufferedObjectIndex.java index 0f12c1278..e3fe82f71 100644 --- a/source/net/yacy/kelondro/index/BufferedObjectIndex.java +++ b/source/net/yacy/kelondro/index/BufferedObjectIndex.java @@ -25,6 +25,7 @@ package net.yacy.kelondro.index; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.logging.Log; @@ -172,6 +173,17 @@ public class BufferedObjectIndex implements ObjectIndex, Iterable { } } + public List top(int count) throws IOException { + List list = new ArrayList(); + synchronized (this.backend) { + List list0 = buffer.top(count); + list.addAll(list0); + list0 = backend.top(count - list.size()); + list.addAll(list0); + } + return list; + } + public Entry removeOne() throws IOException { synchronized (this.backend) { if (!this.buffer.isEmpty()) { diff --git a/source/net/yacy/kelondro/index/Cache.java b/source/net/yacy/kelondro/index/Cache.java index 3d2aabe76..ac6d1bf0a 100644 --- a/source/net/yacy/kelondro/index/Cache.java +++ b/source/net/yacy/kelondro/index/Cache.java @@ -534,6 +534,10 @@ public final class Cache implements ObjectIndex, Iterable { return entry; } + public synchronized List top(int count) throws IOException { + return this.index.top(count); + } + public final synchronized Row row() { return index.row(); } diff --git a/source/net/yacy/kelondro/index/HandleMap.java b/source/net/yacy/kelondro/index/HandleMap.java index 520a4478a..1c2ec1fac 100644 --- a/source/net/yacy/kelondro/index/HandleMap.java +++ b/source/net/yacy/kelondro/index/HandleMap.java @@ -34,6 +34,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.concurrent.BlockingQueue; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -250,6 +251,15 @@ public final class HandleMap implements Iterable { return report; } + public final synchronized ArrayList top(int count) { + List list0 = index.top(count); + ArrayList list = new ArrayList(); + for (Row.Entry entry: list0) { + list.add(entry.getPrimaryKeyBytes()); + } + return list; + } + public final synchronized long remove(final byte[] key) { assert (key != null); final Row.Entry indexentry = index.remove(key); diff --git a/source/net/yacy/kelondro/index/ObjectIndex.java b/source/net/yacy/kelondro/index/ObjectIndex.java index 6c96fafcd..62741422e 100644 --- a/source/net/yacy/kelondro/index/ObjectIndex.java +++ b/source/net/yacy/kelondro/index/ObjectIndex.java @@ -34,6 +34,7 @@ package net.yacy.kelondro.index; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import net.yacy.kelondro.order.CloneableIterator; @@ -55,6 +56,7 @@ public interface ObjectIndex extends Iterable { public boolean delete(byte[] key) throws IOException; public Row.Entry remove(byte[] key) throws IOException; public Row.Entry removeOne() throws IOException; + public List top(int count) throws IOException; public CloneableIterator keys(boolean up, byte[] firstKey) throws IOException; // iterates only the key public CloneableIterator rows(boolean up, byte[] firstKey) throws IOException; // iterates the whole row using the order of the keys public CloneableIterator rows() throws IOException; // iterates the whole row without any order diff --git a/source/net/yacy/kelondro/index/ObjectIndexCache.java b/source/net/yacy/kelondro/index/ObjectIndexCache.java index 5fbbc6e57..156b43133 100644 --- a/source/net/yacy/kelondro/index/ObjectIndexCache.java +++ b/source/net/yacy/kelondro/index/ObjectIndexCache.java @@ -24,6 +24,7 @@ package net.yacy.kelondro.index; +import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -237,6 +238,15 @@ public final class ObjectIndexCache implements ObjectIndex, Iterable return null; } + public synchronized List top(int count) throws IOException { + List list = new ArrayList(); + List list0 = index1.top(count); + list.addAll(list0); + list0 = index0.top(count - list.size()); + list.addAll(list0); + return list; + } + public final synchronized int size() { if (index0 != null && index1 == null) { return index0.size(); diff --git a/source/net/yacy/kelondro/index/RowCollection.java b/source/net/yacy/kelondro/index/RowCollection.java index effe2b4da..a676ec473 100644 --- a/source/net/yacy/kelondro/index/RowCollection.java +++ b/source/net/yacy/kelondro/index/RowCollection.java @@ -487,6 +487,20 @@ public class RowCollection implements Iterable, Cloneable { return r; } + public synchronized List top(int count) { + ArrayList list = new ArrayList(); + if (chunkcount == 0) return list; + Row.Entry entry; + int cursor = chunkcount - 1; + while (count > 0 && cursor >= 0) { + entry = get(cursor, true); + list.add(entry); + count--; + cursor--; + } + return list; + } + public synchronized byte[] smallestKey() { if (chunkcount == 0) return null; this.sort(); diff --git a/source/net/yacy/kelondro/index/RowSetArray.java b/source/net/yacy/kelondro/index/RowSetArray.java index 96929bece..1b6b6cd68 100644 --- a/source/net/yacy/kelondro/index/RowSetArray.java +++ b/source/net/yacy/kelondro/index/RowSetArray.java @@ -21,6 +21,7 @@ package net.yacy.kelondro.index; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -205,6 +206,24 @@ public final class RowSetArray implements ObjectIndex, Iterable, Clon return null; } + public List top(int count) { + List list = new ArrayList(); + synchronized (this.array) { + for (int i = 0; i < this.array.length; i++) { + if (this.array[i] != null) { + try { + List list0 = this.array[i].top(count - list.size()); + list.addAll(list0); + } catch (IOException e) { + continue; + } + } + if (list.size() >= count) return list; + } + } + return list; + } + public final Entry replace(final Entry row) throws RowSpaceExceededException { final int i = indexFor(row); if (i < 0) return null; diff --git a/source/net/yacy/kelondro/table/SQLTable.java b/source/net/yacy/kelondro/table/SQLTable.java index 77fb88aa2..d78b9ad8f 100644 --- a/source/net/yacy/kelondro/table/SQLTable.java +++ b/source/net/yacy/kelondro/table/SQLTable.java @@ -281,6 +281,10 @@ public class SQLTable implements ObjectIndex, Iterable { return null; } + public List top(int count) throws IOException { + return null; + } + public CloneableIterator rows(final boolean up, final byte[] startKey) throws IOException { // Objects are of type kelondroRow.Entry return null; diff --git a/source/net/yacy/kelondro/table/SplitTable.java b/source/net/yacy/kelondro/table/SplitTable.java index 9318c3e32..9d63207c9 100644 --- a/source/net/yacy/kelondro/table/SplitTable.java +++ b/source/net/yacy/kelondro/table/SplitTable.java @@ -416,6 +416,23 @@ public class SplitTable implements ObjectIndex, Iterable { return maxtable.removeOne(); } + public List top(int count) throws IOException { + final Iterator i = tables.values().iterator(); + ObjectIndex table, maxtable = null; + int maxcount = -1; + while (i.hasNext()) { + table = i.next(); + if (table.size() > maxcount) { + maxtable = table; + maxcount = table.size(); + } + } + if (maxtable == null) { + return null; + } + return maxtable.top(count); + } + public CloneableIterator keys(final boolean up, final byte[] firstKey) throws IOException { final List> c = new ArrayList>(tables.size()); final Iterator i = tables.values().iterator(); diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java index 69555c8dd..a37de7bfb 100644 --- a/source/net/yacy/kelondro/table/Table.java +++ b/source/net/yacy/kelondro/table/Table.java @@ -733,6 +733,20 @@ public class Table implements ObjectIndex, Iterable { assert table == null || table.size() == index.size() : "table.size() = " + table.size() + ", index.size() = " + index.size(); return lr; } + + public List top(int count) throws IOException { + ArrayList list = new ArrayList(); + if ((file == null) || (index == null)) return list; + long i = file.size() - 1; + while (count > 0 && i >= 0) { + byte[] b = new byte[rowdef.objectsize]; + file.get(i, b, 0); + list.add(rowdef.newEntry(b)); + i--; + count--; + } + return list; + } public synchronized void clear() throws IOException { final File f = file.filename();