From 29fde9ed4931fec006afba2f220bc2c38cee9605 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 3 Dec 2009 00:36:07 +0000 Subject: [PATCH] better control of ranking order in sort stack git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6514 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/search/RankingProcess.java | 7 +++-- source/de/anomic/search/ResultFetcher.java | 4 +-- .../kelondro/data/meta/URIMetadataRow.java | 2 +- source/net/yacy/kelondro/util/SortStack.java | 28 +++++++++++++------ source/net/yacy/kelondro/util/SortStore.java | 18 ++++++++---- 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 4d72ef06b..a17dc6463 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -89,7 +89,7 @@ public final class RankingProcess extends Thread { // attention: if minEntries is too high, this method will not terminate within the maxTime // sortorder: 0 = hash, 1 = url, 2 = ranking this.localSearchInclusion = null; - this.stack = new SortStack(maxentries); + this.stack = new SortStack(maxentries, true); this.doubleDomCache = new HashMap>(); this.handover = new HashSet(); this.query = query; @@ -234,7 +234,7 @@ public final class RankingProcess extends Thread { // kick out entries that are too bad according to current findings r = Long.valueOf(this.query.getOrder().cardinal(fEntry)); assert maxentries != 0; - if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue; + if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue; // insert if ((maxentries < 0) || (stack.size() < maxentries)) { @@ -315,7 +315,7 @@ public final class RankingProcess extends Thread { m = this.doubleDomCache.get(domhash); if (m == null) { // first appearance of dom - m = new SortStack((query.specialRights) ? 
maxDoubleDomSpecial : maxDoubleDomAll); + m = new SortStack((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll, true); this.doubleDomCache.put(domhash, m); return rwi; } @@ -446,6 +446,7 @@ public final class RankingProcess extends Thread { } // accept url + //System.out.println("handing over hash " + page.hash()); this.handover.add(page.hash()); // remember that we handed over this url return page; } diff --git a/source/de/anomic/search/ResultFetcher.java b/source/de/anomic/search/ResultFetcher.java index 9b7eb813b..62623868d 100644 --- a/source/de/anomic/search/ResultFetcher.java +++ b/source/de/anomic/search/ResultFetcher.java @@ -80,8 +80,8 @@ public class ResultFetcher { this.urlRetrievalAllTime = 0; this.snippetComputationAllTime = 0; - this.result = new SortStore(-1); // this is the result, enriched with snippets, ranked and ordered by ranking - this.images = new SortStore(-1); + this.result = new SortStore(-1, true); // this is the result, enriched with snippets, ranked and ordered by ranking + this.images = new SortStore(-1, true); this.failedURLs = new HashMap(); // a map of urls to reason strings where a worker thread tried to work on, but failed. 
// snippets do not need to match with the complete query hashes, diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java index 1a1dd8068..e60fd642f 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataRow.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataRow.java @@ -356,7 +356,7 @@ public class URIMetadataRow implements URIMetadata { // the result is a String of 12 bytes within a 72-bit space // (each byte has an 6-bit range) // that should be enough for all web pages on the world - return this.entry.getColString(col_hash, null); + return new String(this.entry.getPrimaryKeyBytes()); } public long ranking() { diff --git a/source/net/yacy/kelondro/util/SortStack.java b/source/net/yacy/kelondro/util/SortStack.java index a4f29adb6..84b34944f 100644 --- a/source/net/yacy/kelondro/util/SortStack.java +++ b/source/net/yacy/kelondro/util/SortStack.java @@ -43,17 +43,27 @@ public class SortStack { private TreeMap> onstack; // object within the stack private ConcurrentHashMap instack; // keeps track which element has been on the stack protected int maxsize; + private boolean upward; - public SortStack() { - this(-1); + public SortStack(boolean upward) { + this(-1, upward); } - - public SortStack(final int maxsize) { + + /** + * create a new sort stack + * all elements in the stack are not ordered by their insert order but by a given element weight + * weights that are preferred are returned first when a pop from the stack is made + * the stack may be ordered upward (preferring small weights) or downward (preferring high weights) + * @param maxsize the maximum size of the stack. When the stack exceeds this number, then the worst entries according to entry order are removed + * @param upward is the entry order and controls which elements are returned on pop. 
if true, then the smallest is returned first + */ + public SortStack(final int maxsize, boolean upward) { // the maxsize is the maximum number of entries in the stack // if this is set to -1, the size is unlimited this.onstack = new TreeMap>(); this.instack = new ConcurrentHashMap(); this.maxsize = maxsize; + this.upward = upward; } @@ -73,7 +83,7 @@ public class SortStack { } /** - * put a elememt on the stack using a order of the weight + * put a element on the stack using a order of the weight * @param element * @param weight */ @@ -97,7 +107,7 @@ public class SortStack { while (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) synchronized (this.onstack) { List l; if (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) { - l = this.onstack.remove(this.onstack.lastKey()); + l = this.onstack.remove((this.upward) ? this.onstack.lastKey() : this.onstack.firstKey()); for (E e: l) instack.remove(e); } } @@ -113,7 +123,7 @@ public class SortStack { final Long w; synchronized (this.onstack) { if (this.onstack.isEmpty()) return null; - w = this.onstack.firstKey(); + w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey(); final List l = this.onstack.get(w); element = l.get(0); } @@ -131,7 +141,7 @@ public class SortStack { final Long w; synchronized (this.onstack) { if (this.onstack.isEmpty()) return null; - w = this.onstack.firstKey(); + w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey(); final List l = this.onstack.get(w); element = l.remove(0); this.instack.remove(element); @@ -168,7 +178,7 @@ public class SortStack { if (this.onstack.isEmpty()) return true; Long l; synchronized (this.onstack) { - l = this.onstack.lastKey(); + l = (this.upward) ? 
this.onstack.lastKey() : this.onstack.firstKey(); } return weight > l.longValue(); } diff --git a/source/net/yacy/kelondro/util/SortStore.java b/source/net/yacy/kelondro/util/SortStore.java index 6f8d7f3fa..a320c149d 100644 --- a/source/net/yacy/kelondro/util/SortStore.java +++ b/source/net/yacy/kelondro/util/SortStore.java @@ -43,12 +43,20 @@ public class SortStore extends SortStack { private ConcurrentHashMap offset; // keeps track which element has been on the stack or is now in the offstack private long largest; - public SortStore() { - this(-1); + public SortStore(boolean upward) { + this(-1, upward); } - public SortStore(final int maxsize) { - super(maxsize); + /** + * create a new sort stack + * all elements in the stack are not ordered by their insert order but by a given element weight + * weights that are preferred are returned first when a pop from the stack is made + * the stack may be ordered upward (preferring small weights) or downward (preferring high weights) + * @param maxsize the maximum size of the stack. When the stack exceeds this number, then the worst entries according to entry order are removed + * @param upward is the entry order and controls which elements are returned on pop. if true, then the smallest is returned first + */ + public SortStore(final int maxsize, boolean upward) { + super(maxsize, upward); this.largest = Long.MIN_VALUE; this.offstack = new ArrayList(); this.offset = new ConcurrentHashMap(); @@ -152,7 +160,7 @@ public class SortStore extends SortStack { } public static void main(String[] args) { - SortStore a = new SortStore(); + SortStore a = new SortStore(true); a.push("abc", 1L); a.pop(); a.push("abc", 2L);