better control of ranking order in sort stack

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6514 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 93caa38d55
commit 29fde9ed49

@ -89,7 +89,7 @@ public final class RankingProcess extends Thread {
// attention: if minEntries is too high, this method will not terminate within the maxTime // attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking // sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null; this.localSearchInclusion = null;
this.stack = new SortStack<WordReferenceVars>(maxentries); this.stack = new SortStack<WordReferenceVars>(maxentries, true);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>(); this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>(); this.handover = new HashSet<String>();
this.query = query; this.query = query;
@ -234,7 +234,7 @@ public final class RankingProcess extends Thread {
// kick out entries that are too bad according to current findings // kick out entries that are too bad according to current findings
r = Long.valueOf(this.query.getOrder().cardinal(fEntry)); r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
assert maxentries != 0; assert maxentries != 0;
if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue; if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
// insert // insert
if ((maxentries < 0) || (stack.size() < maxentries)) { if ((maxentries < 0) || (stack.size() < maxentries)) {
@ -315,7 +315,7 @@ public final class RankingProcess extends Thread {
m = this.doubleDomCache.get(domhash); m = this.doubleDomCache.get(domhash);
if (m == null) { if (m == null) {
// first appearance of dom // first appearance of dom
m = new SortStack<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); m = new SortStack<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll, true);
this.doubleDomCache.put(domhash, m); this.doubleDomCache.put(domhash, m);
return rwi; return rwi;
} }
@ -446,6 +446,7 @@ public final class RankingProcess extends Thread {
} }
// accept url // accept url
//System.out.println("handing over hash " + page.hash());
this.handover.add(page.hash()); // remember that we handed over this url this.handover.add(page.hash()); // remember that we handed over this url
return page; return page;
} }

@ -80,8 +80,8 @@ public class ResultFetcher {
this.urlRetrievalAllTime = 0; this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0; this.snippetComputationAllTime = 0;
this.result = new SortStore<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking this.result = new SortStore<ResultEntry>(-1, true); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new SortStore<MediaSnippet>(-1); this.images = new SortStore<MediaSnippet>(-1, true);
this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed. this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
// snippets do not need to match with the complete query hashes, // snippets do not need to match with the complete query hashes,

@ -356,7 +356,7 @@ public class URIMetadataRow implements URIMetadata {
// the result is a String of 12 bytes within a 72-bit space // the result is a String of 12 bytes within a 72-bit space
// (each byte has an 6-bit range) // (each byte has an 6-bit range)
// that should be enough for all web pages on the world // that should be enough for all web pages on the world
return this.entry.getColString(col_hash, null); return new String(this.entry.getPrimaryKeyBytes());
} }
public long ranking() { public long ranking() {

@ -43,17 +43,27 @@ public class SortStack<E> {
private TreeMap<Long, List<E>> onstack; // object within the stack private TreeMap<Long, List<E>> onstack; // object within the stack
private ConcurrentHashMap<E, Object> instack; // keeps track which element has been on the stack private ConcurrentHashMap<E, Object> instack; // keeps track which element has been on the stack
protected int maxsize; protected int maxsize;
private boolean upward;
public SortStack() { public SortStack(boolean upward) {
this(-1); this(-1, upward);
} }
public SortStack(final int maxsize) { /**
* create a new sort stack
* all elements in the stack are not ordered by their insert order but by a given element weight
* weights that are preferred are returned first when a pop from the stack is made
* the stack may be ordered upward (preferring small weights) or downward (preferring high weights)
* @param maxsize the maximum size of the stack. When the stack exceeds this number, then the worst entries according to entry order are removed
* @param upward is the entry order and controls which elements are returned on pop. if true, then the smallest is returned first
*/
public SortStack(final int maxsize, boolean upward) {
// the maxsize is the maximum number of entries in the stack // the maxsize is the maximum number of entries in the stack
// if this is set to -1, the size is unlimited // if this is set to -1, the size is unlimited
this.onstack = new TreeMap<Long, List<E>>(); this.onstack = new TreeMap<Long, List<E>>();
this.instack = new ConcurrentHashMap<E, Object>(); this.instack = new ConcurrentHashMap<E, Object>();
this.maxsize = maxsize; this.maxsize = maxsize;
this.upward = upward;
} }
@ -73,7 +83,7 @@ public class SortStack<E> {
} }
/** /**
* put a elememt on the stack using a order of the weight * put a element on the stack using a order of the weight
* @param element * @param element
* @param weight * @param weight
*/ */
@ -97,7 +107,7 @@ public class SortStack<E> {
while (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) synchronized (this.onstack) { while (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) synchronized (this.onstack) {
List<E> l; List<E> l;
if (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) { if (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) {
l = this.onstack.remove(this.onstack.lastKey()); l = this.onstack.remove((this.upward) ? this.onstack.lastKey() : this.onstack.firstKey());
for (E e: l) instack.remove(e); for (E e: l) instack.remove(e);
} }
} }
@ -113,7 +123,7 @@ public class SortStack<E> {
final Long w; final Long w;
synchronized (this.onstack) { synchronized (this.onstack) {
if (this.onstack.isEmpty()) return null; if (this.onstack.isEmpty()) return null;
w = this.onstack.firstKey(); w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey();
final List<E> l = this.onstack.get(w); final List<E> l = this.onstack.get(w);
element = l.get(0); element = l.get(0);
} }
@ -131,7 +141,7 @@ public class SortStack<E> {
final Long w; final Long w;
synchronized (this.onstack) { synchronized (this.onstack) {
if (this.onstack.isEmpty()) return null; if (this.onstack.isEmpty()) return null;
w = this.onstack.firstKey(); w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey();
final List<E> l = this.onstack.get(w); final List<E> l = this.onstack.get(w);
element = l.remove(0); element = l.remove(0);
this.instack.remove(element); this.instack.remove(element);
@ -168,7 +178,7 @@ public class SortStack<E> {
if (this.onstack.isEmpty()) return true; if (this.onstack.isEmpty()) return true;
Long l; Long l;
synchronized (this.onstack) { synchronized (this.onstack) {
l = this.onstack.lastKey(); l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey();
} }
return weight > l.longValue(); return weight > l.longValue();
} }

@ -43,12 +43,20 @@ public class SortStore<E> extends SortStack<E> {
private ConcurrentHashMap<E, Object> offset; // keeps track which element has been on the stack or is now in the offstack private ConcurrentHashMap<E, Object> offset; // keeps track which element has been on the stack or is now in the offstack
private long largest; private long largest;
public SortStore() { public SortStore(boolean upward) {
this(-1); this(-1, upward);
} }
public SortStore(final int maxsize) { /**
super(maxsize); * create a new sort stack
* all elements in the stack are not ordered by their insert order but by a given element weight
* weights that are preferred are returned first when a pop from the stack is made
* the stack may be ordered upward (preferring small weights) or downward (preferring high weights)
* @param maxsize the maximum size of the stack. When the stack exceeds this number, then the worst entries according to entry order are removed
* @param upward is the entry order and controls which elements are returned on pop. if true, then the smallest is returned first
*/
public SortStore(final int maxsize, boolean upward) {
super(maxsize, upward);
this.largest = Long.MIN_VALUE; this.largest = Long.MIN_VALUE;
this.offstack = new ArrayList<stackElement>(); this.offstack = new ArrayList<stackElement>();
this.offset = new ConcurrentHashMap<E, Object>(); this.offset = new ConcurrentHashMap<E, Object>();
@ -152,7 +160,7 @@ public class SortStore<E> extends SortStack<E> {
} }
public static void main(String[] args) { public static void main(String[] args) {
SortStore<String> a = new SortStore<String>(); SortStore<String> a = new SortStore<String>(true);
a.push("abc", 1L); a.push("abc", 1L);
a.pop(); a.pop();
a.push("abc", 2L); a.push("abc", 2L);

Loading…
Cancel
Save