better control of ranking order in sort stack

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6514 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 93caa38d55
commit 29fde9ed49

@ -89,7 +89,7 @@ public final class RankingProcess extends Thread {
// attention: if minEntries is too high, this method will not terminate within the maxTime
// sortorder: 0 = hash, 1 = url, 2 = ranking
this.localSearchInclusion = null;
this.stack = new SortStack<WordReferenceVars>(maxentries);
this.stack = new SortStack<WordReferenceVars>(maxentries, true);
this.doubleDomCache = new HashMap<String, SortStack<WordReferenceVars>>();
this.handover = new HashSet<String>();
this.query = query;
@ -234,7 +234,7 @@ public final class RankingProcess extends Thread {
// kick out entries that are too bad according to current findings
r = Long.valueOf(this.query.getOrder().cardinal(fEntry));
assert maxentries != 0;
if ((maxentries >= 0) && (stack.size() >= maxentries) && (stack.bottom(r.longValue()))) continue;
if (maxentries >= 0 && stack.size() >= maxentries && stack.bottom(r.longValue())) continue;
// insert
if ((maxentries < 0) || (stack.size() < maxentries)) {
@ -315,7 +315,7 @@ public final class RankingProcess extends Thread {
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
m = new SortStack<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
m = new SortStack<WordReferenceVars>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll, true);
this.doubleDomCache.put(domhash, m);
return rwi;
}
@ -446,6 +446,7 @@ public final class RankingProcess extends Thread {
}
// accept url
//System.out.println("handing over hash " + page.hash());
this.handover.add(page.hash()); // remember that we handed over this url
return page;
}

@ -80,8 +80,8 @@ public class ResultFetcher {
this.urlRetrievalAllTime = 0;
this.snippetComputationAllTime = 0;
this.result = new SortStore<ResultEntry>(-1); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new SortStore<MediaSnippet>(-1);
this.result = new SortStore<ResultEntry>(-1, true); // this is the result, enriched with snippets, ranked and ordered by ranking
this.images = new SortStore<MediaSnippet>(-1, true);
this.failedURLs = new HashMap<String, String>(); // a map of urls to reason strings where a worker thread tried to work on, but failed.
// snippets do not need to match with the complete query hashes,

@ -356,7 +356,7 @@ public class URIMetadataRow implements URIMetadata {
// the result is a String of 12 bytes within a 72-bit space
// (each byte has a 6-bit range)
// that should be enough for all web pages in the world
return this.entry.getColString(col_hash, null);
return new String(this.entry.getPrimaryKeyBytes());
}
public long ranking() {

@ -43,17 +43,27 @@ public class SortStack<E> {
private TreeMap<Long, List<E>> onstack; // object within the stack
private ConcurrentHashMap<E, Object> instack; // keeps track which element has been on the stack
protected int maxsize;
private boolean upward;
public SortStack() {
this(-1);
public SortStack(boolean upward) {
this(-1, upward);
}
public SortStack(final int maxsize) {
/**
 * Creates a new sort stack.
 * Elements are not kept in their insert order; they are ordered by the weight given for each element,
 * and elements with preferred weights are returned first when a pop from the stack is made.
 * The stack may be ordered upward (preferring small weights) or downward (preferring high weights).
 * @param maxsize the maximum size of the stack. When the stack exceeds this number, the worst
 *        entries according to the entry order are removed. If this is set to -1, the size is unlimited.
 * @param upward the entry order; it controls which elements are returned on pop.
 *        If true, the smallest weight is returned first.
 */
public SortStack(final int maxsize, boolean upward) {
    // configuration: ordering direction and size limit (-1 means unlimited)
    this.upward = upward;
    this.maxsize = maxsize;
    // bookkeeping of which elements have been on the stack
    this.instack = new ConcurrentHashMap<E, Object>();
    // the objects within the stack, grouped by weight
    this.onstack = new TreeMap<Long, List<E>>();
}
@ -73,7 +83,7 @@ public class SortStack<E> {
}
/**
* put a elememt on the stack using a order of the weight
* put an element on the stack, ordered by its weight
* @param element
* @param weight
*/
@ -97,7 +107,7 @@ public class SortStack<E> {
while (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) synchronized (this.onstack) {
List<E> l;
if (!this.onstack.isEmpty() && this.onstack.size() > this.maxsize) {
l = this.onstack.remove(this.onstack.lastKey());
l = this.onstack.remove((this.upward) ? this.onstack.lastKey() : this.onstack.firstKey());
for (E e: l) instack.remove(e);
}
}
@ -113,7 +123,7 @@ public class SortStack<E> {
final Long w;
synchronized (this.onstack) {
if (this.onstack.isEmpty()) return null;
w = this.onstack.firstKey();
w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey();
final List<E> l = this.onstack.get(w);
element = l.get(0);
}
@ -131,7 +141,7 @@ public class SortStack<E> {
final Long w;
synchronized (this.onstack) {
if (this.onstack.isEmpty()) return null;
w = this.onstack.firstKey();
w = (this.upward) ? this.onstack.firstKey() : this.onstack.lastKey();
final List<E> l = this.onstack.get(w);
element = l.remove(0);
this.instack.remove(element);
@ -168,7 +178,7 @@ public class SortStack<E> {
if (this.onstack.isEmpty()) return true;
Long l;
synchronized (this.onstack) {
l = this.onstack.lastKey();
l = (this.upward) ? this.onstack.lastKey() : this.onstack.firstKey();
}
return weight > l.longValue();
}

@ -43,12 +43,20 @@ public class SortStore<E> extends SortStack<E> {
private ConcurrentHashMap<E, Object> offset; // keeps track which element has been on the stack or is now in the offstack
private long largest;
public SortStore() {
this(-1);
public SortStore(boolean upward) {
this(-1, upward);
}
public SortStore(final int maxsize) {
super(maxsize);
/**
* create a new sort stack
* all elements in the stack are not ordered by their insert order but by a given element weight
* weights that are preferred are returned first when a pop from the stack is made
* the stack may be ordered upward (preferring small weights) or downward (preferring high weights)
* @param maxsize the maximum size of the stack. When the stack exceeds this number, then the worst entries according to entry order are removed
* @param upward is the entry order and controls which elements are returned on pop. if true, then the smallest is returned first
*/
public SortStore(final int maxsize, boolean upward) {
super(maxsize, upward);
this.largest = Long.MIN_VALUE;
this.offstack = new ArrayList<stackElement>();
this.offset = new ConcurrentHashMap<E, Object>();
@ -152,7 +160,7 @@ public class SortStore<E> extends SortStack<E> {
}
public static void main(String[] args) {
SortStore<String> a = new SortStore<String>();
SortStore<String> a = new SortStore<String>(true);
a.push("abc", 1L);
a.pop();
a.push("abc", 2L);

Loading…
Cancel
Save