performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7129 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 348dece62f
commit 570ca577c6

@ -69,7 +69,6 @@ public final class RankingProcess extends Thread {
private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000;
private final QueryParams query;
private final int maxentries;
private final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
private final int[] flagcount; // flag counter
private final HandleSet misses; // contains url-hashes that could not be found in the LURL-DB
@ -99,7 +98,6 @@ public final class RankingProcess extends Thread {
//this.handover = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.getOrdering(), 0);
this.query = query;
this.order = order;
this.maxentries = maxentries;
this.remote_peerCount = 0;
this.remote_resourceSize = 0;
this.remote_indexCount = 0;
@ -113,8 +111,6 @@ public final class RankingProcess extends Thread {
this.authorNavigator = new Navigator();
this.namespaceNavigator = new Navigator();
this.ref = new Navigator();
//this.domZones = new int[8];
//for (int i = 0; i < 8; i++) {this.domZones[i] = 0;}
this.feeders = concurrency;
assert this.feeders >= 1;
}
@ -178,7 +174,6 @@ public final class RankingProcess extends Thread {
timer = System.currentTimeMillis();
String domhash;
boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0;
Long r;
final ArrayList<WordReferenceVars> filteredEntries = new ArrayList<WordReferenceVars>();
// apply all constraints
@ -237,16 +232,12 @@ public final class RankingProcess extends Thread {
// do the ranking
for (WordReferenceVars fEntry: filteredEntries) {
// kick out entries that are too bad according to current findings
r = Long.valueOf(this.order.cardinal(fEntry));
assert maxentries != 0;
// double-check
if (urlhashes.has(fEntry.metadataHash())) continue;
// insert
stack.put(new ReverseElement<WordReferenceVars>(fEntry, r)); // inserts the element and removed the worst (which is smallest)
stack.put(new ReverseElement<WordReferenceVars>(fEntry, this.order.cardinal(fEntry))); // inserts the element and removed the worst (which is smallest)
try {
urlhashes.put(fEntry.metadataHash());
} catch (RowSpaceExceededException e) {
@ -299,30 +290,31 @@ public final class RankingProcess extends Thread {
return localSearchInclusion;
}
// todo:
// - remove redundant urls (sub-path occurred before)
// - move up shorter urls
// - root-domain guessing to prefer the root domain over other urls if search word appears in domain name
private ReverseElement<WordReferenceVars> takeRWI(final boolean skipDoubleDom) {
private ReverseElement<WordReferenceVars> takeRWI(final boolean skipDoubleDom, long timeout) {
// returns from the current RWI list the best entry and removes this entry from the list
WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>> m;
ReverseElement<WordReferenceVars> rwi;
while ((rwi = stack.poll()) != null) {
if (!skipDoubleDom) return rwi;
// check doubledom
final String domhash = new String(rwi.getElement().metadataHash()).substring(6);
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
m = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(domhash, m);
return rwi;
try {
while ((rwi = stack.poll(timeout)) != null) {
if (!skipDoubleDom) return rwi;
// check doubledom
final String domhash = new String(rwi.getElement().metadataHash()).substring(6);
m = this.doubleDomCache.get(domhash);
if (m == null) {
// first appearance of dom
m = new WeakPriorityBlockingQueue<ReverseElement<WordReferenceVars>>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll);
this.doubleDomCache.put(domhash, m);
return rwi;
}
// second appearances of dom
m.put(rwi);
}
// second appearances of dom
m.put(rwi);
} catch (InterruptedException e1) {
}
// no more entries in sorted RWI entries. Now take Elements from the doubleDomCache
// find best entry from all caches
ReverseElement<WordReferenceVars> bestEntry = null;
@ -343,12 +335,14 @@ public final class RankingProcess extends Thread {
continue;
}
o = m.peek();
if (o == null) continue;
if (o.getWeight() < bestEntry.getWeight()) {
bestEntry = o;
}
}
}
if (bestEntry == null) return null;
// finally remove the best entry from the doubledom cache
m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6));
o = m.poll();
@ -370,8 +364,9 @@ public final class RankingProcess extends Thread {
long timeLimit = System.currentTimeMillis() + timeout;
int p = -1;
byte[] urlhash;
while (System.currentTimeMillis() < timeLimit) {
final ReverseElement<WordReferenceVars> obrwi = takeRWI(skipDoubleDom);
long timeleft;
while ((timeleft = timeLimit - System.currentTimeMillis()) > 0) {
final ReverseElement<WordReferenceVars> obrwi = takeRWI(skipDoubleDom, timeleft);
if (obrwi == null) {
if (this.feedingIsFinished()) return null;
try {Thread.sleep(50);} catch (final InterruptedException e1) {}

@ -154,7 +154,7 @@ public final class SearchEvent {
}
// start worker threads to fetch urls and snippets
this.results = new ResultFetcher(loader, rankedCache, query, peers, 10000);
this.results = new ResultFetcher(loader, rankedCache, query, peers, 3000);
} else {
// do a local search
this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2);

@ -549,10 +549,10 @@ public final class yacyClient {
}
// store remote result to local result container
synchronized (containerCache) {
// insert one container into the search result buffer
containerCache.add(container[0], false, joincount); // one is enough, only the references are used, not the word
}
// insert one container into the search result buffer
// one is enough, only the references are used, not the word
containerCache.add(container[0], false, joincount);
// insert the containers to the index
for (ReferenceContainer<WordReference> c: container) try {
indexSegment.termIndex().add(c);

@ -82,14 +82,19 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
* @return the value
*/
// Looks the key up in levelB first; on a miss it removes the key from levelA and,
// if a value was found there, puts it into levelB before returning it.
// Returns null when neither level holds a value for the key.
// NOTE(review): this text is a diff rendering, so two variants of the method are interleaved below:
// one synchronized at method level and one that locks per step. Presumably the method-level
// variant is the pre-change form; confirm against the repository.
@SuppressWarnings("unchecked")
public final synchronized V get(final Object s) {
V v = this.levelB.get(s);
public final V get(final Object s) {
V v;
// the levelB lookup holds only the monitor of levelB
synchronized (this.levelB) {
v = this.levelB.get(s);
}
if (v != null) return v;
v = this.levelA.remove(s);
if (v == null) return null;
// move value from A to B; since it was already removed from A, just put it to B
//System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size());
this.levelB.put((K) s, v);
// NOTE(review): this section holds the monitor of `this`, not of levelB, so the levelB.put inside it
// is not mutually exclusive with the levelB.get above; confirm that levelB tolerates this.
synchronized (this) {
v = this.levelA.remove(s);
if (v == null) return null;
// move value from A to B; since it was already removed from A, just put it to B
//System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size());
this.levelB.put((K) s, v);
}
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
return v;
}
@ -99,9 +104,13 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
* @param s
* @return
*/
// Reports whether the key is present in either level; levelB is checked before levelA.
// NOTE(review): this text is a diff rendering, so a method-level synchronized variant and a
// variant with one lock per level are interleaved below.
public final synchronized boolean containsKey(final Object s) {
if (this.levelB.containsKey(s)) return true;
return this.levelA.containsKey(s);
public final boolean containsKey(final Object s) {
// each level is checked under its own monitor, one after the other
synchronized (this.levelB) {
if (this.levelB.containsKey(s)) return true;
}
// NOTE(review): the two checks are not atomic together, so an entry that changes level between
// them could be reported as absent; confirm this is acceptable for callers.
synchronized (this.levelA) {
return this.levelA.containsKey(s);
}
}

@ -121,7 +121,7 @@ public class WeakPriorityBlockingQueue<E> {
this.queue.add(element);
this.enqueued.release();
}
assert this.queue.size() == this.enqueued.availablePermits();
assert this.queue.size() >= this.enqueued.availablePermits() : "queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
}
/**
@ -141,10 +141,12 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue
* @throws InterruptedException
*/
// NOTE(review): this text is a diff rendering, so a method-level synchronized signature and an
// unsynchronized signature both appear, along with both forms of the final return.
public synchronized E poll(long timeout) throws InterruptedException {
public E poll(long timeout) throws InterruptedException {
// wait up to `timeout` milliseconds for a permit; return null if none was obtained
boolean a = this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS);
if (!a) return null;
return takeUnsafe();
// in the variant without method-level synchronization, only the takeUnsafe() call runs under
// the monitor of this queue; the permit wait above happens outside of it
synchronized (this) {
return takeUnsafe();
}
}
/**
@ -152,9 +154,11 @@ public class WeakPriorityBlockingQueue<E> {
* @return the head element from the queue
* @throws InterruptedException
*/
// NOTE(review): this text is a diff rendering, so a method-level synchronized signature and an
// unsynchronized signature both appear, along with both forms of the final return.
public synchronized E take() throws InterruptedException {
public E take() throws InterruptedException {
// acquire a permit first; an InterruptedException from the acquire propagates to the caller
this.enqueued.acquire();
return takeUnsafe();
// in the variant without method-level synchronization, only the takeUnsafe() call runs under
// the monitor of this queue; the permit acquisition above happens outside of it
synchronized (this) {
return takeUnsafe();
}
}
private E takeUnsafe() {
@ -162,7 +166,7 @@ public class WeakPriorityBlockingQueue<E> {
assert element != null;
this.queue.remove(element);
this.drained.add(element);
assert this.queue.size() == this.enqueued.availablePermits();
assert this.queue.size() >= this.enqueued.availablePermits() : "queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits();
return element;
}

@ -473,10 +473,15 @@ public final class Condenser {
int pos = 0;
StringBuilder word;
byte[] hash;
Integer oldpos;
while (words.hasMoreElements()) {
word = words.nextElement();
hash = Word.word2hash(word.toString());
if (!map.containsKey(hash)) map.put(hash, Integer.valueOf(pos)); // don't overwrite old values, that leads to too far word distances
// don't overwrite old values, that leads to too far word distances
oldpos = map.put(hash, Integer.valueOf(pos));
if (oldpos != null) map.put(hash, oldpos);
pos += word.length() + 1;
}
return map;

@ -50,9 +50,10 @@ import net.yacy.kelondro.logging.Log;
public class Digest {
public static BlockingQueue<MessageDigest> digestPool = new ArrayBlockingQueue<MessageDigest>(10);
private final static int digestThreads = Runtime.getRuntime().availableProcessors() + 1;
public static BlockingQueue<MessageDigest> digestPool = new ArrayBlockingQueue<MessageDigest>(digestThreads);
static {
for (int i = 0; i < 10; i++)
for (int i = 0; i < digestThreads; i++)
try {
MessageDigest digest = MessageDigest.getInstance("MD5");
digest.reset();

Loading…
Cancel
Save