From 570ca577c65e0bdadee210510c4157839e93ac03 Mon Sep 17 00:00:00 2001 From: orbiter Date: Thu, 9 Sep 2010 22:42:54 +0000 Subject: [PATCH] performance hacks git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7129 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/search/RankingProcess.java | 57 +++++++++---------- source/de/anomic/search/SearchEvent.java | 2 +- source/de/anomic/yacy/yacyClient.java | 8 +-- source/net/yacy/cora/storage/SimpleARC.java | 29 ++++++---- .../storage/WeakPriorityBlockingQueue.java | 16 ++++-- source/net/yacy/document/Condenser.java | 7 ++- source/net/yacy/kelondro/order/Digest.java | 5 +- 7 files changed, 69 insertions(+), 55 deletions(-) diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 6f545ccca..18e244fa0 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -69,7 +69,6 @@ public final class RankingProcess extends Thread { private static final int maxDoubleDomAll = 100, maxDoubleDomSpecial = 10000; private final QueryParams query; - private final int maxentries; private final HandleSet urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion) private final int[] flagcount; // flag counter private final HandleSet misses; // contains url-hashes that could not been found in the LURL-DB @@ -99,7 +98,6 @@ public final class RankingProcess extends Thread { //this.handover = new HandleSet(URIMetadataRow.rowdef.primaryKeyLength, URIMetadataRow.rowdef.getOrdering(), 0); this.query = query; this.order = order; - this.maxentries = maxentries; this.remote_peerCount = 0; this.remote_resourceSize = 0; this.remote_indexCount = 0; @@ -113,8 +111,6 @@ public final class RankingProcess extends Thread { this.authorNavigator = new Navigator(); this.namespaceNavigator = new Navigator(); this.ref = new Navigator(); - //this.domZones = new int[8]; - //for (int i = 0; i < 8; i++) {this.domZones[i] = 0;} this.feeders = concurrency; assert this.feeders >= 1; } @@ -178,7 +174,6 @@ public final class RankingProcess extends Thread { timer = System.currentTimeMillis(); String domhash; boolean nav_hosts = this.query.navigators.equals("all") || this.query.navigators.indexOf("hosts") >= 0; - Long r; final ArrayList filteredEntries = new ArrayList(); // apply all constraints @@ -237,16 +232,12 @@ public final class RankingProcess extends Thread { // do the ranking for (WordReferenceVars fEntry: filteredEntries) { - - // kick out entries that are too bad according to current findings - r = Long.valueOf(this.order.cardinal(fEntry)); - assert maxentries != 0; // double-check if (urlhashes.has(fEntry.metadataHash())) continue; // insert - stack.put(new ReverseElement(fEntry, r)); // inserts the element and removed the worst (which is smallest) + stack.put(new ReverseElement(fEntry, this.order.cardinal(fEntry))); // inserts the element and removed the worst (which is smallest) try { urlhashes.put(fEntry.metadataHash()); } catch (RowSpaceExceededException e) { @@ -299,30 +290,31 @@ public final class RankingProcess extends Thread { return localSearchInclusion; } - // todo: - // - remove redundant urls (sub-path occurred before) - // - move up shorter urls - // - root-domain guessing to prefer the root domain over other urls if search word appears in domain name - - - private ReverseElement takeRWI(final boolean skipDoubleDom) { + private ReverseElement takeRWI(final boolean skipDoubleDom, long timeout) { + // returns from the current RWI list the best entry and removes this entry from the list WeakPriorityBlockingQueue> m; ReverseElement rwi; - while ((rwi = stack.poll()) != null) { - if (!skipDoubleDom) return rwi; - // check doubledom - final String domhash = new String(rwi.getElement().metadataHash()).substring(6); - m = this.doubleDomCache.get(domhash); - if (m == null) { - // first appearance of dom - m = new WeakPriorityBlockingQueue>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); - this.doubleDomCache.put(domhash, m); - return rwi; + try { + while ((rwi = stack.poll(timeout)) != null) { + if (!skipDoubleDom) return rwi; + + // check doubledom + final String domhash = new String(rwi.getElement().metadataHash()).substring(6); + m = this.doubleDomCache.get(domhash); + if (m == null) { + // first appearance of dom + m = new WeakPriorityBlockingQueue>((query.specialRights) ? maxDoubleDomSpecial : maxDoubleDomAll); + this.doubleDomCache.put(domhash, m); + return rwi; + } + + // second appearances of dom + m.put(rwi); } - // second appearances of dom - m.put(rwi); + } catch (InterruptedException e1) { } + // no more entries in sorted RWI entries. Now take Elements from the doubleDomCache // find best entry from all caches ReverseElement bestEntry = null; @@ -343,12 +335,14 @@ public final class RankingProcess extends Thread { continue; } o = m.peek(); + if (o == null) continue; if (o.getWeight() < bestEntry.getWeight()) { bestEntry = o; } } } if (bestEntry == null) return null; + // finally remove the best entry from the doubledom cache m = this.doubleDomCache.get(new String(bestEntry.getElement().metadataHash()).substring(6)); o = m.poll(); @@ -370,8 +364,9 @@ public final class RankingProcess extends Thread { long timeLimit = System.currentTimeMillis() + timeout; int p = -1; byte[] urlhash; - while (System.currentTimeMillis() < timeLimit) { - final ReverseElement obrwi = takeRWI(skipDoubleDom); + long timeleft; + while ((timeleft = timeLimit - System.currentTimeMillis()) > 0) { + final ReverseElement obrwi = takeRWI(skipDoubleDom, timeleft); if (obrwi == null) { if (this.feedingIsFinished()) return null; try {Thread.sleep(50);} catch (final InterruptedException e1) {} diff --git a/source/de/anomic/search/SearchEvent.java b/source/de/anomic/search/SearchEvent.java index 5a82241c2..d0c2ec2a5 100644 --- a/source/de/anomic/search/SearchEvent.java +++ b/source/de/anomic/search/SearchEvent.java @@ -154,7 +154,7 @@ public final class SearchEvent { } // start worker threads to fetch urls and snippets - this.results = new ResultFetcher(loader, rankedCache, query, peers, 10000); + this.results = new ResultFetcher(loader, rankedCache, query, peers, 3000); } else { // do a local search this.rankedCache = new RankingProcess(this.query, this.order, max_results_preparation, 2); diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index 26fe6ce61..d7b20d98c 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -549,10 +549,10 @@ public final class yacyClient { } // store remote result to local result container - synchronized (containerCache) { - // insert one container into the search result buffer - containerCache.add(container[0], false, joincount); // one is enough, only the references are used, not the word - } + // insert one container into the search result buffer + // one is enough, only the references are used, not the word + containerCache.add(container[0], false, joincount); + // insert the containers to the index for (ReferenceContainer c: container) try { indexSegment.termIndex().add(c); diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java index b489f251b..7e5af9c18 100644 --- a/source/net/yacy/cora/storage/SimpleARC.java +++ b/source/net/yacy/cora/storage/SimpleARC.java @@ -82,14 +82,19 @@ abstract class SimpleARC extends AbstractMap implements Map, I * @return the value */ @SuppressWarnings("unchecked") - public final synchronized V get(final Object s) { - V v = this.levelB.get(s); + public final V get(final Object s) { + V v; + synchronized (this.levelB) { + v = this.levelB.get(s); + } if (v != null) return v; - v = this.levelA.remove(s); - if (v == null) return null; - // move value from A to B; since it was already removed from A, just put it to B - //System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size()); - this.levelB.put((K) s, v); + synchronized (this) { + v = this.levelA.remove(s); + if (v == null) return null; + // move value from A to B; since it was already removed from A, just put it to B + //System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size()); + this.levelB.put((K) s, v); + } assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically return v; } @@ -99,9 +104,13 @@ abstract class SimpleARC extends AbstractMap implements Map, I * @param s * @return */ - public final synchronized boolean containsKey(final Object s) { - if (this.levelB.containsKey(s)) return true; - return this.levelA.containsKey(s); + public final boolean containsKey(final Object s) { + synchronized (this.levelB) { + if (this.levelB.containsKey(s)) return true; + } + synchronized (this.levelA) { + return this.levelA.containsKey(s); + } } diff --git a/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java b/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java index 2c6d0dbd9..ad7fe0093 100644 --- a/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java +++ b/source/net/yacy/cora/storage/WeakPriorityBlockingQueue.java @@ -121,7 +121,7 @@ public class WeakPriorityBlockingQueue { this.queue.add(element); this.enqueued.release(); } - assert this.queue.size() == this.enqueued.availablePermits(); + assert this.queue.size() >= this.enqueued.availablePermits() : "queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); } /** @@ -141,10 +141,12 @@ public class WeakPriorityBlockingQueue { * @return the head element from the queue * @throws InterruptedException */ - public synchronized E poll(long timeout) throws InterruptedException { + public E poll(long timeout) throws InterruptedException { boolean a = this.enqueued.tryAcquire(timeout, TimeUnit.MILLISECONDS); if (!a) return null; - return takeUnsafe(); + synchronized (this) { + return takeUnsafe(); + } } /** @@ -152,9 +154,11 @@ public class WeakPriorityBlockingQueue { * @return the head element from the queue * @throws InterruptedException */ - public synchronized E take() throws InterruptedException { + public E take() throws InterruptedException { this.enqueued.acquire(); - return takeUnsafe(); + synchronized (this) { + return takeUnsafe(); + } } private E takeUnsafe() { @@ -162,7 +166,7 @@ public class WeakPriorityBlockingQueue { assert element != null; this.queue.remove(element); this.drained.add(element); - assert this.queue.size() == this.enqueued.availablePermits(); + assert this.queue.size() >= this.enqueued.availablePermits() : "queue.size() = " + this.queue.size() + ", enqueued.availablePermits() = " + this.enqueued.availablePermits(); return element; } diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index 486178727..858197268 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -473,10 +473,15 @@ public final class Condenser { int pos = 0; StringBuilder word; byte[] hash; + Integer oldpos; while (words.hasMoreElements()) { word = words.nextElement(); hash = Word.word2hash(word.toString()); - if (!map.containsKey(hash)) map.put(hash, Integer.valueOf(pos)); // don't overwrite old values, that leads to too far word distances + + // don't overwrite old values, that leads to too far word distances + oldpos = map.put(hash, Integer.valueOf(pos)); + if (oldpos != null) map.put(hash, oldpos); + pos += word.length() + 1; } return map; diff --git a/source/net/yacy/kelondro/order/Digest.java b/source/net/yacy/kelondro/order/Digest.java index 528f37ca0..8cc31e033 100644 --- a/source/net/yacy/kelondro/order/Digest.java +++ b/source/net/yacy/kelondro/order/Digest.java @@ -50,9 +50,10 @@ import net.yacy.kelondro.logging.Log; public class Digest { - public static BlockingQueue digestPool = new ArrayBlockingQueue(10); + private final static int digestThreads = Runtime.getRuntime().availableProcessors() + 1; + public static BlockingQueue digestPool = new ArrayBlockingQueue(digestThreads); static { - for (int i = 0; i < 10; i++) + for (int i = 0; i < digestThreads; i++) try { MessageDigest digest = MessageDigest.getInstance("MD5"); digest.reset();