From 24502fe3de42045237b73953b470d398c59dc4a4 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 6 Sep 2010 12:59:33 +0000 Subject: [PATCH] performance hacks git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7116 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../yacy/cora/document/MultiProtocolURI.java | 27 ++++++++++++-- source/net/yacy/cora/storage/SimpleARC.java | 6 ++-- .../document/parser/html/ContentScraper.java | 2 +- source/net/yacy/kelondro/blob/Compressor.java | 35 ++++++++++--------- source/net/yacy/kelondro/data/word/Word.java | 2 +- .../net/yacy/repository/LoaderDispatcher.java | 2 +- 6 files changed, 49 insertions(+), 25 deletions(-) diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java index 33e594668..1c761d6fd 100644 --- a/source/net/yacy/cora/document/MultiProtocolURI.java +++ b/source/net/yacy/cora/document/MultiProtocolURI.java @@ -587,13 +587,27 @@ public class MultiProtocolURI implements Serializable { // this is the path plus quest plus ref // if there is no quest and no ref the result is identical to getPath // this is defined according to http://java.sun.com/j2se/1.4.2/docs/api/java/net/URL.html#getFile() - if (quest == null) return (excludeReference || ref == null) ? path : path + "#" + ref; + if (quest == null) { + if (excludeReference || ref == null) return path; + StringBuilder sb = new StringBuilder(120); + sb.append(path); + sb.append('#'); + sb.append(ref); + return sb.toString(); + } String q = quest; if (removeSessionID) { for (String sid: sessionIDnames) { if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) { int p = q.indexOf('&'); - if (p < 0) return (excludeReference || ref == null) ? path : path + "#" + ref; + if (p < 0) { + if (excludeReference || ref == null) return path; + StringBuilder sb = new StringBuilder(120); + sb.append(path); + sb.append('#'); + sb.append(ref); + return sb.toString(); + } q = q.substring(p + 1); continue; } @@ -607,7 +621,14 @@ public class MultiProtocolURI implements Serializable { } } } - return (excludeReference || ref == null) ? path + "?" + q : path + "?" + q + "#" + ref; + StringBuilder sb = new StringBuilder(120); + sb.append(path); + sb.append('?'); + sb.append(q); + if (excludeReference || ref == null) return sb.toString(); + sb.append('#'); + sb.append(ref); + return sb.toString(); } public String getFileName() { diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java index 28b86462b..b489f251b 100644 --- a/source/net/yacy/cora/storage/SimpleARC.java +++ b/source/net/yacy/cora/storage/SimpleARC.java @@ -135,7 +135,7 @@ abstract class SimpleARC extends AbstractMap implements Map, I /** * iterator implements the Iterable interface */ - public Iterator> iterator() { + public final Iterator> iterator() { return entrySet().iterator(); } @@ -147,7 +147,7 @@ abstract class SimpleARC extends AbstractMap implements Map, I * @return a set view of the mappings contained in this map */ @Override - public Set> entrySet() { + public final synchronized Set> entrySet() { Set> m = new HashSet>(); for (Map.Entry entry: this.levelA.entrySet()) m.add(entry); for (Map.Entry entry: this.levelB.entrySet()) m.add(entry); @@ -158,7 +158,7 @@ abstract class SimpleARC extends AbstractMap implements Map, I * a hash code for this ARC * @return the hash code of one of the ARC partial hash tables */ - public int hashCode() { + public final int hashCode() { return this.levelA.hashCode(); } } diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index 10089b08b..d3012e342 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -284,7 +284,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { return cleanLine(super.stripAll(new String(scraper.content.getChars()))); } - private static String cleanLine(String s) { + private final static String cleanLine(String s) { StringBuilder sb = new StringBuilder(s.length()); char c, l = ' '; for (int i = 0; i < s.length(); i++) { diff --git a/source/net/yacy/kelondro/blob/Compressor.java b/source/net/yacy/kelondro/blob/Compressor.java index dab126d93..91a41278d 100644 --- a/source/net/yacy/kelondro/blob/Compressor.java +++ b/source/net/yacy/kelondro/blob/Compressor.java @@ -228,24 +228,28 @@ public class Compressor implements BLOB { } } - public synchronized byte[] get(byte[] key) throws IOException, RowSpaceExceededException { + public byte[] get(byte[] key) throws IOException, RowSpaceExceededException { // depending on the source of the result, we additionally do entry compression // because if a document was read once, we think that it will not be retrieved another time again soon - byte[] b = buffer.remove(new String(key)); - if (b != null) { - // compress the entry now and put it to the backend - try { - this.writeQueue.put(new Entity(new String(key), b)); - this.bufferlength = this.bufferlength - b.length; - return b; - } catch (InterruptedException e) { - Log.logException(e); - buffer.put(new String(key), b); + String keys = new String(key); + byte[] b = null; + synchronized (this) { + b = buffer.remove(keys); + if (b != null) { + // compress the entry now and put it to the backend + try { + this.writeQueue.put(new Entity(new String(key), b)); + this.bufferlength = this.bufferlength - b.length; + return b; + } catch (InterruptedException e) { + Log.logException(e); + buffer.put(keys, b); + } } + + // return from the backend + b = this.backend.get(key); } - - // return from the backend - b = this.backend.get(key); if (b == null) return null; return decompress(b); } @@ -263,8 +267,7 @@ public class Compressor implements BLOB { } public synchronized boolean containsKey(byte[] key) { - return - this.buffer.containsKey(new String(key)) || this.backend.containsKey(key); + return this.buffer.containsKey(new String(key)) || this.backend.containsKey(key); } public int keylength() { diff --git a/source/net/yacy/kelondro/data/word/Word.java b/source/net/yacy/kelondro/data/word/Word.java index 4408e4007..ec1c867b3 100644 --- a/source/net/yacy/kelondro/data/word/Word.java +++ b/source/net/yacy/kelondro/data/word/Word.java @@ -54,7 +54,7 @@ public class Word { */ public static final int commonHashLength = 12; - private static final int hashCacheSize = Math.max(2048, Math.min(100000, (int) (MemoryControl.available() / 20000L))); + private static final int hashCacheSize = Math.max(10000, Math.min(100000, (int) (MemoryControl.available() / 20000L))); private static final ARC hashCache = new ConcurrentARC(hashCacheSize, Runtime.getRuntime().availableProcessors()); // object carries statistics for words and sentences diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index ad2d9c89d..a93ad4cbf 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -177,7 +177,7 @@ public final class LoaderDispatcher { * @return the loaded entity in a Response object * @throws IOException */ - public Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException { + private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException { // get the protocol of the next URL final DigestURI url = request.url(); final String protocol = url.getProtocol();