performance hacks for better search performance

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7230 6c8d7289-2bf4-0310-a012-ef5d649a1542
15 years ago · b8aee6d402
parent 11bebe356b
commit b8aee6d402
10 changed files with 48 additions and 31 deletions
--- a/source/de/anomic/search/QueryParams.java
+++ b/source/de/anomic/search/QueryParams.java
@@ -313,7 +313,7 @@ public final class QueryParams {
        if ((querystring != null) && (querystring.length() > 0)) {
        
            // convert Umlaute
-            querystring = AbstractScraper.stripAll(querystring).toLowerCase().trim();
+            querystring = AbstractScraper.stripAll(querystring.toCharArray()).toLowerCase().trim();
            int c;
            for (int i = 0; i < seps.length(); i++) {
                while ((c = querystring.indexOf(seps.charAt(i))) >= 0) {
--- a/source/de/anomic/search/TextSnippet.java
+++ b/source/de/anomic/search/TextSnippet.java
@@ -155,7 +155,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
        try {
            // first try to get the snippet from metadata
            String loc;
-            boolean objectWasInCache = de.anomic.http.client.Cache.has(url);
+            boolean noCacheUsage = url.isFile() || url.isSMB();
+            boolean objectWasInCache = (noCacheUsage) ? false : de.anomic.http.client.Cache.has(url);
            boolean useMetadata = !objectWasInCache && !cacheStrategy.mustBeOffline();
            if (useMetadata && containsAllHashes(loc = comp.dc_title(), queryhashes)) {
                // try to create the snippet from information given in the url itself
@@ -175,7 +176,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
                return;
            } else {
                // try to load the resource from the cache
-                response = loader.load(loader.request(url, true, reindexing), cacheStrategy, Long.MAX_VALUE);
+                response = loader.load(loader.request(url, true, reindexing), noCacheUsage ? CrawlProfile.CacheStrategy.NOCACHE : cacheStrategy, Long.MAX_VALUE);
                if (response == null) {
                    // in case that we did not get any result we can still return a success when we are not allowed to go online
                    if (cacheStrategy.mustBeOffline()) {
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@@ -787,7 +787,14 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
        if (obj == null) return false;
        if (!(obj instanceof MultiProtocolURI)) return false;
        MultiProtocolURI other = (MultiProtocolURI) obj;
-        return this.toString().equals(other.toString());
+        
+        return
+          this.protocol.equals(other.protocol) && 
+          this.host.equals(other.host) && 
+          this.userInfo.equals(other.userInfo) && 
+          this.path.equals(other.path) && 
+          this.quest.equals(other.quest) && 
+          this.port == other.port; 
    }

    public int compareTo(MultiProtocolURI h) {
--- a/source/net/yacy/cora/protocol/Domains.java
+++ b/source/net/yacy/cora/protocol/Domains.java
@@ -679,6 +679,13 @@ public class Domains {
        // check dns lookup: may be a local address even if the domain name looks global
        if (!recursive) return false;
        InetAddress a = dnsResolve(host);
+        /*
+        if (a == null) {
+            // unknown if this is a local address. Could also be a timeout.
+            // It would be harmful to declare any public address as local, therefore return false
+            return false;
+        }
+        */
        return a == null || a.isAnyLocalAddress() || a.isLinkLocalAddress() || a.isLoopbackAddress() || a.isSiteLocalAddress() || isLocal(a.getHostAddress(), false);
    }
 }
--- a/source/net/yacy/cora/storage/ComparableARC.java
+++ b/source/net/yacy/cora/storage/ComparableARC.java
@@ -45,6 +45,9 @@ public final class ComparableARC<K, V> extends SimpleARC<K, V> implements Map<K,
            this.limit = cacheSize;
            this.keys = new LinkedList<K>();
        }
+        public synchronized V get(Object k) {
+            return super.get(k);
+        }
        public synchronized V put(K k, V v) {
            V r = super.put(k, v);
            if (r == null) keys.add(k);
--- a/source/net/yacy/cora/storage/HashARC.java
+++ b/source/net/yacy/cora/storage/HashARC.java
@@ -21,6 +21,7 @@

 package net.yacy.cora.storage;

+import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.Map;

@@ -30,17 +31,17 @@ public final class HashARC<K, V> extends SimpleARC<K, V> implements Map<K, V>, I
    
    public HashARC(final int cacheSize) {
        this.cacheSize = cacheSize / 2;
-        super.levelA = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
+        super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
            private static final long serialVersionUID = 1L;
            @Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
                return size() > HashARC.this.cacheSize;
            }
-        };
-        this.levelB = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
+        });
+        this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
            private static final long serialVersionUID = 1L;
            @Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
                return size() > HashARC.this.cacheSize;
            }
-        };
+        });
    }
 }
--- a/source/net/yacy/cora/storage/SimpleARC.java
+++ b/source/net/yacy/cora/storage/SimpleARC.java
@@ -85,10 +85,7 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
     */
    @SuppressWarnings("unchecked")
    public final V get(final Object s) {
-        V v;
-        synchronized (this.levelB) {
-            v = this.levelB.get(s);
-        }
+        V v = this.levelB.get(s);
        if (v != null) return v;
        synchronized (this) {
            v = this.levelA.remove(s);
@@ -127,12 +124,8 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
     * @return
     */
    public final boolean containsKey(final Object s) {
-        synchronized (this.levelB) {
-            if (this.levelB.containsKey(s)) return true;
-        }
-        synchronized (this.levelA) {
-            return this.levelA.containsKey(s);
-        }
+        if (this.levelB.containsKey(s)) return true;
+        return this.levelA.containsKey(s);
    }
   
    
--- a/source/net/yacy/document/parser/html/AbstractScraper.java
+++ b/source/net/yacy/document/parser/html/AbstractScraper.java
@@ -67,12 +67,12 @@ public abstract class AbstractScraper implements Scraper {

    public abstract void scrapeTag1(String tagname, Properties tagopts, char[] text);

-    protected static String stripAllTags(String s) {
-        StringBuilder r = new StringBuilder(s.length());
+    protected static String stripAllTags(final char[] s) {
+        StringBuilder r = new StringBuilder(s.length);
        int bc = 0;
        char c;
-        for (int p = 0; p < s.length(); p++) {
-            c = s.charAt(p);
+        for (int p = 0; p < s.length; p++) {
+            c = s[p];
            if (c == lb) {
                bc++;
                r.append(' ');
@@ -85,7 +85,7 @@ public abstract class AbstractScraper implements Scraper {
        return r.toString().trim();
    }

-    public static String stripAll(String s) {
+    public static String stripAll(final char[] s) {
        return CharacterCoding.html2unicode(stripAllTags(s));
    }

--- a/source/net/yacy/document/parser/html/CharacterCoding.java
+++ b/source/net/yacy/document/parser/html/CharacterCoding.java
@@ -240,14 +240,19 @@ public class CharacterCoding {
        Character r;
        while (p < text.length()) {
            p1 = text.indexOf('&', p);
-            if (p1 < 0) p1 = text.length();
-            sb.append(text.subSequence(p, p1));
+            if (p1 < 0) {
+                sb.append(text, p, text.length());
+                break;
+            }
+            sb.append(text, p, p1);
            p = p1;
            if (p >= text.length()) break;
            q = text.indexOf(';', p);
            if (q < 0) {
-                p++;
-                continue;
+                // if there is now no semicolon, then this will also fail when another ampersand is found afterwards
+                // we are finished here
+                sb.append(text, p, text.length());
+                break;
            }
            s = text.substring(p, q + 1);
            p = q + 1;
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -122,7 +122,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    
    public void scrapeText(final char[] newtext, final String insideTag) {
        // System.out.println("SCRAPE: " + new String(newtext));
-        String b = cleanLine(super.stripAll(new String(newtext)));
+        String b = cleanLine(super.stripAll(newtext));
        if ((insideTag != null) && (!(insideTag.equals("a")))) {
            // texts inside tags sometimes have no punctuation at the line end
            // this is bad for the text sematics, because it is not possible for the
@@ -265,7 +265,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
    }

    private String recursiveParse(char[] inlineHtml) {
-        if (inlineHtml.length < 14) return cleanLine(super.stripAll(new String(inlineHtml)));
+        if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml));
        
        // start a new scraper to parse links inside this text
        // parsing the content
@@ -276,12 +276,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
            writer.close();
        } catch (IOException e) {
            Log.logException(e);
-            return cleanLine(super.stripAll(new String(inlineHtml)));
+            return cleanLine(super.stripAll(inlineHtml));
        }
        this.anchors.putAll(scraper.getAnchors());
        this.images.putAll(scraper.images);
        
-        return cleanLine(super.stripAll(new String(scraper.content.getChars())));
+        return cleanLine(super.stripAll(scraper.content.getChars()));
    }
    
    private final static String cleanLine(String s) {