performance hacks for better search performance

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7230 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 15 years ago
parent 11bebe356b
commit b8aee6d402

@@ -313,7 +313,7 @@ public final class QueryParams {
if ((querystring != null) && (querystring.length() > 0)) {
// convert Umlaute
- querystring = AbstractScraper.stripAll(querystring).toLowerCase().trim();
+ querystring = AbstractScraper.stripAll(querystring.toCharArray()).toLowerCase().trim();
int c;
for (int i = 0; i < seps.length(); i++) {
while ((c = querystring.indexOf(seps.charAt(i))) >= 0) {

@@ -155,7 +155,8 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
try {
// first try to get the snippet from metadata
String loc;
- boolean objectWasInCache = de.anomic.http.client.Cache.has(url);
+ boolean noCacheUsage = url.isFile() || url.isSMB();
+ boolean objectWasInCache = (noCacheUsage) ? false : de.anomic.http.client.Cache.has(url);
boolean useMetadata = !objectWasInCache && !cacheStrategy.mustBeOffline();
if (useMetadata && containsAllHashes(loc = comp.dc_title(), queryhashes)) {
// try to create the snippet from information given in the url itself
@@ -175,7 +176,7 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return;
} else {
// try to load the resource from the cache
- response = loader.load(loader.request(url, true, reindexing), cacheStrategy, Long.MAX_VALUE);
+ response = loader.load(loader.request(url, true, reindexing), noCacheUsage ? CrawlProfile.CacheStrategy.NOCACHE : cacheStrategy, Long.MAX_VALUE);
if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy.mustBeOffline()) {
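
The two TextSnippet changes above skip the HTTP cache entirely for file:// and smb:// URLs and force a direct load instead. A hedged, self-contained sketch of that scheme-based decision, using a stand-in enum rather than YaCy's CrawlProfile.CacheStrategy (only NOCACHE is taken from the diff; class and method names are illustrative):

    import java.net.URI;

    public final class CacheStrategyDemo {

        // stand-in for YaCy's CrawlProfile.CacheStrategy
        enum CacheStrategy { NOCACHE, IFEXIST }

        // local file/smb resources bypass the cache; everything else keeps the requested strategy
        static CacheStrategy effectiveStrategy(final URI url, final CacheStrategy requested) {
            final String scheme = url.getScheme();
            final boolean noCacheUsage = "file".equals(scheme) || "smb".equals(scheme);
            return noCacheUsage ? CacheStrategy.NOCACHE : requested;
        }

        public static void main(final String[] args) {
            System.out.println(effectiveStrategy(URI.create("smb://server/share/doc.txt"), CacheStrategy.IFEXIST)); // NOCACHE
            System.out.println(effectiveStrategy(URI.create("http://example.org/"), CacheStrategy.IFEXIST));        // IFEXIST
        }
    }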

@@ -787,7 +787,14 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
if (obj == null) return false;
if (!(obj instanceof MultiProtocolURI)) return false;
MultiProtocolURI other = (MultiProtocolURI) obj;
- return this.toString().equals(other.toString());
+ return
+ this.protocol.equals(other.protocol) &&
+ this.host.equals(other.host) &&
+ this.userInfo.equals(other.userInfo) &&
+ this.path.equals(other.path) &&
+ this.quest.equals(other.quest) &&
+ this.port == other.port;
}
public int compareTo(MultiProtocolURI h) {
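
The rewritten equals() above compares the parsed URL components directly instead of serializing both URIs with toString() on every call. A simplified, self-contained sketch of the same pattern (SimpleURI and its fields are illustrative, not YaCy's MultiProtocolURI); hashCode() is shown because it has to stay consistent with the field-based equals():

    public final class SimpleURI {
        private final String protocol;
        private final String host;
        private final String path;
        private final int port;

        public SimpleURI(final String protocol, final String host, final int port, final String path) {
            this.protocol = protocol;
            this.host = host;
            this.port = port;
            this.path = path;
        }

        @Override
        public boolean equals(final Object obj) {
            if (this == obj) return true;
            if (!(obj instanceof SimpleURI)) return false;
            final SimpleURI other = (SimpleURI) obj;
            // cheapest comparison first, then plain field comparisons; no toString() allocation
            return this.port == other.port
                && this.protocol.equals(other.protocol)
                && this.host.equals(other.host)
                && this.path.equals(other.path);
        }

        @Override
        public int hashCode() {
            int h = this.protocol.hashCode();
            h = 31 * h + this.host.hashCode();
            h = 31 * h + this.path.hashCode();
            h = 31 * h + this.port;
            return h;
        }
    }

If a component can be null (the userInfo and quest fields above look like candidates), the field comparison needs an explicit null check, something the old toString()-based version handled implicitly.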

@@ -679,6 +679,13 @@ public class Domains {
// check dns lookup: may be a local address even if the domain name looks global
if (!recursive) return false;
InetAddress a = dnsResolve(host);
+ /*
+ if (a == null) {
+ // unknown if this is a local address. Could also be a timeout.
+ // It would be harmful to declare any public address as local, therefore return false
+ return false;
+ }
+ */
return a == null || a.isAnyLocalAddress() || a.isLinkLocalAddress() || a.isLoopbackAddress() || a.isSiteLocalAddress() || isLocal(a.getHostAddress(), false);
}
}

@@ -45,6 +45,9 @@ public final class ComparableARC<K, V> extends SimpleARC<K, V> implements Map<K,
this.limit = cacheSize;
this.keys = new LinkedList<K>();
}
+ public synchronized V get(Object k) {
+ return super.get(k);
+ }
public synchronized V put(K k, V v) {
V r = super.put(k, v);
if (r == null) keys.add(k);

@@ -21,6 +21,7 @@
package net.yacy.cora.storage;
+ import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -30,17 +31,17 @@ public final class HashARC<K, V> extends SimpleARC<K, V> implements Map<K, V>, I
public HashARC(final int cacheSize) {
this.cacheSize = cacheSize / 2;
- super.levelA = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
+ super.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize;
}
- };
- this.levelB = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
+ });
+ this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
return size() > HashARC.this.cacheSize;
}
- };
+ });
}
}
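
With both cache levels wrapped in Collections.synchronizedMap, single get/containsKey calls no longer need their own synchronized blocks, which is what the SimpleARC hunk below removes. A stand-alone sketch of the wrapped, access-ordered LinkedHashMap idiom, reduced to a plain LRU cache (LruCache is illustrative, not part of YaCy):

    import java.util.Collections;
    import java.util.LinkedHashMap;
    import java.util.Map;

    public final class LruCache<K, V> {
        private final Map<K, V> map;

        public LruCache(final int capacity) {
            // third LinkedHashMap argument = true: iteration and eviction order follow access order (LRU)
            this.map = Collections.synchronizedMap(new LinkedHashMap<K, V>(capacity, 0.75f, true) {
                private static final long serialVersionUID = 1L;
                @Override
                protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
                    // evict the least recently used entry once the capacity is exceeded
                    return size() > capacity;
                }
            });
        }

        public V get(final K key) { return this.map.get(key); }

        public V put(final K key, final V value) { return this.map.put(key, value); }

        public int size() { return this.map.size(); }
    }

Note that for an access-ordered LinkedHashMap even get() mutates internal links to record the access, so the wrapper is needed for reads as well as writes.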

@@ -85,10 +85,7 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
*/
@SuppressWarnings("unchecked")
public final V get(final Object s) {
- V v;
- synchronized (this.levelB) {
- v = this.levelB.get(s);
- }
+ V v = this.levelB.get(s);
if (v != null) return v;
synchronized (this) {
v = this.levelA.remove(s);
@@ -127,12 +124,8 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, I
* @return
*/
public final boolean containsKey(final Object s) {
- synchronized (this.levelB) {
- if (this.levelB.containsKey(s)) return true;
- }
- synchronized (this.levelA) {
- return this.levelA.containsKey(s);
- }
+ if (this.levelB.containsKey(s)) return true;
+ return this.levelA.containsKey(s);
}

@@ -67,12 +67,12 @@ public abstract class AbstractScraper implements Scraper {
public abstract void scrapeTag1(String tagname, Properties tagopts, char[] text);
- protected static String stripAllTags(String s) {
- StringBuilder r = new StringBuilder(s.length());
+ protected static String stripAllTags(final char[] s) {
+ StringBuilder r = new StringBuilder(s.length);
int bc = 0;
char c;
- for (int p = 0; p < s.length(); p++) {
- c = s.charAt(p);
+ for (int p = 0; p < s.length; p++) {
+ c = s[p];
if (c == lb) {
bc++;
r.append(' ');
@@ -85,7 +85,7 @@ public abstract class AbstractScraper implements Scraper {
return r.toString().trim();
}
- public static String stripAll(String s) {
+ public static String stripAll(final char[] s) {
return CharacterCoding.html2unicode(stripAllTags(s));
}

@@ -240,14 +240,19 @@ public class CharacterCoding {
Character r;
while (p < text.length()) {
p1 = text.indexOf('&', p);
- if (p1 < 0) p1 = text.length();
- sb.append(text.subSequence(p, p1));
+ if (p1 < 0) {
+ sb.append(text, p, text.length());
+ break;
+ }
+ sb.append(text, p, p1);
p = p1;
if (p >= text.length()) break;
q = text.indexOf(';', p);
if (q < 0) {
- p++;
- continue;
+ // if there is now no semicolon, then this will also fail when another ampersand is found afterwards
+ // we are finished here
+ sb.append(text, p, text.length());
+ break;
}
s = text.substring(p, q + 1);
p = q + 1;
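
The decoder loop now appends ranges of the input with StringBuilder.append(CharSequence, int, int) instead of allocating subSequence()/substring() copies, and it stops as soon as no further entity can occur. A reduced sketch of that pattern (EntityStripDemo is illustrative and merely strips '&...;' tokens rather than decoding them as the real html2unicode does):

    public final class EntityStripDemo {

        // Simplified illustration only: copy slices of the input without creating
        // intermediate substrings, and bail out once no '&' or ';' can follow.
        static String stripEntities(final String text) {
            final StringBuilder sb = new StringBuilder(text.length());
            int p = 0;
            while (p < text.length()) {
                final int amp = text.indexOf('&', p);
                if (amp < 0) {
                    sb.append(text, p, text.length()); // remaining tail, no substring()
                    break;
                }
                sb.append(text, p, amp);               // plain text before the entity
                final int semi = text.indexOf(';', amp);
                if (semi < 0) {
                    sb.append(text, amp, text.length()); // unterminated entity: keep verbatim, done
                    break;
                }
                p = semi + 1;                          // skip the "&...;" token itself
            }
            return sb.toString();
        }

        public static void main(final String[] args) {
            System.out.println(stripEntities("K&auml;se &amp; Brot")); // prints "Kse  Brot"
        }
    }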

@@ -122,7 +122,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
public void scrapeText(final char[] newtext, final String insideTag) {
// System.out.println("SCRAPE: " + new String(newtext));
- String b = cleanLine(super.stripAll(new String(newtext)));
+ String b = cleanLine(super.stripAll(newtext));
if ((insideTag != null) && (!(insideTag.equals("a")))) {
// texts inside tags sometimes have no punctuation at the line end
// this is bad for the text sematics, because it is not possible for the
@@ -265,7 +265,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
private String recursiveParse(char[] inlineHtml) {
- if (inlineHtml.length < 14) return cleanLine(super.stripAll(new String(inlineHtml)));
+ if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml));
// start a new scraper to parse links inside this text
// parsing the content
@@ -276,12 +276,12 @@ public class ContentScraper extends AbstractScraper implements Scraper {
writer.close();
} catch (IOException e) {
Log.logException(e);
- return cleanLine(super.stripAll(new String(inlineHtml)));
+ return cleanLine(super.stripAll(inlineHtml));
}
this.anchors.putAll(scraper.getAnchors());
this.images.putAll(scraper.images);
- return cleanLine(super.stripAll(new String(scraper.content.getChars())));
+ return cleanLine(super.stripAll(scraper.content.getChars()));
}
private final static String cleanLine(String s) {
