diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index 8279d1fb2..3badac9d8 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -313,7 +313,7 @@ public final class QueryParams { if ((querystring != null) && (querystring.length() > 0)) { // convert Umlaute - querystring = AbstractScraper.stripAll(querystring).toLowerCase().trim(); + querystring = AbstractScraper.stripAll(querystring.toCharArray()).toLowerCase().trim(); int c; for (int i = 0; i < seps.length(); i++) { while ((c = querystring.indexOf(seps.charAt(i))) >= 0) { diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java index 0d18d458a..7827da6af 100644 --- a/source/de/anomic/search/TextSnippet.java +++ b/source/de/anomic/search/TextSnippet.java @@ -155,7 +155,8 @@ public class TextSnippet implements Comparable, Comparator, Comparator extends SimpleARC implements Map(); } + public synchronized V get(Object k) { + return super.get(k); + } public synchronized V put(K k, V v) { V r = super.put(k, v); if (r == null) keys.add(k); diff --git a/source/net/yacy/cora/storage/HashARC.java b/source/net/yacy/cora/storage/HashARC.java index aad33d4f6..0f9c88fda 100644 --- a/source/net/yacy/cora/storage/HashARC.java +++ b/source/net/yacy/cora/storage/HashARC.java @@ -21,6 +21,7 @@ package net.yacy.cora.storage; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.Map; @@ -30,17 +31,17 @@ public final class HashARC extends SimpleARC implements Map, I public HashARC(final int cacheSize) { this.cacheSize = cacheSize / 2; - super.levelA = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { + super.levelA = Collections.synchronizedMap(new LinkedHashMap(cacheSize, 0.1f, accessOrder) { private static final long serialVersionUID = 1L; @Override protected boolean removeEldestEntry(final Map.Entry eldest) { return size() > HashARC.this.cacheSize; } - }; - this.levelB = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { + }); + this.levelB = Collections.synchronizedMap(new LinkedHashMap(cacheSize, 0.1f, accessOrder) { private static final long serialVersionUID = 1L; @Override protected boolean removeEldestEntry(final Map.Entry eldest) { return size() > HashARC.this.cacheSize; } - }; + }); } } diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java index 2f8fa3e19..58b9fa99f 100644 --- a/source/net/yacy/cora/storage/SimpleARC.java +++ b/source/net/yacy/cora/storage/SimpleARC.java @@ -85,10 +85,7 @@ abstract class SimpleARC extends AbstractMap implements Map, I */ @SuppressWarnings("unchecked") public final V get(final Object s) { - V v; - synchronized (this.levelB) { - v = this.levelB.get(s); - } + V v = this.levelB.get(s); if (v != null) return v; synchronized (this) { v = this.levelA.remove(s); @@ -127,12 +124,8 @@ abstract class SimpleARC extends AbstractMap implements Map, I * @return */ public final boolean containsKey(final Object s) { - synchronized (this.levelB) { - if (this.levelB.containsKey(s)) return true; - } - synchronized (this.levelA) { - return this.levelA.containsKey(s); - } + if (this.levelB.containsKey(s)) return true; + return this.levelA.containsKey(s); } diff --git a/source/net/yacy/document/parser/html/AbstractScraper.java b/source/net/yacy/document/parser/html/AbstractScraper.java index 05e811d07..cc55b0b6b 100644 --- a/source/net/yacy/document/parser/html/AbstractScraper.java +++ b/source/net/yacy/document/parser/html/AbstractScraper.java @@ -67,12 +67,12 @@ public abstract class AbstractScraper implements Scraper { public abstract void scrapeTag1(String tagname, Properties tagopts, char[] text); - protected static String stripAllTags(String s) { - StringBuilder r = new StringBuilder(s.length()); + protected static String stripAllTags(final char[] s) { + StringBuilder r = new StringBuilder(s.length); int bc = 0; char c; - for (int p = 0; p < s.length(); p++) { - c = s.charAt(p); + for (int p = 0; p < s.length; p++) { + c = s[p]; if (c == lb) { bc++; r.append(' '); @@ -85,7 +85,7 @@ public abstract class AbstractScraper implements Scraper { return r.toString().trim(); } - public static String stripAll(String s) { + public static String stripAll(final char[] s) { return CharacterCoding.html2unicode(stripAllTags(s)); } diff --git a/source/net/yacy/document/parser/html/CharacterCoding.java b/source/net/yacy/document/parser/html/CharacterCoding.java index 16d07493f..0f0bda721 100644 --- a/source/net/yacy/document/parser/html/CharacterCoding.java +++ b/source/net/yacy/document/parser/html/CharacterCoding.java @@ -240,14 +240,19 @@ public class CharacterCoding { Character r; while (p < text.length()) { p1 = text.indexOf('&', p); - if (p1 < 0) p1 = text.length(); - sb.append(text.subSequence(p, p1)); + if (p1 < 0) { + sb.append(text, p, text.length()); + break; + } + sb.append(text, p, p1); p = p1; if (p >= text.length()) break; q = text.indexOf(';', p); if (q < 0) { - p++; - continue; + // if there is now no semicolon, then this will also fail when another ampersand is found afterwards + // we are finished here + sb.append(text, p, text.length()); + break; } s = text.substring(p, q + 1); p = q + 1; diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index d3012e342..ce9cd9556 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -122,7 +122,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { public void scrapeText(final char[] newtext, final String insideTag) { // System.out.println("SCRAPE: " + new String(newtext)); - String b = cleanLine(super.stripAll(new String(newtext))); + String b = cleanLine(super.stripAll(newtext)); if ((insideTag != null) && (!(insideTag.equals("a")))) { // texts inside tags sometimes have no punctuation at the line end // this is bad for the text sematics, because it is not possible for the @@ -265,7 +265,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { } private String recursiveParse(char[] inlineHtml) { - if (inlineHtml.length < 14) return cleanLine(super.stripAll(new String(inlineHtml))); + if (inlineHtml.length < 14) return cleanLine(super.stripAll(inlineHtml)); // start a new scraper to parse links inside this text // parsing the content @@ -276,12 +276,12 @@ public class ContentScraper extends AbstractScraper implements Scraper { writer.close(); } catch (IOException e) { Log.logException(e); - return cleanLine(super.stripAll(new String(inlineHtml))); + return cleanLine(super.stripAll(inlineHtml)); } this.anchors.putAll(scraper.getAnchors()); this.images.putAll(scraper.images); - return cleanLine(super.stripAll(new String(scraper.content.getChars()))); + return cleanLine(super.stripAll(scraper.content.getChars())); } private final static String cleanLine(String s) {