diff --git a/source/de/anomic/data/DidYouMean.java b/source/de/anomic/data/DidYouMean.java index de39a2136..12defa1b9 100644 --- a/source/de/anomic/data/DidYouMean.java +++ b/source/de/anomic/data/DidYouMean.java @@ -138,12 +138,12 @@ public class DidYouMean { final ReversibleScoreMap scored = new ClusteredScoreMap(); for (final String s: preSorted) { if (System.currentTimeMillis() > timelimit) break; - if (scored.size() >= 2 * preSortSelection) break; + if (!(scored.sizeSmaller(2 * preSortSelection))) break; scored.inc(s, index.count(Word.word2hash(s))); } final SortedSet countSorted = Collections.synchronizedSortedSet(new TreeSet(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR))); final int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this - while (scored.size() > 0 && countSorted.size() < preSortSelection) { + while (!scored.isEmpty() && countSorted.size() < preSortSelection) { final String s = scored.getMaxKey(); int score = scored.delete(s); if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s); diff --git a/source/de/anomic/search/MetadataRepository.java b/source/de/anomic/search/MetadataRepository.java index 6fd5ab297..7acbdf68d 100644 --- a/source/de/anomic/search/MetadataRepository.java +++ b/source/de/anomic/search/MetadataRepository.java @@ -612,7 +612,7 @@ public final class MetadataRepository implements Iterable { URIMetadataRow urlref; String urlhash; count += 10; // make some more to prevent that we have to do this again after deletions too soon. 
- if (count < 0 || count > s.size()) count = s.size(); + if (count < 0 || s.sizeSmaller(count)) count = s.size(); statsDump = new ArrayList(); URIMetadataRow.Components comps; DigestURI url; diff --git a/source/de/anomic/search/RankingProcess.java b/source/de/anomic/search/RankingProcess.java index 2a6559268..26f7e78ea 100644 --- a/source/de/anomic/search/RankingProcess.java +++ b/source/de/anomic/search/RankingProcess.java @@ -572,7 +572,7 @@ public final class RankingProcess extends Thread { public ScoreMap getNamespaceNavigator() { if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ClusteredScoreMap(); - if (this.namespaceNavigator.size() < 2) this.namespaceNavigator.clear(); // navigators with one entry are not useful + if (this.namespaceNavigator.sizeSmaller(2)) this.namespaceNavigator.clear(); // navigators with one entry are not useful return this.namespaceNavigator; } @@ -583,7 +583,7 @@ public final class RankingProcess extends Thread { final Iterator domhashs = this.hostNavigator.keys(false); URIMetadataRow row; String domhash, urlhash, hostname; - if (this.hostResolver != null) while (domhashs.hasNext() && result.size() < 30) { + if (this.hostResolver != null) while (domhashs.hasNext() && result.sizeSmaller(30)) { domhash = domhashs.next(); if (domhash == null) continue; urlhash = this.hostResolver.get(domhash); @@ -593,7 +593,7 @@ public final class RankingProcess extends Thread { result.set(hostname, this.hostNavigator.get(domhash)); } } - if (result.size() < 2) result.clear(); // navigators with one entry are not useful + if (result.sizeSmaller(2)) result.clear(); // navigators with one entry are not useful return result; } @@ -610,7 +610,7 @@ public final class RankingProcess extends Thread { // words that appeared in the url or the description of all urls final ScoreMap result = new ConcurrentScoreMap(); if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return 
result; - if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful + if (this.ref.sizeSmaller(2)) this.ref.clear(); // navigators with one entry are not useful final Map counts = new HashMap(); final Iterator i = this.ref.keys(false); String word; @@ -666,7 +666,7 @@ public final class RankingProcess extends Thread { // create a list of words that had been computed by statistics over all // words that appeared in the url or the description of all urls if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ConcurrentScoreMap(); - if (this.authorNavigator.size() < 2) this.authorNavigator.clear(); // navigators with one entry are not useful + if (this.authorNavigator.sizeSmaller(2)) this.authorNavigator.clear(); // navigators with one entry are not useful return this.authorNavigator; } diff --git a/source/net/yacy/cora/storage/ClusteredScoreMap.java b/source/net/yacy/cora/storage/ClusteredScoreMap.java index df55283cc..b066013db 100644 --- a/source/net/yacy/cora/storage/ClusteredScoreMap.java +++ b/source/net/yacy/cora/storage/ClusteredScoreMap.java @@ -179,6 +179,10 @@ public final class ClusteredScoreMap implements ReversibleScoreMap { return map.size(); } + public boolean sizeSmaller(int size) { + return map.size() < size; + } + public synchronized boolean isEmpty() { return map.isEmpty(); } diff --git a/source/net/yacy/cora/storage/ConcurrentScoreMap.java b/source/net/yacy/cora/storage/ConcurrentScoreMap.java index 087d60aa9..3cdcebb44 100644 --- a/source/net/yacy/cora/storage/ConcurrentScoreMap.java +++ b/source/net/yacy/cora/storage/ConcurrentScoreMap.java @@ -84,6 +84,10 @@ public class ConcurrentScoreMap implements ScoreMap { return map.size(); } + public boolean sizeSmaller(int size) { + return map.size() < size; + } + public boolean isEmpty() { return map.isEmpty(); } diff --git a/source/net/yacy/cora/storage/OrderedScoreMap.java b/source/net/yacy/cora/storage/OrderedScoreMap.java 
index 4cdca3397..50d7d4ed1 100644 --- a/source/net/yacy/cora/storage/OrderedScoreMap.java +++ b/source/net/yacy/cora/storage/OrderedScoreMap.java @@ -39,7 +39,7 @@ import java.util.concurrent.atomic.AtomicInteger; public class OrderedScoreMap implements ScoreMap { - + protected final Map map; // a mapping from a reference to the cluster key public OrderedScoreMap(Comparator comparator) { @@ -82,17 +82,38 @@ public class OrderedScoreMap implements ScoreMap { } } - public synchronized int size() { - return map.size(); + public int size() { + synchronized (map) { + return map.size(); + } + } + + /** + * return true if the size of the score map is smaller than the given size + * @param size the size to compare the number of entries against + * @return true if the map holds fewer entries than the given size + */ + public boolean sizeSmaller(int size) { + if (map.size() < size) return true; + synchronized (map) { + return map.size() < size; + } } - public synchronized boolean isEmpty() { - return map.isEmpty(); + public boolean isEmpty() { + if (map.isEmpty()) return true; + synchronized (map) { + return map.isEmpty(); + } } public void inc(final E obj) { if (obj == null) return; - AtomicInteger score; + AtomicInteger score = this.map.get(obj); + if (score != null) { + score.incrementAndGet(); + return; + } synchronized (map) { score = this.map.get(obj); if (score == null) { @@ -156,8 +177,10 @@ public class OrderedScoreMap implements ScoreMap { return score.intValue(); } - public synchronized boolean containsKey(final E obj) { - return map.containsKey(obj); + public boolean containsKey(final E obj) { + synchronized (map) { + return map.containsKey(obj); + } } public int get(final E obj) { diff --git a/source/net/yacy/cora/storage/ScoreMap.java b/source/net/yacy/cora/storage/ScoreMap.java index 49a60ee37..c94c4e5c3 100644 --- a/source/net/yacy/cora/storage/ScoreMap.java +++ b/source/net/yacy/cora/storage/ScoreMap.java @@ -43,7 +43,7 @@ public interface ScoreMap { public void shrinkToMinScore(int minScore); public int size(); - + public boolean sizeSmaller(int size); public 
boolean isEmpty(); public void set(final E obj, final int newScore); diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index 0bb17dc1f..2c9729c5f 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -83,11 +83,6 @@ public final class Condenser { public static final int flag_cat_hasapp = 23; // the page refers to (at least one) application file private final static int numlength = 5; - private final static NumberFormat intStringFormatter = NumberFormat.getIntegerInstance(); - static { - intStringFormatter.setMinimumIntegerDigits(numlength); - intStringFormatter.setMaximumIntegerDigits(numlength); - } //private Properties analysis; private Map words; // a string (the words) to (indexWord) - relation @@ -99,6 +94,7 @@ public final class Condenser { public int RESULT_DIFF_SENTENCES = -1; public Bitfield RESULT_FLAGS = new Bitfield(4); private Identificator languageIdentificator; + private final NumberFormat intStringFormatter = NumberFormat.getIntegerInstance(); // use a new instance for each object for a better concurrency public Condenser( final Document document, @@ -108,6 +104,8 @@ public final class Condenser { ) { // if addMedia == true, then all the media links are also parsed and added to the words // added media words are flagged with the appropriate media flag + this.intStringFormatter.setMinimumIntegerDigits(numlength); + this.intStringFormatter.setMaximumIntegerDigits(numlength); this.words = new HashMap(); this.RESULT_FLAGS = new Bitfield(4); diff --git a/source/net/yacy/document/WordCache.java b/source/net/yacy/document/WordCache.java index a917728cf..236c9d780 100644 --- a/source/net/yacy/document/WordCache.java +++ b/source/net/yacy/document/WordCache.java @@ -77,7 +77,7 @@ public class WordCache { if (word.length() < commonWordsMinLength) return; if (MemoryControl.shortStatus()) commonWords.clear(); commonWords.inc(word); - if (commonWords.size() > 
commonWordsMaxSize) { + if (!(commonWords.sizeSmaller(commonWordsMaxSize))) { commonWords.shrinkToMaxSize(commonWordsMaxSize / 2); } }