enhanced speed of the OrderedScoreMap inc method and of size comparisons in concurrent environments

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7653 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent b788182954
commit c17d102bd8

@ -138,12 +138,12 @@ public class DidYouMean {
final ReversibleScoreMap<String> scored = new ClusteredScoreMap<String>();
for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (scored.size() >= 2 * preSortSelection) break;
if (!(scored.sizeSmaller(2 * preSortSelection))) break;
scored.inc(s, index.count(Word.word2hash(s)));
}
final SortedSet<String> countSorted = Collections.synchronizedSortedSet(new TreeSet<String>(new headMatchingComparator(this.word, this.INDEX_SIZE_COMPARATOR)));
final int wc = index.count(Word.word2hash(this.word)); // all counts must be greater than this
while (scored.size() > 0 && countSorted.size() < preSortSelection) {
while (!scored.isEmpty() && countSorted.size() < preSortSelection) {
final String s = scored.getMaxKey();
int score = scored.delete(s);
if (s.length() >= MinimumOutputWordLength && score > wc) countSorted.add(s);

@ -612,7 +612,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
URIMetadataRow urlref;
String urlhash;
count += 10; // make some more to prevent that we have to do this again after deletions too soon.
if (count < 0 || count > s.size()) count = s.size();
if (count < 0 || s.sizeSmaller(count)) count = s.size();
statsDump = new ArrayList<hostStat>();
URIMetadataRow.Components comps;
DigestURI url;

@ -572,7 +572,7 @@ public final class RankingProcess extends Thread {
public ScoreMap<String> getNamespaceNavigator() {
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("namespace") < 0) return new ClusteredScoreMap<String>();
if (this.namespaceNavigator.size() < 2) this.namespaceNavigator.clear(); // navigators with one entry are not useful
if (this.namespaceNavigator.sizeSmaller(2)) this.namespaceNavigator.clear(); // navigators with one entry are not useful
return this.namespaceNavigator;
}
@ -583,7 +583,7 @@ public final class RankingProcess extends Thread {
final Iterator<String> domhashs = this.hostNavigator.keys(false);
URIMetadataRow row;
String domhash, urlhash, hostname;
if (this.hostResolver != null) while (domhashs.hasNext() && result.size() < 30) {
if (this.hostResolver != null) while (domhashs.hasNext() && result.sizeSmaller(30)) {
domhash = domhashs.next();
if (domhash == null) continue;
urlhash = this.hostResolver.get(domhash);
@ -593,7 +593,7 @@ public final class RankingProcess extends Thread {
result.set(hostname, this.hostNavigator.get(domhash));
}
}
if (result.size() < 2) result.clear(); // navigators with one entry are not useful
if (result.sizeSmaller(2)) result.clear(); // navigators with one entry are not useful
return result;
}
@ -610,7 +610,7 @@ public final class RankingProcess extends Thread {
// words that appeared in the url or the description of all urls
final ScoreMap<String> result = new ConcurrentScoreMap<String>();
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("topics") < 0) return result;
if (this.ref.size() < 2) this.ref.clear(); // navigators with one entry are not useful
if (this.ref.sizeSmaller(2)) this.ref.clear(); // navigators with one entry are not useful
final Map<String, Float> counts = new HashMap<String, Float>();
final Iterator<String> i = this.ref.keys(false);
String word;
@ -666,7 +666,7 @@ public final class RankingProcess extends Thread {
// create a list of words that had been computed by statistics over all
// words that appeared in the url or the description of all urls
if (!this.query.navigators.equals("all") && this.query.navigators.indexOf("authors") < 0) return new ConcurrentScoreMap<String>();
if (this.authorNavigator.size() < 2) this.authorNavigator.clear(); // navigators with one entry are not useful
if (this.authorNavigator.sizeSmaller(2)) this.authorNavigator.clear(); // navigators with one entry are not useful
return this.authorNavigator;
}

@ -179,6 +179,10 @@ public final class ClusteredScoreMap<E> implements ReversibleScoreMap<E> {
return map.size();
}
/**
 * Tests whether this score map currently holds fewer entries than the given bound.
 * NOTE(review): this method is not declared synchronized, while the isEmpty()
 * directly below it is - confirm that the unsynchronized read is intentional.
 *
 * @param size the exclusive upper bound the entry count is compared against
 * @return true if map.size() is strictly less than size
 */
public boolean sizeSmaller(int size) {
return map.size() < size;
}
public synchronized boolean isEmpty() {
return map.isEmpty();
}

@ -84,6 +84,10 @@ public class ConcurrentScoreMap<E> implements ScoreMap<E> {
return map.size();
}
/**
 * Tests whether this score map currently holds fewer entries than the given bound.
 *
 * @param size the exclusive upper bound the entry count is compared against
 * @return true if map.size() is strictly less than size
 */
public boolean sizeSmaller(int size) {
return map.size() < size;
}
public boolean isEmpty() {
return map.isEmpty();
}

@ -39,7 +39,7 @@ import java.util.concurrent.atomic.AtomicInteger;
public class OrderedScoreMap<E> implements ScoreMap<E> {
protected final Map<E, AtomicInteger> map; // a mapping from a reference to the cluster key
public OrderedScoreMap(Comparator<? super E> comparator) {
@ -82,17 +82,38 @@ public class OrderedScoreMap<E> implements ScoreMap<E> {
}
}
public synchronized int size() {
return map.size();
/**
 * Returns the number of entries in this score map.
 * The count is read while holding the monitor of the backing map.
 *
 * @return the value of map.size()
 */
public int size() {
synchronized (map) {
return map.size();
}
}
/**
 * Returns true if the size of the score map is smaller than the given size.
 * The size is first read without holding any lock; only when that read does
 * not already report "smaller" is the comparison repeated while holding the
 * monitor of the backing map.
 * NOTE(review): a true result from the unlocked read is returned without
 * confirmation under the lock - whether that read is safe depends on the
 * concrete Map implementation behind the field; confirm.
 *
 * @param size the exclusive upper bound the entry count is compared against
 * @return true if map.size() is strictly less than size
 */
public boolean sizeSmaller(int size) {
// unlocked check: return immediately if it already reports a smaller size
if (map.size() < size) return true;
// otherwise repeat the comparison while holding the map's monitor
synchronized (map) {
return map.size() < size;
}
}
public synchronized boolean isEmpty() {
return map.isEmpty();
/**
 * Tests whether this score map contains no entries.
 * The emptiness check is first made without holding any lock; only when that
 * check reports a non-empty map is it repeated while holding the monitor of
 * the backing map.
 * NOTE(review): a true result from the unlocked check is returned without
 * confirmation under the lock - confirm this is acceptable for the Map
 * implementation in use.
 *
 * @return true if map.isEmpty() reports true
 */
public boolean isEmpty() {
// unlocked check: return immediately if the map already reports empty
if (map.isEmpty()) return true;
// otherwise repeat the check while holding the map's monitor
synchronized (map) {
return map.isEmpty();
}
}
public void inc(final E obj) {
if (obj == null) return;
AtomicInteger score;
AtomicInteger score = this.map.get(obj);
if (score != null) {
score.incrementAndGet();
return;
}
synchronized (map) {
score = this.map.get(obj);
if (score == null) {
@ -156,8 +177,10 @@ public class OrderedScoreMap<E> implements ScoreMap<E> {
return score.intValue();
}
public synchronized boolean containsKey(final E obj) {
return map.containsKey(obj);
/**
 * Tests whether the given object is present as a key in this score map.
 * The lookup is performed while holding the monitor of the backing map.
 *
 * @param obj the key to look up
 * @return the result of map.containsKey(obj)
 */
public boolean containsKey(final E obj) {
synchronized (map) {
return map.containsKey(obj);
}
}
public int get(final E obj) {

@ -43,7 +43,7 @@ public interface ScoreMap<E> {
public void shrinkToMinScore(int minScore);
public int size();
public boolean sizeSmaller(int size);
public boolean isEmpty();
public void set(final E obj, final int newScore);

@ -83,11 +83,6 @@ public final class Condenser {
public static final int flag_cat_hasapp = 23; // the page refers to (at least one) application file
private final static int numlength = 5;
private final static NumberFormat intStringFormatter = NumberFormat.getIntegerInstance();
static {
intStringFormatter.setMinimumIntegerDigits(numlength);
intStringFormatter.setMaximumIntegerDigits(numlength);
}
//private Properties analysis;
private Map<String, Word> words; // a string (the words) to (indexWord) - relation
@ -99,6 +94,7 @@ public final class Condenser {
public int RESULT_DIFF_SENTENCES = -1;
public Bitfield RESULT_FLAGS = new Bitfield(4);
private Identificator languageIdentificator;
private final NumberFormat intStringFormatter = NumberFormat.getIntegerInstance(); // use a new instance for each object for a better concurrency
public Condenser(
final Document document,
@ -108,6 +104,8 @@ public final class Condenser {
) {
// if addMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag
this.intStringFormatter.setMinimumIntegerDigits(numlength);
this.intStringFormatter.setMaximumIntegerDigits(numlength);
this.words = new HashMap<String, Word>();
this.RESULT_FLAGS = new Bitfield(4);

@ -77,7 +77,7 @@ public class WordCache {
if (word.length() < commonWordsMinLength) return;
if (MemoryControl.shortStatus()) commonWords.clear();
commonWords.inc(word);
if (commonWords.size() > commonWordsMaxSize) {
if (!(commonWords.sizeSmaller(commonWordsMaxSize))) {
commonWords.shrinkToMaxSize(commonWordsMaxSize / 2);
}
}

Loading…
Cancel
Save