added more score cluster options and made score cluster usage more transparent

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7248 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent e8f90201a5
commit e4d561971e

@ -32,12 +32,13 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
@ -104,7 +105,7 @@ public class Supporter {
accumulateVotes(sb, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB); accumulateVotes(sb, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB); //accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB); //accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
final ScoreCluster<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes final DynamicScore<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder); final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder);
final HashMap<String, Entry> Supporter = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties final HashMap<String, Entry> Supporter = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateSupporter(sb, Supporter, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB); accumulateSupporter(sb, Supporter, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
@ -197,7 +198,7 @@ public class Supporter {
private static void accumulateSupporter( private static void accumulateSupporter(
final Switchboard sb, final Switchboard sb,
final HashMap<String, Entry> Supporter, final ScoreCluster<String> ranking, final Row rowdef, final HashMap<String, Entry> Supporter, final DynamicScore<String> ranking, final Row rowdef,
final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) { final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) {
final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype)); final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype));
yacyNewsDB.Record record; yacyNewsDB.Record record;

@ -32,12 +32,13 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.Row; import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry; import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import de.anomic.search.Switchboard; import de.anomic.search.Switchboard;
@ -112,7 +113,7 @@ public class Surftips {
accumulateVotes(sb , negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB); accumulateVotes(sb , negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB); //accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB); //accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
final ScoreCluster<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes final DynamicScore<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder); final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder);
final HashMap<String, Entry> surftips = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties final HashMap<String, Entry> surftips = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateSurftips(sb, surftips, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB); accumulateSurftips(sb, surftips, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
@ -206,7 +207,7 @@ public class Surftips {
private static void accumulateSurftips( private static void accumulateSurftips(
final Switchboard sb, final Switchboard sb,
final HashMap<String, Entry> surftips, final ScoreCluster<String> ranking, final Row rowdef, final HashMap<String, Entry> surftips, final DynamicScore<String> ranking, final Row rowdef,
final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) { final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) {
final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype)); final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype));
yacyNewsDB.Record record; yacyNewsDB.Record record;

@ -32,19 +32,20 @@ import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.ReverseMapIterator; import net.yacy.kelondro.util.ReverseMapIterator;
import net.yacy.kelondro.util.ScoreCluster;
import de.anomic.crawler.retrieval.EventOrigin; import de.anomic.crawler.retrieval.EventOrigin;
public final class ResultURLs { public final class ResultURLs {
private final Map<EventOrigin, Map<String, InitExecEntry>> resultStacks; // a mapping from urlHash to Entries private final Map<EventOrigin, Map<String, InitExecEntry>> resultStacks; // a mapping from urlHash to Entries
private final Map<EventOrigin, ScoreCluster<String>> resultDomains; private final Map<EventOrigin, DynamicScore<String>> resultDomains;
public class InitExecEntry { public class InitExecEntry {
public byte[] initiatorHash, executorHash; public byte[] initiatorHash, executorHash;
@ -57,7 +58,7 @@ public final class ResultURLs {
public ResultURLs(int initialStackCapacity) { public ResultURLs(int initialStackCapacity) {
// init result stacks // init result stacks
resultStacks = new ConcurrentHashMap<EventOrigin, Map<String, InitExecEntry>>(initialStackCapacity); resultStacks = new ConcurrentHashMap<EventOrigin, Map<String, InitExecEntry>>(initialStackCapacity);
resultDomains = new ConcurrentHashMap<EventOrigin, ScoreCluster<String>>(initialStackCapacity); resultDomains = new ConcurrentHashMap<EventOrigin, DynamicScore<String>>(initialStackCapacity);
for (EventOrigin origin: EventOrigin.values()) { for (EventOrigin origin: EventOrigin.values()) {
resultStacks.put(origin, new LinkedHashMap<String, InitExecEntry>()); resultStacks.put(origin, new LinkedHashMap<String, InitExecEntry>());
resultDomains.put(origin, new ScoreCluster<String>()); resultDomains.put(origin, new ScoreCluster<String>());
@ -82,7 +83,7 @@ public final class ResultURLs {
return; return;
} }
try { try {
final ScoreCluster<String> domains = getDomains(stackType); final DynamicScore<String> domains = getDomains(stackType);
if (domains != null) { if (domains != null) {
domains.incScore(e.metadata().url().getHost()); domains.incScore(e.metadata().url().getHost());
} }
@ -99,7 +100,7 @@ public final class ResultURLs {
} }
public int getDomainListSize(final EventOrigin stack) { public int getDomainListSize(final EventOrigin stack) {
final ScoreCluster<String> domains = getDomains(stack); final DynamicScore<String> domains = getDomains(stack);
if (domains == null) return 0; if (domains == null) return 0;
return domains.size(); return domains.size();
} }
@ -155,7 +156,7 @@ public final class ResultURLs {
private Map<String, InitExecEntry> getStack(final EventOrigin stack) { private Map<String, InitExecEntry> getStack(final EventOrigin stack) {
return resultStacks.get(stack); return resultStacks.get(stack);
} }
private ScoreCluster<String> getDomains(final EventOrigin stack) { private DynamicScore<String> getDomains(final EventOrigin stack) {
return resultDomains.get(stack); return resultDomains.get(stack);
} }
@ -166,7 +167,7 @@ public final class ResultURLs {
public synchronized void clearStack(final EventOrigin stack) { public synchronized void clearStack(final EventOrigin stack) {
final Map<String, InitExecEntry> resultStack = getStack(stack); final Map<String, InitExecEntry> resultStack = getStack(stack);
if (resultStack != null) resultStack.clear(); if (resultStack != null) resultStack.clear();
final ScoreCluster<String> resultDomains = getDomains(stack); final DynamicScore<String> resultDomains = getDomains(stack);
if (resultDomains != null) { if (resultDomains != null) {
// we do not clear this completely, just remove most of the less important entries // we do not clear this completely, just remove most of the less important entries
resultDomains.shrinkToMaxSize(100); resultDomains.shrinkToMaxSize(100);

@ -7,11 +7,12 @@ import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference; import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell; import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.ScoreCluster;
/** /**
@ -127,7 +128,7 @@ public class DidYouMean {
if (this.word.indexOf(' ') > 0) return getSuggestions(this.word.split(" "), timeout, preSortSelection, this.index); if (this.word.indexOf(' ') > 0) return getSuggestions(this.word.split(" "), timeout, preSortSelection, this.index);
SortedSet<String> preSorted = getSuggestions(timeout); SortedSet<String> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) return preSorted; if (System.currentTimeMillis() > timelimit) return preSorted;
ScoreCluster<String> scored = new ScoreCluster<String>(); DynamicScore<String> scored = new ScoreCluster<String>();
for (final String s: preSorted) { for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break; if (System.currentTimeMillis() > timelimit) break;
if (scored.size() >= 2 * preSortSelection) break; if (scored.size() >= 2 * preSortSelection) break;

@ -38,6 +38,8 @@ import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreMap;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
/** /**
@ -46,8 +48,15 @@ import net.yacy.kelondro.logging.Log;
*/ */
public class DidYouMeanLibrary { public class DidYouMeanLibrary {
// common word cache
private static final int commonWordsMaxSize = 100000; // maximum size of common word cache
private static final int commonWordsMinLength = 4; // words must have that length at minimum
private DynamicScore<String> commonWords = new ScoreMap<String>();
// dictionaries
private final File dictionaryPath; private final File dictionaryPath;
private TreeSet<String> dict, tcid; private TreeSet<String> dict; // the word dictionary
private TreeSet<String> tcid; // the dictionary of reverse words
/** /**
* create a new dictionary * create a new dictionary
@ -61,6 +70,20 @@ public class DidYouMeanLibrary {
reload(); reload();
} }
/**
 * Add a word to the common word cache and increase its score by one occurrence.
 * The word is trimmed and lower-cased before it is counted; <code>null</code>
 * and words shorter than <code>commonWordsMinLength</code> (after trimming) are ignored.
 * When the cache reaches <code>commonWordsMaxSize</code> entries it is shrunk to
 * half of that size.
 * @param word the word to learn, may be <code>null</code>
 */
public void learn(String word) {
    if (word == null) return;
    // Use a fixed locale: the default-locale variant maps e.g. 'I' to a dotless i
    // on Turkish systems, which would make the learned words depend on the host.
    word = word.trim().toLowerCase(java.util.Locale.ENGLISH);
    if (word.length() < commonWordsMinLength) return;
    commonWords.incScore(word);
    if (commonWords.size() >= commonWordsMaxSize) {
        // shrink to half the limit so that the cleanup does not run on every following call
        // NOTE(review): presumably the lowest-scored words are dropped first - confirm against ScoreMap
        commonWords.shrinkToMaxSize(commonWordsMaxSize / 2);
    }
}
/** /**
* scan the input directory and load all dictionaries (again) * scan the input directory and load all dictionaries (again)
*/ */

@ -40,6 +40,8 @@ import java.util.TreeSet;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow; import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -52,12 +54,8 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.table.SplitTable; import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
//import de.anomic.http.client.Client;
//import de.anomic.http.server.ResponseContainer;
public final class MetadataRepository implements Iterable<byte[]> { public final class MetadataRepository implements Iterable<byte[]> {
// class objects // class objects
@ -610,7 +608,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
Map<String, hashStat> map = domainSampleCollector(); Map<String, hashStat> map = domainSampleCollector();
// order elements by size // order elements by size
ScoreCluster<String> s = new ScoreCluster<String>(); DynamicScore<String> s = new ScoreCluster<String>();
for (Map.Entry<String, hashStat> e: map.entrySet()) { for (Map.Entry<String, hashStat> e: map.entrySet()) {
s.addScore(e.getValue().urlhash, e.getValue().count); s.addScore(e.getValue().urlhash, e.getValue().count);
} }

@ -33,6 +33,8 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.document.Condenser; import net.yacy.document.Condenser;
import net.yacy.document.LargeNumberCache; import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
@ -42,7 +44,6 @@ import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.ReferenceContainer; import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ScoreCluster;
public class ReferenceOrder { public class ReferenceOrder {
@ -51,7 +52,7 @@ public class ReferenceOrder {
private int maxdomcount; private int maxdomcount;
private WordReferenceVars min, max; private WordReferenceVars min, max;
private final ScoreCluster<String> doms; // collected for "authority" heuristic private final DynamicScore<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking; private final RankingProfile ranking;
private final String language; private final String language;

@ -29,6 +29,8 @@ import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
@ -36,7 +38,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest; import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.kelondro.util.kelondroException; import net.yacy.kelondro.util.kelondroException;
import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacyCore;
@ -57,7 +58,7 @@ public class PeerSelection {
byte[] wordhash, byte[] wordhash,
int redundancy, int redundancy,
HashMap<String, yacySeed> regularSeeds, HashMap<String, yacySeed> regularSeeds,
ScoreCluster<String> ranking) { DynamicScore<String> ranking) {
// this method is called from the search target computation // this method is called from the search target computation
long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash); long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash);
yacySeed seed; yacySeed seed;
@ -339,7 +340,7 @@ public class PeerSelection {
if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected(); if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected();
// fill a score object // fill a score object
final ScoreCluster<String> seedScore = new ScoreCluster<String>(); final DynamicScore<String> seedScore = new ScoreCluster<String>();
yacySeed ys; yacySeed ys;
long absage; long absage;
final Iterator<yacySeed> s = seedDB.seedsConnected(true, false, null, (float) 0.0); final Iterator<yacySeed> s = seedDB.seedsConnected(true, false, null, (float) 0.0);

@ -31,10 +31,11 @@ import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.index.HandleSet; import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield; import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
import de.anomic.crawler.ResultURLs; import de.anomic.crawler.ResultURLs;
@ -185,7 +186,7 @@ public class yacySearch extends Thread {
} }
// put in seeds according to dht // put in seeds according to dht
final ScoreCluster<String> ranking = new ScoreCluster<String>(); final DynamicScore<String> ranking = new ScoreCluster<String>();
final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>(); final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>(); final HashMap<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
yacySeed seed; yacySeed seed;

@ -0,0 +1,31 @@
/**
* DynamicScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
/**
 * A score collection whose per-object scores can be changed.
 * Adds modifying operations on top of {@link StaticScore}.
 *
 * @param <E> the type of the scored objects
 */
public interface DynamicScore<E> extends StaticScore<E> {

    /**
     * Increase the score of the given object.
     * NOTE(review): presumably by exactly one - confirm against the implementations.
     * @param obj the object whose score is raised
     */
    void incScore(E obj);

    /**
     * Decrease the score of the given object.
     * NOTE(review): presumably by exactly one - confirm against the implementations.
     * @param obj the object whose score is lowered
     */
    void decScore(E obj);

    /**
     * Add an increment to the score of the given object.
     * @param obj the object whose score is changed
     * @param incrementScore the amount that is added to the score
     */
    void addScore(E obj, int incrementScore);
}

@ -0,0 +1,96 @@
/**
* IntScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.Comparator;
/**
 * A mutable int counter that shall be used as counter object in Object-Counter relations.
 * The use case of this class is given when a value element of a map must be increased or decreased.
 * If the normal Integer class is used, the new value must be rewritten to the map with a newly
 * allocated number object. When using this class, only the score inside the stored object is
 * changed and nothing must be written back to the map.
 *
 * Because equals() and hashCode() depend on the mutable value, an IntScore must not be
 * modified while it is used as a key in a hash-based collection.
 */
public class IntScore implements Comparable<IntScore>, Comparator<IntScore> {

    // Caution: these shared instances are mutable like any other IntScore;
    // calling a modifying method on them changes the constant for all users.
    public static IntScore ZERO = new IntScore(0);
    public static IntScore ONE = new IntScore(1);

    private int value;

    /**
     * @param value the initial score
     */
    public IntScore(int value) {
        this.value = value;
    }

    /**
     * Create a new, independent counter object.
     * @param n the initial score
     * @return a newly allocated IntScore with the score n
     */
    public final static IntScore valueOf(final int n) {
        return new IntScore(n);
    }

    /**
     * @return the current score
     */
    public int intValue() {
        return this.value;
    }

    /** increase the score by one */
    public void inc() {
        this.value++;
    }

    /** increase the score by n */
    public void inc(int n) {
        this.value += n;
    }

    /** decrease the score by one */
    public void dec() {
        this.value--;
    }

    /** decrease the score by n */
    public void dec(int n) {
        this.value -= n;
    }

    /** overwrite the score with n */
    public void set(int n) {
        this.value = n;
    }

    /** lower the score to n if n is smaller than the current score */
    public void min(int n) {
        if (n < this.value) this.value = n;
    }

    /** raise the score to n if n is greater than the current score */
    public void max(int n) {
        if (n > this.value) this.value = n;
    }

    /**
     * Two IntScore objects are equal if they hold the same score.
     */
    @Override
    public boolean equals(Object o) {
        return (o instanceof IntScore) && this.value == ((IntScore) o).value;
    }

    /**
     * The hash code is the score itself.
     * The former expression value ^ (value >>> 32) was copied from a long-based hash:
     * on an int the shift distance 32 is reduced to 0, so the result was value ^ value,
     * which is 0 for every instance.
     */
    @Override
    public int hashCode() {
        return this.value;
    }

    /**
     * Order by score, ascending.
     * @return -1, 0 or 1 if this score is smaller than, equal to or greater than the other score
     */
    public int compareTo(IntScore o) {
        int thisVal = this.value;
        int anotherVal = o.value;
        return thisVal < anotherVal ? -1 : (thisVal == anotherVal ? 0 : 1);
    }

    /**
     * Comparator variant of {@link #compareTo(IntScore)}.
     */
    public int compare(IntScore o1, IntScore o2) {
        return o1.compareTo(o2);
    }
}

@ -1,25 +1,24 @@
// kelondroMScoreCluster.java /**
// ----------------------- * ScoreCluster
// (C) by Michael Peter Christen; mc@yacy.net * Copyright 2004, 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
// first published on http://www.anomic.de * First released 28.09.2004 at http://yacy.net
// Frankfurt, Germany, 2004 *
// last major change: 28.09.2004 * This library is free software; you can redistribute it and/or
// * modify it under the terms of the GNU Lesser General Public
// This program is free software; you can redistribute it and/or modify * License as published by the Free Software Foundation; either
// it under the terms of the GNU General Public License as published by * version 2.1 of the License, or (at your option) any later version.
// the Free Software Foundation; either version 2 of the License, or *
// (at your option) any later version. * This library is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// This program is distributed in the hope that it will be useful, * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// but WITHOUT ANY WARRANTY; without even the implied warranty of * Lesser General Public License for more details.
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
// GNU General Public License for more details. * You should have received a copy of the GNU Lesser General Public License
// * along with this program in the file lgpl21.txt
// You should have received a copy of the GNU General Public License * If not, see <http://www.gnu.org/licenses/>.
// along with this program; if not, write to the Free Software */
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.cora.storage;
package net.yacy.kelondro.util;
import java.text.ParseException; import java.text.ParseException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
@ -31,12 +30,13 @@ import java.util.Random;
import java.util.SortedMap; import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import net.yacy.kelondro.util.kelondroOutOfLimitsException;
public final class ScoreCluster<E> { public final class ScoreCluster<E> implements DynamicScore<E> {
protected final TreeMap<E, Long> refkeyDB; // a mapping from a reference to the cluster key protected final Map<E, Long> map; // a mapping from a reference to the cluster key
protected final TreeMap<Long, E> keyrefDB; // a mapping from the cluster key to the reference protected final TreeMap<Long, E> pam; // a mapping from the cluster key to the reference
private long gcount; private long gcount;
private int encnt; private int encnt;
@ -45,19 +45,20 @@ public final class ScoreCluster<E> {
} }
public ScoreCluster(Comparator<? super E> comparator) { public ScoreCluster(Comparator<? super E> comparator) {
if(comparator != null) { if (comparator == null) {
refkeyDB = new TreeMap<E, Long>(comparator); //map = new HashMap<E, Long>();
map = new TreeMap<E, Long>(comparator);
} else { } else {
refkeyDB = new TreeMap<E, Long>(); map = new TreeMap<E, Long>(comparator);
} }
keyrefDB = new TreeMap<Long, E>(); pam = new TreeMap<Long, E>();
gcount = 0; gcount = 0;
encnt = 0; encnt = 0;
} }
public synchronized void clear() { public synchronized void clear() {
refkeyDB.clear(); map.clear();
keyrefDB.clear(); pam.clear();
gcount = 0; gcount = 0;
encnt = 0; encnt = 0;
} }
@ -70,11 +71,11 @@ public final class ScoreCluster<E> {
if (maxsize < 0) return; if (maxsize < 0) return;
Long key; Long key;
synchronized (this) { synchronized (this) {
while (refkeyDB.size() > maxsize) { while (map.size() > maxsize) {
// find and remove smallest objects until cluster has demanded size // find and remove smallest objects until cluster has demanded size
key = keyrefDB.firstKey(); key = pam.firstKey();
if (key == null) break; if (key == null) break;
refkeyDB.remove(keyrefDB.remove(key)); map.remove(pam.remove(key));
} }
} }
} }
@ -87,13 +88,13 @@ public final class ScoreCluster<E> {
int score; int score;
Long key; Long key;
synchronized (this) { synchronized (this) {
while (keyrefDB.size() > 0) { while (pam.size() > 0) {
// find and remove objects where their score is smaller than the demanded minimum score // find and remove objects where their score is smaller than the demanded minimum score
key = keyrefDB.firstKey(); key = pam.firstKey();
if (key == null) break; if (key == null) break;
score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32); score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
if (score >= minScore) break; if (score >= minScore) break;
refkeyDB.remove(keyrefDB.remove(key)); map.remove(pam.remove(key));
} }
} }
} }
@ -178,19 +179,11 @@ public final class ScoreCluster<E> {
} }
public synchronized int size() { public synchronized int size() {
return refkeyDB.size(); return map.size();
} }
public synchronized boolean isEmpty() { public synchronized boolean isEmpty() {
return refkeyDB.isEmpty(); return map.isEmpty();
}
public synchronized void incScore(final E[] objs) {
for (int i = 0; i < objs.length; i++) addScore(objs[i], 1);
}
public synchronized void decScore(final E[] objs) {
for (int i = 0; i < objs.length; i++) addScore(objs[i], -1);
} }
public synchronized void incScore(final E obj) { public synchronized void incScore(final E obj) {
@ -204,7 +197,7 @@ public final class ScoreCluster<E> {
public void setScore(final E obj, final int newScore) { public void setScore(final E obj, final int newScore) {
if (obj == null) return; if (obj == null) return;
synchronized (this) { synchronized (this) {
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more Long usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (newScore < 0) throw new kelondroOutOfLimitsException(newScore); if (newScore < 0) throw new kelondroOutOfLimitsException(newScore);
if (usk == null) { if (usk == null) {
@ -212,12 +205,12 @@ public final class ScoreCluster<E> {
usk = Long.valueOf(scoreKey(encnt++, newScore)); usk = Long.valueOf(scoreKey(encnt++, newScore));
// put new value into cluster // put new value into cluster
refkeyDB.put(obj, usk); map.put(obj, usk);
keyrefDB.put(usk, obj); pam.put(usk, obj);
} else { } else {
// delete old entry // delete old entry
keyrefDB.remove(usk); pam.remove(usk);
// get previous handle and score // get previous handle and score
final long c = usk.longValue(); final long c = usk.longValue();
@ -227,8 +220,8 @@ public final class ScoreCluster<E> {
// set new value // set new value
usk = Long.valueOf(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score usk = Long.valueOf(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score
refkeyDB.put(obj, usk); map.put(obj, usk);
keyrefDB.put(usk, obj); pam.put(usk, obj);
} }
} }
// increase overall counter // increase overall counter
@ -238,7 +231,7 @@ public final class ScoreCluster<E> {
public void addScore(final E obj, final int incrementScore) { public void addScore(final E obj, final int incrementScore) {
if (obj == null) return; if (obj == null) return;
synchronized (this) { synchronized (this) {
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more Long usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) { if (usk == null) {
// set new value // set new value
@ -246,12 +239,12 @@ public final class ScoreCluster<E> {
usk = Long.valueOf(scoreKey(encnt++, incrementScore)); usk = Long.valueOf(scoreKey(encnt++, incrementScore));
// put new value into cluster // put new value into cluster
refkeyDB.put(obj, usk); map.put(obj, usk);
keyrefDB.put(usk, obj); pam.put(usk, obj);
} else { } else {
// delete old entry // delete old entry
keyrefDB.remove(usk); pam.remove(usk);
// get previous handle and score // get previous handle and score
final long c = usk.longValue(); final long c = usk.longValue();
@ -262,8 +255,8 @@ public final class ScoreCluster<E> {
final int newValue = oldScore + incrementScore; final int newValue = oldScore + incrementScore;
if (newValue < 0) throw new kelondroOutOfLimitsException(newValue); if (newValue < 0) throw new kelondroOutOfLimitsException(newValue);
usk = Long.valueOf(scoreKey(oldHandle, newValue)); // generates an unique key for a specific score usk = Long.valueOf(scoreKey(oldHandle, newValue)); // generates an unique key for a specific score
refkeyDB.put(obj, usk); map.put(obj, usk);
keyrefDB.put(usk, obj); pam.put(usk, obj);
} }
} }
// increase overall counter // increase overall counter
@ -275,11 +268,11 @@ public final class ScoreCluster<E> {
if (obj == null) return 0; if (obj == null) return 0;
final Long usk; final Long usk;
synchronized (this) { synchronized (this) {
usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) return 0; if (usk == null) return 0;
// delete old entry // delete old entry
keyrefDB.remove(usk); pam.remove(usk);
} }
// get previous handle and score // get previous handle and score
@ -292,61 +285,41 @@ public final class ScoreCluster<E> {
} }
public synchronized boolean existsScore(final E obj) { public synchronized boolean existsScore(final E obj) {
return (refkeyDB.get(obj) != null); return map.containsKey(obj);
} }
public int getScore(final E obj) { public int getScore(final E obj) {
if (obj == null) return 0; if (obj == null) return 0;
final Long cs; final Long cs;
synchronized (this) { synchronized (this) {
cs = refkeyDB.get(obj); cs = map.get(obj);
} }
if (cs == null) return 0; if (cs == null) return 0;
return (int) ((cs.longValue() & 0xFFFFFFFF00000000L) >> 32); return (int) ((cs.longValue() & 0xFFFFFFFF00000000L) >> 32);
} }
public synchronized int getMaxScore() { public synchronized int getMaxScore() {
if (refkeyDB.isEmpty()) return -1; if (map.isEmpty()) return -1;
return (int) ((keyrefDB.lastKey().longValue() & 0xFFFFFFFF00000000L) >> 32); return (int) ((pam.lastKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
} }
public synchronized int getMinScore() { public synchronized int getMinScore() {
if (refkeyDB.isEmpty()) return -1; if (map.isEmpty()) return -1;
return (int) ((keyrefDB.firstKey().longValue() & 0xFFFFFFFF00000000L) >> 32); return (int) ((pam.firstKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
} }
public synchronized E getMaxObject() { public synchronized E getMaxObject() {
if (refkeyDB.isEmpty()) return null; if (map.isEmpty()) return null;
return keyrefDB.get(keyrefDB.lastKey()); return pam.get(pam.lastKey());
} }
public synchronized E getMinObject() { public synchronized E getMinObject() {
if (refkeyDB.isEmpty()) return null; if (map.isEmpty()) return null;
return keyrefDB.get(keyrefDB.firstKey()); return pam.get(pam.firstKey());
}
/**
 * Returns up to <code>maxCount</code> objects in score order without any
 * restriction on the score range.
 * Delegates to the four-argument variant with the full <code>int</code> range.
 *
 * @param maxCount maximum number of objects to return
 * @param up direction flag handed on to the score iterator
 * @return the selected objects
 */
public synchronized E[] getScores(final int maxCount, final boolean up) {
    final int lowestScore = Integer.MIN_VALUE;
    final int highestScore = Integer.MAX_VALUE;
    return getScores(maxCount, up, lowestScore, highestScore);
}
@SuppressWarnings("unchecked")
public synchronized E[] getScores(int maxCount, final boolean up, final int minScore, final int maxScore) {
if (maxCount > refkeyDB.size()) maxCount = refkeyDB.size();
E[] s = (E[]) new Object[maxCount];
final Iterator<E> it = scores(up, minScore, maxScore);
int i = 0;
while ((i < maxCount) && (it.hasNext())) s[i++] = it.next();
if (i < maxCount) {
// re-copy the result array
E[] sc = (E[]) new Object[i];
System.arraycopy(s, 0, sc, 0, i);
s = sc;
}
return s;
} }
public String toString() { public String toString() {
return refkeyDB + " / " + keyrefDB; return map + " / " + pam;
} }
public synchronized Iterator<E> scores(final boolean up) { public synchronized Iterator<E> scores(final boolean up) {
@ -354,66 +327,13 @@ public final class ScoreCluster<E> {
return new reverseScoreIterator<E>(); return new reverseScoreIterator<E>();
} }
/**
 * Creates an iterator over the stored objects that is restricted to a score range.
 *
 * @param up direction flag handed on to the iterator
 * @param minScore lower score bound handed on to the iterator
 * @param maxScore upper score bound handed on to the iterator
 * @return a new <code>komplexScoreIterator</code> for the given parameters
 */
public synchronized Iterator<E> scores(final boolean up, final int minScore, final int maxScore) {
    final Iterator<E> ranged = new komplexScoreIterator<E>(up, minScore, maxScore);
    return ranged;
}
/**
 * Iterator over the objects whose score lies within <code>[min, max]</code>.
 * <p>
 * The iterator works on a clone of <code>keyrefDB</code> taken at construction
 * time and consumes that clone entry by entry, starting at the first key when
 * <code>up</code> is true and at the last key otherwise. The score of an entry
 * is read from the upper 32 bits of its key.
 * <p>
 * When the iteration is exhausted, {@link #next()} returns <code>null</code>
 * instead of throwing an exception.
 */
private class komplexScoreIterator<A extends E> implements Iterator<E> {

    boolean up;
    TreeMap<Long, E> keyrefDBcopy;
    E n;            // look-ahead: the object the next call to next() will return, null when exhausted
    private E last; // the object most recently returned by next(); this is what remove() deletes
    int min, max;

    @SuppressWarnings("unchecked")
    public komplexScoreIterator(final boolean up, final int minScore, final int maxScore) {
        this.up = up;
        this.min = minScore;
        this.max = maxScore;
        this.keyrefDBcopy = (TreeMap<Long, E>) keyrefDB.clone(); // NoSuchElementException here?
        this.last = null;
        internalNext();
    }

    public boolean hasNext() {
        return (n != null);
    }

    /**
     * Advances the look-ahead <code>n</code> to the next object with a score
     * inside the range, or sets it to <code>null</code> if there is none.
     */
    private void internalNext() {
        while (!keyrefDBcopy.isEmpty()) {
            final Long key = up ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey();
            n = keyrefDBcopy.remove(key);
            final int score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
            if ((score >= min) && (score <= max)) return;
            if ((up && (score > max)) || (!up && (score < min))) {
                // the range has been passed in iteration direction: stop early
                // NOTE(review): assumes the copy is ordered by the natural order of its Long keys - confirm
                keyrefDBcopy = new TreeMap<Long, E>();
                n = null;
                return;
            }
        }
        n = null;
    }

    public E next() {
        last = n;
        internalNext();
        return last;
    }

    /**
     * Deletes the object most recently returned by {@link #next()} from the
     * enclosing cluster. Previously this deleted the look-ahead object, i.e.
     * the one that had not been handed out yet.
     */
    public void remove() {
        if (last != null) {
            deleteScore(last);
            last = null; // one removal per call to next()
        }
    }

}
private class reverseScoreIterator<A extends E> implements Iterator<E> { private class reverseScoreIterator<A extends E> implements Iterator<E> {
SortedMap<Long, E> view; SortedMap<Long, E> view;
Long key; Long key;
public reverseScoreIterator() { public reverseScoreIterator() {
view = keyrefDB; view = pam;
} }
public boolean hasNext() { public boolean hasNext() {
@ -423,14 +343,14 @@ public final class ScoreCluster<E> {
public E next() { public E next() {
key = view.lastKey(); key = view.lastKey();
view = view.headMap(key); view = view.headMap(key);
final E value = keyrefDB.get(key); final E value = pam.get(key);
//System.out.println("cluster reverse iterator: score = " + ((((Long) key).longValue() & 0xFFFFFFFF00000000L) >> 32) + ", handle = " + (((Long) key).longValue() & 0xFFFFFFFFL) + ", value = " + value); //System.out.println("cluster reverse iterator: score = " + ((((Long) key).longValue() & 0xFFFFFFFF00000000L) >> 32) + ", handle = " + (((Long) key).longValue() & 0xFFFFFFFFL) + ", value = " + value);
return value; return value;
} }
public void remove() { public void remove() {
final Object val = keyrefDB.remove(key); final Object val = pam.remove(key);
if (val != null) refkeyDB.remove(val); if (val != null) map.remove(val);
} }
} }
@ -441,7 +361,7 @@ public final class ScoreCluster<E> {
Map.Entry<Long, E> entry; Map.Entry<Long, E> entry;
public simpleScoreIterator() { public simpleScoreIterator() {
ii = keyrefDB.entrySet().iterator(); ii = pam.entrySet().iterator();
} }
public boolean hasNext() { public boolean hasNext() {
@ -456,7 +376,7 @@ public final class ScoreCluster<E> {
public void remove() { public void remove() {
ii.remove(); ii.remove();
if (entry.getValue() != null) refkeyDB.remove(entry.getValue()); if (entry.getValue() != null) map.remove(entry.getValue());
} }
} }
@ -502,24 +422,12 @@ public final class ScoreCluster<E> {
} }
System.out.println("result:"); System.out.println("result:");
Object[] result; Iterator<String> i = s.scores(true);
result = s.getScores(s.size(), true); while (i.hasNext()) System.out.println("up: " + i.next());
for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]); i = s.scores(false);
result = s.getScores(s.size(), false); while (i.hasNext()) System.out.println("down: " + i.next());
for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]);
System.out.println("finished create. time = " + (System.currentTimeMillis() - time)); System.out.println("finished create. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c); System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
/*
// delete cluster
time = System.currentTimeMillis();
for (int i = 0; i < 10000; i++) {
s.deleteScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx");
c -= i/10;
}
System.out.println("finished delete. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
*/
} }
} }

@ -0,0 +1,265 @@
/**
* ScoreMap
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
/**
 * Score container that keeps one mutable {@link IntScore} counter per object in a single map.
 * <p>
 * The backing map is a {@link HashMap} when no comparator is given and a
 * {@link TreeMap} ordered by that comparator otherwise. Minimum/maximum
 * lookups scan all entries and {@link #scores(boolean)} re-sorts all entries on
 * every call. All access to the map and to the overall counter is
 * guarded by the monitor of this instance.
 */
public class ScoreMap<E> implements DynamicScore<E> {

    protected final Map<E, IntScore> map; // a mapping from an object to its score counter
    private long gcount;                  // sum of the scores of all stored objects

    /**
     * Creates a score map backed by a {@link HashMap}.
     */
    public ScoreMap() {
        this(null);
    }

    /**
     * Creates a score map.
     * @param comparator ordering for the objects; if null a {@link HashMap} is used,
     *        otherwise a {@link TreeMap} with this comparator
     */
    public ScoreMap(Comparator<? super E> comparator) {
        if (comparator == null) {
            map = new HashMap<E, IntScore>();
        } else {
            map = new TreeMap<E, IntScore>(comparator);
        }
        gcount = 0;
    }

    /**
     * Removes all objects and resets the overall counter.
     */
    public synchronized void clear() {
        map.clear();
        gcount = 0;
    }

    /**
     * shrink the cluster to a demanded size.
     * Objects are removed in groups of equal score, lowest score first, so the
     * result may hold fewer than maxsize objects.
     * @param maxsize the largest number of objects that may remain; negative values are treated as 0
     */
    public void shrinkToMaxSize(int maxsize) {
        // a negative limit can never be reached and would make the loop below run forever
        if (maxsize < 0) maxsize = 0;
        synchronized (this) {
            if (this.map.size() <= maxsize) return;
            int minScore = getMinScore();
            // stop at Integer.MAX_VALUE: no higher threshold exists and minScore++ would overflow
            while (this.map.size() > maxsize && minScore < Integer.MAX_VALUE) {
                minScore++;
                shrinkToMinScore(minScore);
            }
        }
    }

    /**
     * shrink the cluster in such a way that the smallest score is equal or greater than a given minScore
     * @param minScore all objects with a score below this value are removed
     */
    public void shrinkToMinScore(int minScore) {
        synchronized (this) {
            Iterator<Map.Entry<E, IntScore>> i = this.map.entrySet().iterator();
            Map.Entry<E, IntScore> entry;
            int score;
            while (i.hasNext()) {
                entry = i.next();
                score = entry.getValue().intValue();
                if (score < minScore) {
                    i.remove();
                    // keep the overall counter in step with the stored scores, as deleteScore does
                    gcount -= score;
                }
            }
        }
    }

    /**
     * @return the overall counter, i.e. the sum of all scores that are stored
     */
    public synchronized long totalCount() {
        return gcount;
    }

    /**
     * @return the number of stored objects
     */
    public synchronized int size() {
        return map.size();
    }

    public synchronized boolean isEmpty() {
        return map.isEmpty();
    }

    /**
     * Increases the score of an object by one; an unknown object starts with score 1.
     * @param obj the object to score; null is ignored
     */
    public void incScore(final E obj) {
        if (obj == null) return;
        synchronized (this) {
            final IntScore score = this.map.get(obj);
            if (score == null) {
                // do not store the shared IntScore.ONE: stored counters are modified in place later
                // NOTE(review): assumes IntScore.valueOf returns an instance that may be mutated - confirm
                this.map.put(obj, IntScore.valueOf(1));
            } else {
                score.inc();
            }
            // increase overall counter
            gcount++;
        }
    }

    /**
     * Decreases the score of an object by one; an unknown object starts with score -1.
     * @param obj the object to score; null is ignored
     */
    public void decScore(final E obj) {
        if (obj == null) return;
        synchronized (this) {
            final IntScore score = this.map.get(obj);
            if (score == null) {
                this.map.put(obj, IntScore.valueOf(-1));
            } else {
                score.dec();
            }
            // decrease overall counter
            gcount--;
        }
    }

    /**
     * Sets the score of an object to a given value, replacing any previous score.
     * @param obj the object to score; null is ignored
     * @param newScore the new score
     */
    public void setScore(final E obj, final int newScore) {
        if (obj == null) return;
        synchronized (this) {
            final IntScore score = this.map.get(obj);
            if (score == null) {
                // a new object must start with the requested score
                this.map.put(obj, IntScore.valueOf(newScore));
            } else {
                // take the previous score out of the overall counter
                gcount -= score.intValue();
                score.set(newScore);
            }
            // add the new score to the overall counter
            gcount += newScore;
        }
    }

    /**
     * Adds a value to the score of an object; an unknown object starts with that value.
     * @param obj the object to score; null is ignored
     * @param incrementScore the value to add, may be negative
     */
    public void addScore(final E obj, final int incrementScore) {
        if (obj == null) return;
        synchronized (this) {
            final IntScore score = this.map.get(obj);
            if (score == null) {
                this.map.put(obj, IntScore.valueOf(incrementScore));
            } else {
                score.inc(incrementScore);
            }
            // adjust overall counter
            gcount += incrementScore;
        }
    }

    /**
     * Deletes an object.
     * @param obj the object to delete
     * @return the score the object had, or 0 if obj is null or unknown
     */
    public int deleteScore(final E obj) {
        if (obj == null) return 0;
        synchronized (this) {
            final IntScore score = map.remove(obj);
            if (score == null) return 0;
            // decrease overall counter
            gcount -= score.intValue();
            return score.intValue();
        }
    }

    public synchronized boolean existsScore(final E obj) {
        return map.containsKey(obj);
    }

    /**
     * @param obj the object to look up
     * @return the score of the object, or 0 if obj is null or unknown
     */
    public int getScore(final E obj) {
        if (obj == null) return 0;
        final IntScore score;
        synchronized (this) {
            score = map.get(obj);
        }
        if (score == null) return 0;
        return score.intValue();
    }

    /**
     * @return the highest stored score, or -1 if nothing is stored
     */
    public int getMaxScore() {
        synchronized (this) {
            if (map.isEmpty()) return -1;
            int maxScore = Integer.MIN_VALUE;
            for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
                if (entry.getValue().intValue() > maxScore) {
                    maxScore = entry.getValue().intValue();
                }
            }
            return maxScore;
        }
    }

    /**
     * @return the lowest stored score, or -1 if nothing is stored
     */
    public int getMinScore() {
        synchronized (this) {
            if (map.isEmpty()) return -1;
            int minScore = Integer.MAX_VALUE;
            for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
                if (entry.getValue().intValue() < minScore) {
                    minScore = entry.getValue().intValue();
                }
            }
            return minScore;
        }
    }

    /**
     * @return an object with the highest score, or null if nothing is stored
     */
    public E getMaxObject() {
        synchronized (this) {
            E maxObject = null;
            int maxScore = Integer.MIN_VALUE;
            for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
                // keys are never null, so a null maxObject means that this is the first entry
                if (maxObject == null || entry.getValue().intValue() > maxScore) {
                    maxScore = entry.getValue().intValue();
                    maxObject = entry.getKey();
                }
            }
            return maxObject;
        }
    }

    /**
     * @return an object with the lowest score, or null if nothing is stored
     */
    public E getMinObject() {
        synchronized (this) {
            E minObject = null;
            int minScore = Integer.MAX_VALUE;
            for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
                // keys are never null, so a null minObject means that this is the first entry
                if (minObject == null || entry.getValue().intValue() < minScore) {
                    minScore = entry.getValue().intValue();
                    minObject = entry.getKey();
                }
            }
            return minObject;
        }
    }

    @Override
    public String toString() {
        synchronized (this) {
            return map.toString();
        }
    }

    /**
     * Lists all stored objects sorted by their score.
     * The iterator runs over a snapshot that is made during this call; it does
     * not reflect later changes and its remove() does not change this score map.
     * @param up if true the objects are listed with ascending score, otherwise in the reverse of that order
     * @return an iterator over the sorted snapshot
     */
    public Iterator<E> scores(boolean up) {
        synchronized (this) {
            // re-organize entries: group the objects by score, ascending
            // NOTE(review): relies on the ordering that IntScore defines for use as a TreeMap key - confirm
            final TreeMap<IntScore, Set<E>> m = new TreeMap<IntScore, Set<E>>();
            Set<E> s;
            for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
                s = m.get(entry.getValue());
                if (s == null) {
                    // objects with equal score keep the ordering of the backing map where it has one
                    s = this.map instanceof TreeMap ? new TreeSet<E>(((TreeMap<E, IntScore>) this.map).comparator()) : new HashSet<E>();
                    m.put(entry.getValue(), s);
                }
                s.add(entry.getKey());
            }

            // flatten result
            final List<E> l = new ArrayList<E>(this.map.size());
            for (Set<E> f: m.values()) {
                for (E e: f) l.add(e);
            }
            if (up) return l.iterator();

            // optionally reverse list: copy from the ascending list l, starting at its end
            final List<E> r = new ArrayList<E>(l.size());
            for (int i = l.size() - 1; i >= 0; i--) r.add(l.get(i));
            return r.iterator();
        }
    }

}

@ -0,0 +1,67 @@
/**
* StaticScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.Iterator;
/**
 * Operations of a score container in which every object carries an
 * <code>int</code> score that is set as a whole.
 *
 * @param <E> type of the scored objects
 */
public interface StaticScore<E> {

    /**
     * Removes all content from the container.
     */
    void clear();

    /**
     * shrink the cluster to a demanded size
     * @param maxsize the demanded size
     */
    void shrinkToMaxSize(int maxsize);

    /**
     * shrink the cluster in such a way that the smallest score is equal or greater than a given minScore
     * @param minScore the smallest score that may remain
     */
    void shrinkToMinScore(int minScore);

    /**
     * @return the overall count kept by the container
     */
    long totalCount();

    /**
     * @return the number of objects in the container
     */
    int size();

    /**
     * @return true if the container holds no objects
     */
    boolean isEmpty();

    /**
     * Assigns a score to an object.
     * @param obj the object to score
     * @param newScore the score to assign
     */
    void setScore(E obj, int newScore);

    /**
     * Removes an object from the container.
     * @param obj the object to remove
     * @return an int result; the ScoreMap implementation returns the score the object had, or 0 if it was not stored
     */
    int deleteScore(E obj);

    /**
     * @param obj the object to look for
     * @return true if the object has a score in the container
     */
    boolean existsScore(E obj);

    /**
     * @param obj the object to look up
     * @return the score of the object
     */
    int getScore(E obj);

    /**
     * @return the highest score in the container
     */
    int getMaxScore();

    /**
     * @return the lowest score in the container
     */
    int getMinScore();

    /**
     * @return an object that has the highest score
     */
    E getMaxObject();

    /**
     * @return an object that has the lowest score
     */
    E getMinObject();

    /**
     * @return a textual representation of the container
     */
    String toString();

    /**
     * Iterates over the objects in the order of their score.
     * @param up selects the direction of the ordering
     * @return an iterator over the scored objects
     */
    Iterator<E> scores(boolean up);

}

@ -34,12 +34,13 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.cora.storage.StaticScore;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.util.LookAheadIterator; import net.yacy.kelondro.util.LookAheadIterator;
import net.yacy.kelondro.util.ScoreCluster;
public class MapDataMining extends MapHeap { public class MapDataMining extends MapHeap {
@ -48,7 +49,7 @@ public class MapDataMining extends MapHeap {
private final static Double DOUBLE0 = Double.valueOf(0.0); private final static Double DOUBLE0 = Double.valueOf(0.0);
private final String[] sortfields, longaccfields, doubleaccfields; private final String[] sortfields, longaccfields, doubleaccfields;
private Map<String, ScoreCluster<String>> sortClusterMap; // a String-kelondroMScoreCluster - relation private Map<String, StaticScore<String>> sortClusterMap; // a String-kelondroMScoreCluster - relation
private Map<String, Long> accLong; // to store accumulations of Long cells private Map<String, Long> accLong; // to store accumulations of Long cells
private Map<String, Double> accDouble; // to store accumulations of Double cells private Map<String, Double> accDouble; // to store accumulations of Double cells
@ -71,7 +72,7 @@ public class MapDataMining extends MapHeap {
ScoreCluster<String>[] cluster = null; ScoreCluster<String>[] cluster = null;
if (sortfields == null) sortClusterMap = null; else { if (sortfields == null) sortClusterMap = null; else {
sortClusterMap = new ConcurrentHashMap<String, ScoreCluster<String>>(); sortClusterMap = new ConcurrentHashMap<String, StaticScore<String>>();
cluster = new ScoreCluster[sortfields.length]; cluster = new ScoreCluster[sortfields.length];
for (int i = 0; i < sortfields.length; i++) { for (int i = 0; i < sortfields.length; i++) {
cluster[i] = new ScoreCluster<String>(); cluster[i] = new ScoreCluster<String>();
@ -154,7 +155,7 @@ public class MapDataMining extends MapHeap {
public synchronized void clear() { public synchronized void clear() {
super.clear(); super.clear();
if (sortfields == null) sortClusterMap = null; else { if (sortfields == null) sortClusterMap = null; else {
sortClusterMap = new HashMap<String, ScoreCluster<String>>(); sortClusterMap = new HashMap<String, StaticScore<String>>();
for (int i = 0; i < sortfields.length; i++) { for (int i = 0; i < sortfields.length; i++) {
sortClusterMap.put(sortfields[i], new ScoreCluster<String>()); sortClusterMap.put(sortfields[i], new ScoreCluster<String>());
} }
@ -240,7 +241,7 @@ public class MapDataMining extends MapHeap {
private void updateSortCluster(final String key, final Map<String, String> map) { private void updateSortCluster(final String key, final Map<String, String> map) {
Object cell; Object cell;
ScoreCluster<String> cluster; StaticScore<String> cluster;
for (int i = 0; i < sortfields.length; i++) { for (int i = 0; i < sortfields.length; i++) {
cell = map.get(sortfields[i]); cell = map.get(sortfields[i]);
if (cell != null) { if (cell != null) {
@ -278,7 +279,7 @@ public class MapDataMining extends MapHeap {
private void deleteSortCluster(final String key) { private void deleteSortCluster(final String key) {
if (key == null) return; if (key == null) return;
ScoreCluster<String> cluster; StaticScore<String> cluster;
for (int i = 0; i < sortfields.length; i++) { for (int i = 0; i < sortfields.length; i++) {
cluster = sortClusterMap.get(sortfields[i]); cluster = sortClusterMap.get(sortfields[i]);
cluster.deleteScore(key); cluster.deleteScore(key);
@ -289,7 +290,7 @@ public class MapDataMining extends MapHeap {
public synchronized Iterator<byte[]> keys(final boolean up, /* sorted by */ final String field) { public synchronized Iterator<byte[]> keys(final boolean up, /* sorted by */ final String field) {
// sorted iteration using the sortClusters // sorted iteration using the sortClusters
if (sortClusterMap == null) return null; if (sortClusterMap == null) return null;
final ScoreCluster<String> cluster = sortClusterMap.get(field); final StaticScore<String> cluster = sortClusterMap.get(field);
if (cluster == null) return null; // sort field does not exist if (cluster == null) return null; // sort field does not exist
//System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString()); //System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString());
return new string2bytearrayIterator(cluster.scores(up)); return new string2bytearrayIterator(cluster.scores(up));

@ -49,6 +49,8 @@ import java.util.zip.ZipOutputStream;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.gui.YaCyApp; import net.yacy.gui.YaCyApp;
import net.yacy.gui.framework.Browser; import net.yacy.gui.framework.Browser;
import net.yacy.kelondro.blob.MapDataMining; import net.yacy.kelondro.blob.MapDataMining;
@ -65,7 +67,6 @@ import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.Formatter;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.kelondro.util.OS; import net.yacy.kelondro.util.OS;
//import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; //import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
@ -606,7 +607,7 @@ public final class yacy {
final enumerateFiles ef = new enumerateFiles(new File(dbRoot, "WORDS"), true, false, true, true); final enumerateFiles ef = new enumerateFiles(new File(dbRoot, "WORDS"), true, false, true, true);
File f; File f;
byte[] h; byte[] h;
final ScoreCluster<byte[]> hs = new ScoreCluster<byte[]>(); final DynamicScore<byte[]> hs = new ScoreCluster<byte[]>(Base64Order.standardCoder);
while (ef.hasMoreElements()) { while (ef.hasMoreElements()) {
f = ef.nextElement(); f = ef.nextElement();
h = f.getName().substring(0, Word.commonHashLength).getBytes(); h = f.getName().substring(0, Word.commonHashLength).getBytes();

Loading…
Cancel
Save