added more score cluster options and made score cluster usage more transparent

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7248 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent e8f90201a5
commit e4d561971e

@ -32,12 +32,13 @@ import java.util.HashMap;
import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist;
import de.anomic.search.Switchboard;
@ -104,7 +105,7 @@ public class Supporter {
accumulateVotes(sb, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
final ScoreCluster<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final DynamicScore<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder);
final HashMap<String, Entry> Supporter = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateSupporter(sb, Supporter, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
@ -197,7 +198,7 @@ public class Supporter {
private static void accumulateSupporter(
final Switchboard sb,
final HashMap<String, Entry> Supporter, final ScoreCluster<String> ranking, final Row rowdef,
final HashMap<String, Entry> Supporter, final DynamicScore<String> ranking, final Row rowdef,
final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) {
final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype));
yacyNewsDB.Record record;

@ -32,12 +32,13 @@ import java.util.HashMap;
import java.util.Iterator;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist;
import de.anomic.search.Switchboard;
@ -112,7 +113,7 @@ public class Surftips {
accumulateVotes(sb , negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.OUTGOING_DB);
//accumulateVotes(negativeHashes, positiveHashes, yacyNewsPool.PUBLISHED_DB);
final ScoreCluster<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final DynamicScore<String> ranking = new ScoreCluster<String>(); // score cluster for url hashes
final Row rowdef = new Row("String url-255, String title-120, String description-120, String refid-" + (DateFormatter.PATTERN_SHORT_SECOND.length() + 12), NaturalOrder.naturalOrder);
final HashMap<String, Entry> surftips = new HashMap<String, Entry>(); // a mapping from an url hash to a kelondroRow.Entry with display properties
accumulateSurftips(sb, surftips, ranking, rowdef, negativeHashes, positiveHashes, yacyNewsPool.INCOMING_DB);
@ -206,7 +207,7 @@ public class Surftips {
private static void accumulateSurftips(
final Switchboard sb,
final HashMap<String, Entry> surftips, final ScoreCluster<String> ranking, final Row rowdef,
final HashMap<String, Entry> surftips, final DynamicScore<String> ranking, final Row rowdef,
final HashMap<String, Integer> negativeHashes, final HashMap<String, Integer> positiveHashes, final int dbtype) {
final int maxCount = Math.min(1000, sb.peers.newsPool.size(dbtype));
yacyNewsDB.Record record;

@ -32,19 +32,20 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.ReverseMapIterator;
import net.yacy.kelondro.util.ScoreCluster;
import de.anomic.crawler.retrieval.EventOrigin;
public final class ResultURLs {
private final Map<EventOrigin, Map<String, InitExecEntry>> resultStacks; // a mapping from urlHash to Entries
private final Map<EventOrigin, ScoreCluster<String>> resultDomains;
private final Map<EventOrigin, DynamicScore<String>> resultDomains;
public class InitExecEntry {
public byte[] initiatorHash, executorHash;
@ -57,7 +58,7 @@ public final class ResultURLs {
public ResultURLs(int initialStackCapacity) {
// init result stacks
resultStacks = new ConcurrentHashMap<EventOrigin, Map<String, InitExecEntry>>(initialStackCapacity);
resultDomains = new ConcurrentHashMap<EventOrigin, ScoreCluster<String>>(initialStackCapacity);
resultDomains = new ConcurrentHashMap<EventOrigin, DynamicScore<String>>(initialStackCapacity);
for (EventOrigin origin: EventOrigin.values()) {
resultStacks.put(origin, new LinkedHashMap<String, InitExecEntry>());
resultDomains.put(origin, new ScoreCluster<String>());
@ -82,7 +83,7 @@ public final class ResultURLs {
return;
}
try {
final ScoreCluster<String> domains = getDomains(stackType);
final DynamicScore<String> domains = getDomains(stackType);
if (domains != null) {
domains.incScore(e.metadata().url().getHost());
}
@ -99,7 +100,7 @@ public final class ResultURLs {
}
public int getDomainListSize(final EventOrigin stack) {
final ScoreCluster<String> domains = getDomains(stack);
final DynamicScore<String> domains = getDomains(stack);
if (domains == null) return 0;
return domains.size();
}
@ -155,7 +156,7 @@ public final class ResultURLs {
private Map<String, InitExecEntry> getStack(final EventOrigin stack) {
return resultStacks.get(stack);
}
private ScoreCluster<String> getDomains(final EventOrigin stack) {
private DynamicScore<String> getDomains(final EventOrigin stack) {
return resultDomains.get(stack);
}
@ -166,7 +167,7 @@ public final class ResultURLs {
public synchronized void clearStack(final EventOrigin stack) {
final Map<String, InitExecEntry> resultStack = getStack(stack);
if (resultStack != null) resultStack.clear();
final ScoreCluster<String> resultDomains = getDomains(stack);
final DynamicScore<String> resultDomains = getDomains(stack);
if (resultDomains != null) {
// we do not clear this completely, just remove most of the less important entries
resultDomains.shrinkToMaxSize(100);

@ -7,11 +7,12 @@ import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.LinkedBlockingQueue;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.data.word.WordReference;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.rwi.IndexCell;
import net.yacy.kelondro.util.ScoreCluster;
/**
@ -127,7 +128,7 @@ public class DidYouMean {
if (this.word.indexOf(' ') > 0) return getSuggestions(this.word.split(" "), timeout, preSortSelection, this.index);
SortedSet<String> preSorted = getSuggestions(timeout);
if (System.currentTimeMillis() > timelimit) return preSorted;
ScoreCluster<String> scored = new ScoreCluster<String>();
DynamicScore<String> scored = new ScoreCluster<String>();
for (final String s: preSorted) {
if (System.currentTimeMillis() > timelimit) break;
if (scored.size() >= 2 * preSortSelection) break;

@ -38,6 +38,8 @@ import java.util.SortedSet;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreMap;
import net.yacy.kelondro.logging.Log;
/**
@ -45,10 +47,17 @@ import net.yacy.kelondro.logging.Log;
*
*/
public class DidYouMeanLibrary {
private final File dictionaryPath;
private TreeSet<String> dict, tcid;
// common word cache
private static final int commonWordsMaxSize = 100000; // maximum size of common word cache
private static final int commonWordsMinLength = 4; // words must have that length at minimum
private DynamicScore<String> commonWords = new ScoreMap<String>();
// dictionaries
private final File dictionaryPath;
private TreeSet<String> dict; // the word dictionary
private TreeSet<String> tcid; // the dictionary of reverse words
/**
* create a new dictionary
* This loads all files that ends with '.words'
@ -61,6 +70,20 @@ public class DidYouMeanLibrary {
reload();
}
/**
 * Feeds one word into the common word cache.
 * The word is trimmed and lower-cased first; null input and words shorter than
 * commonWordsMinLength are ignored. Once the cache has grown to commonWordsMaxSize
 * entries it is shrunk to half of that size.
 * @param word the word to be counted, may be null
 */
public void learn(String word) {
    if (word != null) {
        final String normalized = word.trim().toLowerCase();
        if (normalized.length() >= commonWordsMinLength) {
            commonWords.incScore(normalized);
            // keep the cache bounded: cut it back to half of the limit when the limit is reached
            final boolean cacheFull = commonWords.size() >= commonWordsMaxSize;
            if (cacheFull) commonWords.shrinkToMaxSize(commonWordsMaxSize / 2);
        }
    }
}
/**
* scan the input directory and load all dictionaries (again)
*/

@ -40,6 +40,8 @@ import java.util.TreeSet;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -52,12 +54,8 @@ import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.table.SplitTable;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist;
//import de.anomic.http.client.Client;
//import de.anomic.http.server.ResponseContainer;
public final class MetadataRepository implements Iterable<byte[]> {
// class objects
@ -610,7 +608,7 @@ public final class MetadataRepository implements Iterable<byte[]> {
Map<String, hashStat> map = domainSampleCollector();
// order elements by size
ScoreCluster<String> s = new ScoreCluster<String>();
DynamicScore<String> s = new ScoreCluster<String>();
for (Map.Entry<String, hashStat> e: map.entrySet()) {
s.addScore(e.getValue().urlhash, e.getValue().count);
}

@ -33,6 +33,8 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.Semaphore;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.document.Condenser;
import net.yacy.document.LargeNumberCache;
import net.yacy.kelondro.data.meta.DigestURI;
@ -42,7 +44,6 @@ import net.yacy.kelondro.data.word.WordReferenceVars;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.util.ScoreCluster;
public class ReferenceOrder {
@ -51,7 +52,7 @@ public class ReferenceOrder {
private int maxdomcount;
private WordReferenceVars min, max;
private final ScoreCluster<String> doms; // collected for "authority" heuristic
private final DynamicScore<String> doms; // collected for "authority" heuristic
private final RankingProfile ranking;
private final String language;

@ -29,6 +29,8 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -36,7 +38,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.kelondro.util.kelondroException;
import de.anomic.yacy.yacyCore;
@ -57,7 +58,7 @@ public class PeerSelection {
byte[] wordhash,
int redundancy,
HashMap<String, yacySeed> regularSeeds,
ScoreCluster<String> ranking) {
DynamicScore<String> ranking) {
// this method is called from the search target computation
long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash);
yacySeed seed;
@ -339,7 +340,7 @@ public class PeerSelection {
if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected();
// fill a score object
final ScoreCluster<String> seedScore = new ScoreCluster<String>();
final DynamicScore<String> seedScore = new ScoreCluster<String>();
yacySeed ys;
long absage;
final Iterator<yacySeed> s = seedDB.seedsConnected(true, false, null, (float) 0.0);

@ -31,10 +31,11 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Bitfield;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.repository.Blacklist;
import de.anomic.crawler.ResultURLs;
@ -185,7 +186,7 @@ public class yacySearch extends Thread {
}
// put in seeds according to dht
final ScoreCluster<String> ranking = new ScoreCluster<String>();
final DynamicScore<String> ranking = new ScoreCluster<String>();
final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
yacySeed seed;

@ -0,0 +1,31 @@
/**
* DynamicScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
/**
 * A score collection whose per-object scores can be modified.
 * The implementations contained in this package (ScoreCluster, ScoreMap) ignore
 * null objects in all three methods.
 */
public interface DynamicScore<E> extends StaticScore<E> {

    /**
     * Raise the score of the given object by one.
     * @param obj the object to be scored
     */
    void incScore(E obj);

    /**
     * Lower the score of the given object by one.
     * @param obj the object to be scored
     */
    void decScore(E obj);

    /**
     * Add a (possibly negative) amount to the score of the given object.
     * @param obj the object to be scored
     * @param incrementScore the amount that is added to the current score
     */
    void addScore(E obj, int incrementScore);
}

@ -0,0 +1,96 @@
/**
* IntScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.Comparator;
/**
 * A mutable int counter that is meant to be used as the value object in object-to-counter maps.
 * When a map value must be increased or decreased and an immutable number class is used,
 * every change requires the allocation of a new number object and a rewrite of the map entry.
 * With this class the counter stored in the map can be changed in place instead.
 *
 * Because instances are mutable, the result of equals, hashCode and compareTo changes
 * whenever the value changes. Do not modify an instance while it is used as a key
 * in a hash-based or sorted collection.
 */
public class IntScore implements Comparable<IntScore>, Comparator<IntScore> {

    // WARNING: these shared instances are mutable like every other IntScore.
    // Never store them in a place where inc/dec/set/min/max may be called on them later,
    // because such a call changes the value seen by every holder of the constant.
    public static IntScore ZERO = new IntScore(0);
    public static IntScore ONE = new IntScore(1);

    private int value;

    /**
     * @param value the initial counter value
     */
    public IntScore(int value) {
        this.value = value;
    }

    /**
     * Create a new, independent counter.
     * @param n the initial counter value
     * @return a freshly allocated IntScore, never a shared instance
     */
    public final static IntScore valueOf(final int n) {
        return new IntScore(n);
    }

    /**
     * @return the current counter value
     */
    public int intValue() {
        return this.value;
    }

    /** increase the counter by one */
    public void inc() {
        this.value++;
    }

    /**
     * increase the counter
     * @param n the amount that is added
     */
    public void inc(int n) {
        this.value += n;
    }

    /** decrease the counter by one */
    public void dec() {
        this.value--;
    }

    /**
     * decrease the counter
     * @param n the amount that is subtracted
     */
    public void dec(int n) {
        this.value -= n;
    }

    /**
     * overwrite the counter
     * @param n the new counter value
     */
    public void set(int n) {
        this.value = n;
    }

    /**
     * lower the counter to n if n is smaller than the current value
     * @param n the upper bound candidate
     */
    public void min(int n) {
        if (n < this.value) this.value = n;
    }

    /**
     * raise the counter to n if n is greater than the current value
     * @param n the lower bound candidate
     */
    public void max(int n) {
        if (n > this.value) this.value = n;
    }

    /**
     * Two IntScore objects are equal if they currently hold the same value.
     */
    @Override
    public boolean equals(Object o) {
        return (o instanceof IntScore) && this.value == ((IntScore) o).value;
    }

    /**
     * The hash code is the current value itself.
     * The former expression (value ^ (value >>> 32)) was the Long formula applied to an int:
     * int shift distances are taken modulo 32, so the shift did nothing and the
     * result was always 0, which put all instances into the same hash bucket.
     */
    @Override
    public int hashCode() {
        return this.value;
    }

    /**
     * Numerical comparison of the current values.
     * @return -1, 0 or 1 if this value is less than, equal to or greater than the other value
     */
    public int compareTo(IntScore o) {
        int thisVal = this.value;
        int anotherVal = o.value;
        return thisVal < anotherVal ? -1 : (thisVal == anotherVal ? 0 : 1);
    }

    /**
     * Comparator variant of compareTo; the receiver's own value is not used.
     */
    public int compare(IntScore o1, IntScore o2) {
        return o1.compareTo(o2);
    }
}

@ -1,25 +1,24 @@
// kelondroMScoreCluster.java
// -----------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
// Frankfurt, Germany, 2004
// last major change: 28.09.2004
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/**
* ScoreCluster
* Copyright 2004, 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 28.09.2004 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.kelondro.util;
package net.yacy.cora.storage;
import java.text.ParseException;
import java.text.SimpleDateFormat;
@ -31,12 +30,13 @@ import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;
import net.yacy.kelondro.util.kelondroOutOfLimitsException;
public final class ScoreCluster<E> {
public final class ScoreCluster<E> implements DynamicScore<E> {
protected final TreeMap<E, Long> refkeyDB; // a mapping from a reference to the cluster key
protected final TreeMap<Long, E> keyrefDB; // a mapping from the cluster key to the reference
protected final Map<E, Long> map; // a mapping from a reference to the cluster key
protected final TreeMap<Long, E> pam; // a mapping from the cluster key to the reference
private long gcount;
private int encnt;
@ -45,19 +45,20 @@ public final class ScoreCluster<E> {
}
public ScoreCluster(Comparator<? super E> comparator) {
if(comparator != null) {
refkeyDB = new TreeMap<E, Long>(comparator);
} else {
refkeyDB = new TreeMap<E, Long>();
}
keyrefDB = new TreeMap<Long, E>();
if (comparator == null) {
//map = new HashMap<E, Long>();
map = new TreeMap<E, Long>(comparator);
} else {
map = new TreeMap<E, Long>(comparator);
}
pam = new TreeMap<Long, E>();
gcount = 0;
encnt = 0;
}
public synchronized void clear() {
refkeyDB.clear();
keyrefDB.clear();
map.clear();
pam.clear();
gcount = 0;
encnt = 0;
}
@ -70,11 +71,11 @@ public final class ScoreCluster<E> {
if (maxsize < 0) return;
Long key;
synchronized (this) {
while (refkeyDB.size() > maxsize) {
while (map.size() > maxsize) {
// find and remove smallest objects until cluster has demanded size
key = keyrefDB.firstKey();
key = pam.firstKey();
if (key == null) break;
refkeyDB.remove(keyrefDB.remove(key));
map.remove(pam.remove(key));
}
}
}
@ -87,13 +88,13 @@ public final class ScoreCluster<E> {
int score;
Long key;
synchronized (this) {
while (keyrefDB.size() > 0) {
while (pam.size() > 0) {
// find and remove objects where their score is smaller than the demanded minimum score
key = keyrefDB.firstKey();
key = pam.firstKey();
if (key == null) break;
score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
if (score >= minScore) break;
refkeyDB.remove(keyrefDB.remove(key));
map.remove(pam.remove(key));
}
}
}
@ -178,19 +179,11 @@ public final class ScoreCluster<E> {
}
public synchronized int size() {
return refkeyDB.size();
return map.size();
}
public synchronized boolean isEmpty() {
return refkeyDB.isEmpty();
}
public synchronized void incScore(final E[] objs) {
for (int i = 0; i < objs.length; i++) addScore(objs[i], 1);
}
public synchronized void decScore(final E[] objs) {
for (int i = 0; i < objs.length; i++) addScore(objs[i], -1);
return map.isEmpty();
}
public synchronized void incScore(final E obj) {
@ -204,7 +197,7 @@ public final class ScoreCluster<E> {
public void setScore(final E obj, final int newScore) {
if (obj == null) return;
synchronized (this) {
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
Long usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (newScore < 0) throw new kelondroOutOfLimitsException(newScore);
if (usk == null) {
@ -212,12 +205,12 @@ public final class ScoreCluster<E> {
usk = Long.valueOf(scoreKey(encnt++, newScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
map.put(obj, usk);
pam.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(usk);
pam.remove(usk);
// get previous handle and score
final long c = usk.longValue();
@ -227,8 +220,8 @@ public final class ScoreCluster<E> {
// set new value
usk = Long.valueOf(scoreKey(oldHandle, newScore)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
map.put(obj, usk);
pam.put(usk, obj);
}
}
// increase overall counter
@ -238,7 +231,7 @@ public final class ScoreCluster<E> {
public void addScore(final E obj, final int incrementScore) {
if (obj == null) return;
synchronized (this) {
Long usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
Long usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) {
// set new value
@ -246,12 +239,12 @@ public final class ScoreCluster<E> {
usk = Long.valueOf(scoreKey(encnt++, incrementScore));
// put new value into cluster
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
map.put(obj, usk);
pam.put(usk, obj);
} else {
// delete old entry
keyrefDB.remove(usk);
pam.remove(usk);
// get previous handle and score
final long c = usk.longValue();
@ -262,8 +255,8 @@ public final class ScoreCluster<E> {
final int newValue = oldScore + incrementScore;
if (newValue < 0) throw new kelondroOutOfLimitsException(newValue);
usk = Long.valueOf(scoreKey(oldHandle, newValue)); // generates an unique key for a specific score
refkeyDB.put(obj, usk);
keyrefDB.put(usk, obj);
map.put(obj, usk);
pam.put(usk, obj);
}
}
// increase overall counter
@ -275,11 +268,11 @@ public final class ScoreCluster<E> {
if (obj == null) return 0;
final Long usk;
synchronized (this) {
usk = refkeyDB.remove(obj); // get unique score key, old entry is not needed any more
usk = map.remove(obj); // get unique score key, old entry is not needed any more
if (usk == null) return 0;
// delete old entry
keyrefDB.remove(usk);
pam.remove(usk);
}
// get previous handle and score
@ -292,61 +285,41 @@ public final class ScoreCluster<E> {
}
public synchronized boolean existsScore(final E obj) {
return (refkeyDB.get(obj) != null);
return map.containsKey(obj);
}
public int getScore(final E obj) {
if (obj == null) return 0;
final Long cs;
synchronized (this) {
cs = refkeyDB.get(obj);
cs = map.get(obj);
}
if (cs == null) return 0;
return (int) ((cs.longValue() & 0xFFFFFFFF00000000L) >> 32);
}
public synchronized int getMaxScore() {
if (refkeyDB.isEmpty()) return -1;
return (int) ((keyrefDB.lastKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
if (map.isEmpty()) return -1;
return (int) ((pam.lastKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
}
public synchronized int getMinScore() {
if (refkeyDB.isEmpty()) return -1;
return (int) ((keyrefDB.firstKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
if (map.isEmpty()) return -1;
return (int) ((pam.firstKey().longValue() & 0xFFFFFFFF00000000L) >> 32);
}
public synchronized E getMaxObject() {
if (refkeyDB.isEmpty()) return null;
return keyrefDB.get(keyrefDB.lastKey());
if (map.isEmpty()) return null;
return pam.get(pam.lastKey());
}
public synchronized E getMinObject() {
if (refkeyDB.isEmpty()) return null;
return keyrefDB.get(keyrefDB.firstKey());
}
public synchronized E[] getScores(final int maxCount, final boolean up) {
return getScores(maxCount, up, Integer.MIN_VALUE, Integer.MAX_VALUE);
}
@SuppressWarnings("unchecked")
public synchronized E[] getScores(int maxCount, final boolean up, final int minScore, final int maxScore) {
if (maxCount > refkeyDB.size()) maxCount = refkeyDB.size();
E[] s = (E[]) new Object[maxCount];
final Iterator<E> it = scores(up, minScore, maxScore);
int i = 0;
while ((i < maxCount) && (it.hasNext())) s[i++] = it.next();
if (i < maxCount) {
// re-copy the result array
E[] sc = (E[]) new Object[i];
System.arraycopy(s, 0, sc, 0, i);
s = sc;
}
return s;
if (map.isEmpty()) return null;
return pam.get(pam.firstKey());
}
public String toString() {
return refkeyDB + " / " + keyrefDB;
return map + " / " + pam;
}
public synchronized Iterator<E> scores(final boolean up) {
@ -354,66 +327,13 @@ public final class ScoreCluster<E> {
return new reverseScoreIterator<E>();
}
public synchronized Iterator<E> scores(final boolean up, final int minScore, final int maxScore) {
return new komplexScoreIterator<E>(up, minScore, maxScore);
}
private class komplexScoreIterator<A extends E> implements Iterator<E> {
boolean up;
TreeMap<Long, E> keyrefDBcopy;
E n;
int min, max;
@SuppressWarnings("unchecked")
public komplexScoreIterator(final boolean up, final int minScore, final int maxScore) {
this.up = up;
this.min = minScore;
this.max = maxScore;
this.keyrefDBcopy = (TreeMap<Long, E>) keyrefDB.clone(); // NoSuchElementException here?
internalNext();
}
public boolean hasNext() {
return (n != null);
}
private void internalNext() {
Long key;
int score = (max + min) / 2;
while (!keyrefDBcopy.isEmpty()) {
key = ((up) ? keyrefDBcopy.firstKey() : keyrefDBcopy.lastKey());
n = keyrefDBcopy.remove(key);
score = (int) ((key.longValue() & 0xFFFFFFFF00000000L) >> 32);
if ((score >= min) && (score <= max)) return;
if (((up) && (score > max)) || ((!(up)) && (score < min))) {
keyrefDBcopy = new TreeMap<Long, E>();
n = null;
return;
}
}
n = null;
}
public E next() {
final E o = n;
internalNext();
return o;
}
public void remove() {
if (n != null) deleteScore(n);
}
}
private class reverseScoreIterator<A extends E> implements Iterator<E> {
SortedMap<Long, E> view;
Long key;
public reverseScoreIterator() {
view = keyrefDB;
view = pam;
}
public boolean hasNext() {
@ -423,14 +343,14 @@ public final class ScoreCluster<E> {
public E next() {
key = view.lastKey();
view = view.headMap(key);
final E value = keyrefDB.get(key);
final E value = pam.get(key);
//System.out.println("cluster reverse iterator: score = " + ((((Long) key).longValue() & 0xFFFFFFFF00000000L) >> 32) + ", handle = " + (((Long) key).longValue() & 0xFFFFFFFFL) + ", value = " + value);
return value;
}
public void remove() {
final Object val = keyrefDB.remove(key);
if (val != null) refkeyDB.remove(val);
final Object val = pam.remove(key);
if (val != null) map.remove(val);
}
}
@ -441,7 +361,7 @@ public final class ScoreCluster<E> {
Map.Entry<Long, E> entry;
public simpleScoreIterator() {
ii = keyrefDB.entrySet().iterator();
ii = pam.entrySet().iterator();
}
public boolean hasNext() {
@ -456,7 +376,7 @@ public final class ScoreCluster<E> {
public void remove() {
ii.remove();
if (entry.getValue() != null) refkeyDB.remove(entry.getValue());
if (entry.getValue() != null) map.remove(entry.getValue());
}
}
@ -502,24 +422,12 @@ public final class ScoreCluster<E> {
}
System.out.println("result:");
Object[] result;
result = s.getScores(s.size(), true);
for (int i = 0; i < s.size(); i++) System.out.println("up: " + result[i]);
result = s.getScores(s.size(), false);
for (int i = 0; i < s.size(); i++) System.out.println("down: " + result[i]);
Iterator<String> i = s.scores(true);
while (i.hasNext()) System.out.println("up: " + i.next());
i = s.scores(false);
while (i.hasNext()) System.out.println("down: " + i.next());
System.out.println("finished create. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
/*
// delete cluster
time = System.currentTimeMillis();
for (int i = 0; i < 10000; i++) {
s.deleteScore("score#" + i + "xxx" + i + "xxx" + i + "xxx" + i + "xxx");
c -= i/10;
}
System.out.println("finished delete. time = " + (System.currentTimeMillis() - time));
System.out.println("total=" + s.totalCount() + ", elements=" + s.size() + ", redundant count=" + c);
*/
}
}

@ -0,0 +1,265 @@
/**
* ScoreMap
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
public class ScoreMap<E> implements DynamicScore<E> {
protected final Map<E, IntScore> map; // a mapping from a reference object to its mutable score counter
private long gcount; // running total of all score changes; this is the value returned by totalCount()
/**
 * Create an empty score map without a comparator.
 */
public ScoreMap() {
    this((Comparator<? super E>) null);
}
/**
 * Create an empty score map.
 * @param comparator if not null, the objects are kept in a TreeMap ordered by this comparator;
 *                   if null, a HashMap is used
 */
public ScoreMap(Comparator<? super E> comparator) {
    if (comparator != null) {
        this.map = new TreeMap<E, IntScore>(comparator);
    } else {
        this.map = new HashMap<E, IntScore>();
    }
    this.gcount = 0;
}
/**
 * Remove all entries and reset the total counter to zero.
 */
public void clear() {
    synchronized (this) {
        this.gcount = 0;
        this.map.clear();
    }
}
/**
 * Shrink the cluster to a demanded size.
 * Entries are removed in whole score levels, lowest scores first: the minimum score is
 * raised step by step and all entries below it are dropped until at most maxsize entries
 * remain. The resulting size can therefore be smaller than maxsize.
 * A negative maxsize is ignored, like ScoreCluster.shrinkToMaxSize does.
 * @param maxsize the maximum number of entries that shall remain
 */
public void shrinkToMaxSize(int maxsize) {
    // a negative size can never be reached; without this guard the loop below
    // would run forever on the already emptied map
    if (maxsize < 0) return;
    // size check and removal must be done under the same lock that protects all other map accesses
    synchronized (this) {
        if (this.map.size() <= maxsize) return;
        int minScore = getMinScore();
        // stop at Integer.MAX_VALUE: no higher limit exists, and counting on would wrap around
        while (this.map.size() > maxsize && minScore < Integer.MAX_VALUE) {
            minScore++;
            shrinkToMinScore(minScore);
        }
    }
}
/**
 * Drop every entry whose score is lower than the given minimum, so that afterwards
 * the smallest remaining score is equal or greater than minScore.
 * @param minScore the lowest score that is allowed to stay
 */
public void shrinkToMinScore(int minScore) {
    synchronized (this) {
        for (final Iterator<Map.Entry<E, IntScore>> it = this.map.entrySet().iterator(); it.hasNext();) {
            final IntScore current = it.next().getValue();
            if (current.intValue() < minScore) it.remove();
        }
    }
}
public synchronized long totalCount() {
return gcount;
}
public synchronized int size() {
return map.size();
}
public synchronized boolean isEmpty() {
return map.isEmpty();
}
public void incScore(final E obj) {
if (obj == null) return;
synchronized (this) {
IntScore score = this.map.get(obj);
if (score == null) {
this.map.put(obj, IntScore.ONE);
} else {
score.inc();
}
}
// increase overall counter
gcount++;
}
public void decScore(final E obj) {
if (obj == null) return;
synchronized (this) {
IntScore score = this.map.get(obj);
if (score == null) {
this.map.put(obj, IntScore.valueOf(-1));
} else {
score.dec();
}
}
// increase overall counter
gcount--;
}
public void setScore(final E obj, final int newScore) {
if (obj == null) return;
synchronized (this) {
IntScore score = this.map.get(obj);
if (score == null) {
this.map.put(obj, IntScore.ONE);
} else {
gcount -= score.intValue();
score.set(newScore);
}
}
// increase overall counter
gcount += newScore;
}
public void addScore(final E obj, final int incrementScore) {
if (obj == null) return;
synchronized (this) {
IntScore score = this.map.get(obj);
if (score == null) {
this.map.put(obj, IntScore.valueOf(incrementScore));
} else {
score.inc(incrementScore);
}
}
// increase overall counter
gcount += incrementScore;
}
public int deleteScore(final E obj) {
// deletes entry and returns previous score
if (obj == null) return 0;
final IntScore score;
synchronized (this) {
score = map.remove(obj);
if (score == null) return 0;
}
// decrease overall counter
gcount -= score.intValue();
return score.intValue();
}
public synchronized boolean existsScore(final E obj) {
return map.containsKey(obj);
}
public int getScore(final E obj) {
if (obj == null) return 0;
final IntScore score;
synchronized (this) {
score = map.get(obj);
}
if (score == null) return 0;
return score.intValue();
}
public int getMaxScore() {
if (map.isEmpty()) return -1;
int maxScore = Integer.MIN_VALUE;
synchronized (this) {
for (Map.Entry<E, IntScore> entry: this.map.entrySet()) if (entry.getValue().intValue() > maxScore) {
maxScore = entry.getValue().intValue();
}
}
return maxScore;
}
public int getMinScore() {
if (map.isEmpty()) return -1;
int minScore = Integer.MAX_VALUE;
synchronized (this) {
for (Map.Entry<E, IntScore> entry: this.map.entrySet()) if (entry.getValue().intValue() < minScore) {
minScore = entry.getValue().intValue();
}
}
return minScore;
}
public E getMaxObject() {
if (map.isEmpty()) return null;
E maxObject = null;
int maxScore = Integer.MIN_VALUE;
synchronized (this) {
for (Map.Entry<E, IntScore> entry: this.map.entrySet()) if (entry.getValue().intValue() > maxScore) {
maxScore = entry.getValue().intValue();
maxObject = entry.getKey();
}
}
return maxObject;
}
public E getMinObject() {
if (map.isEmpty()) return null;
E minObject = null;
int minScore = Integer.MAX_VALUE;
synchronized (this) {
for (Map.Entry<E, IntScore> entry: this.map.entrySet()) if (entry.getValue().intValue() < minScore) {
minScore = entry.getValue().intValue();
minObject = entry.getKey();
}
}
return minObject;
}
public String toString() {
return map.toString();
}
public Iterator<E> scores(boolean up) {
synchronized (this) {
// re-organize entries
TreeMap<IntScore, Set<E>> m = new TreeMap<IntScore, Set<E>>();
Set<E> s;
for (Map.Entry<E, IntScore> entry: this.map.entrySet()) {
s = m.get(entry.getValue());
if (s == null) {
s = this.map instanceof TreeMap ? new TreeSet<E>(((TreeMap<E, IntScore>) this.map).comparator()) : new HashSet<E>();
s.add(entry.getKey());
m.put(entry.getValue(), s);
} else {
s.add(entry.getKey());
}
}
// flatten result
List<E> l = new ArrayList<E>(this.map.size());
for (Set<E> f: m.values()) {
for (E e: f) l.add(e);
}
if (up) return l.iterator();
// optionally reverse list
List<E> r = new ArrayList<E>(l.size());
for (int i = l.size() - 1; i >= 0; i--) r.add(r.get(i));
return r.iterator();
}
}
}

@ -0,0 +1,67 @@
/**
* StaticScore
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.storage;
import java.util.Iterator;
/**
 * Operations of a score container that assign, read and remove the score of
 * individual objects and that list the objects ordered by score.
 *
 * @param <E> type of the scored objects
 */
public interface StaticScore<E> {

    /**
     * remove all entries from the cluster
     */
    void clear();

    /**
     * shrink the cluster to a demanded size
     * @param maxsize
     */
    void shrinkToMaxSize(int maxsize);

    /**
     * shrink the cluster in such a way that the smallest score is equal or greater than a given minScore
     * @param minScore
     */
    void shrinkToMinScore(int minScore);

    /**
     * @return the total count kept by the cluster
     */
    long totalCount();

    /**
     * @return the number of objects in the cluster
     */
    int size();

    /**
     * @return true if the cluster holds no objects
     */
    boolean isEmpty();

    /**
     * assign a score to an object
     * @param obj the object to score
     * @param newScore the score for the object
     */
    void setScore(E obj, int newScore);

    /**
     * remove an object from the cluster
     * @param obj the object to remove
     * @return an int reported by the implementation for the removed object
     */
    int deleteScore(E obj);

    /**
     * @param obj the object to look up
     * @return true if the object is known to the cluster
     */
    boolean existsScore(E obj);

    /**
     * @param obj the object to look up
     * @return the score of the object
     */
    int getScore(E obj);

    /**
     * @return the highest score in the cluster
     */
    int getMaxScore();

    /**
     * @return the lowest score in the cluster
     */
    int getMinScore();

    /**
     * @return an object that has the highest score
     */
    E getMaxObject();

    /**
     * @return an object that has the lowest score
     */
    E getMinObject();

    /**
     * @return a string form of the cluster
     */
    String toString();

    /**
     * iterate the objects ordered by score
     * @param up selects the direction of the score order
     * @return an iterator over the scored objects
     */
    Iterator<E> scores(boolean up);
}

@ -34,12 +34,13 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.cora.storage.StaticScore;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.util.LookAheadIterator;
import net.yacy.kelondro.util.ScoreCluster;
public class MapDataMining extends MapHeap {
@ -48,7 +49,7 @@ public class MapDataMining extends MapHeap {
private final static Double DOUBLE0 = Double.valueOf(0.0);
private final String[] sortfields, longaccfields, doubleaccfields;
private Map<String, ScoreCluster<String>> sortClusterMap; // a String-kelondroMScoreCluster - relation
private Map<String, StaticScore<String>> sortClusterMap; // a String-kelondroMScoreCluster - relation
private Map<String, Long> accLong; // to store accumulations of Long cells
private Map<String, Double> accDouble; // to store accumulations of Double cells
@ -71,7 +72,7 @@ public class MapDataMining extends MapHeap {
ScoreCluster<String>[] cluster = null;
if (sortfields == null) sortClusterMap = null; else {
sortClusterMap = new ConcurrentHashMap<String, ScoreCluster<String>>();
sortClusterMap = new ConcurrentHashMap<String, StaticScore<String>>();
cluster = new ScoreCluster[sortfields.length];
for (int i = 0; i < sortfields.length; i++) {
cluster[i] = new ScoreCluster<String>();
@ -154,7 +155,7 @@ public class MapDataMining extends MapHeap {
public synchronized void clear() {
super.clear();
if (sortfields == null) sortClusterMap = null; else {
sortClusterMap = new HashMap<String, ScoreCluster<String>>();
sortClusterMap = new HashMap<String, StaticScore<String>>();
for (int i = 0; i < sortfields.length; i++) {
sortClusterMap.put(sortfields[i], new ScoreCluster<String>());
}
@ -240,7 +241,7 @@ public class MapDataMining extends MapHeap {
private void updateSortCluster(final String key, final Map<String, String> map) {
Object cell;
ScoreCluster<String> cluster;
StaticScore<String> cluster;
for (int i = 0; i < sortfields.length; i++) {
cell = map.get(sortfields[i]);
if (cell != null) {
@ -278,7 +279,7 @@ public class MapDataMining extends MapHeap {
private void deleteSortCluster(final String key) {
if (key == null) return;
ScoreCluster<String> cluster;
StaticScore<String> cluster;
for (int i = 0; i < sortfields.length; i++) {
cluster = sortClusterMap.get(sortfields[i]);
cluster.deleteScore(key);
@ -289,7 +290,7 @@ public class MapDataMining extends MapHeap {
public synchronized Iterator<byte[]> keys(final boolean up, /* sorted by */ final String field) {
// sorted iteration using the sortClusters
if (sortClusterMap == null) return null;
final ScoreCluster<String> cluster = sortClusterMap.get(field);
final StaticScore<String> cluster = sortClusterMap.get(field);
if (cluster == null) return null; // sort field does not exist
//System.out.println("DEBUG: cluster for field " + field + ": " + cluster.toString());
return new string2bytearrayIterator(cluster.scores(up));

@ -49,6 +49,8 @@ import java.util.zip.ZipOutputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.DynamicScore;
import net.yacy.cora.storage.ScoreCluster;
import net.yacy.gui.YaCyApp;
import net.yacy.gui.framework.Browser;
import net.yacy.kelondro.blob.MapDataMining;
@ -65,7 +67,6 @@ import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.Formatter;
import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.util.ScoreCluster;
import net.yacy.kelondro.util.OS;
//import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
@ -606,7 +607,7 @@ public final class yacy {
final enumerateFiles ef = new enumerateFiles(new File(dbRoot, "WORDS"), true, false, true, true);
File f;
byte[] h;
final ScoreCluster<byte[]> hs = new ScoreCluster<byte[]>();
final DynamicScore<byte[]> hs = new ScoreCluster<byte[]>(Base64Order.standardCoder);
while (ef.hasMoreElements()) {
f = ef.nextElement();
h = f.getName().substring(0, Word.commonHashLength).getBytes();

Loading…
Cancel
Save