diff --git a/htroot/Performance_p.html b/htroot/Performance_p.html
index 5159fe721..9b965d967 100644
--- a/htroot/Performance_p.html
+++ b/htroot/Performance_p.html
@@ -81,6 +81,13 @@ Changes take effect immediately
If this is a big number, it shows that the caching works efficiently.
+
+
Singletons Cache Size:
+
#[singletonsSize]#
+
+ The Singletons Cache is a database that holds words that occurred only once.
+
+
Maximum number of Word Caches:
@@ -90,6 +97,7 @@ Changes take effect immediately
flushed to disc; this may last some minutes.
+
Changes take effect immediately
+
diff --git a/htroot/Performance_p.java b/htroot/Performance_p.java
index e4b5775c8..85182f4c6 100644
--- a/htroot/Performance_p.java
+++ b/htroot/Performance_p.java
@@ -146,6 +146,7 @@ public class Performance_p {
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000"));
+ prop.put("singletonsSize", switchboard.wordIndex.singletonsSize());
// return rewrite values for templates
return prop;
diff --git a/source/de/anomic/kelondro/kelondroMScoreCluster.java b/source/de/anomic/kelondro/kelondroMScoreCluster.java
index 21a787372..9571942a1 100644
--- a/source/de/anomic/kelondro/kelondroMScoreCluster.java
+++ b/source/de/anomic/kelondro/kelondroMScoreCluster.java
@@ -44,6 +44,7 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.TreeMap;
+import java.util.Map;
public class kelondroMScoreCluster {
@@ -243,21 +244,22 @@ public class kelondroMScoreCluster {
}
public Iterator scores(boolean up) {
- return scores(up, Integer.MIN_VALUE, Integer.MAX_VALUE);
+ if (up) return new simpleScoreIterator();
+ else return scores(false, Integer.MIN_VALUE, Integer.MAX_VALUE);
}
public Iterator scores(boolean up, int minScore, int maxScore) {
- return new scoreIterator(up, minScore, maxScore);
+ return new komplexScoreIterator(up, minScore, maxScore);
}
- private class scoreIterator implements Iterator {
+ private class komplexScoreIterator implements Iterator {
boolean up;
TreeMap keyrefDBcopy;
Object n;
int min, max;
- public scoreIterator(boolean up, int minScore, int maxScore) {
+ public komplexScoreIterator(boolean up, int minScore, int maxScore) {
this.up = up;
this.min = minScore;
this.max = maxScore;
@@ -299,7 +301,31 @@ public class kelondroMScoreCluster {
}
- public static void main(String[] args) {
+ private class simpleScoreIterator implements Iterator {
+
+ Iterator ii;
+ Map.Entry entry;
+
+ public simpleScoreIterator() {
+ ii = keyrefDB.entrySet().iterator();
+ }
+
+ public boolean hasNext() {
+ return ii.hasNext();
+ }
+
+ public Object next() {
+ entry = (Map.Entry) ii.next();
+ return entry.getValue();
+ }
+
+ public void remove() {
+ ii.remove();
+ }
+
+ }
+
+ public static void main(String[] args) {
System.out.println("Test for Score: start");
kelondroMScoreCluster s = new kelondroMScoreCluster();
int c = 0;
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index fd7a70c7a..373e14748 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -65,7 +65,7 @@ public class plasmaWordIndex {
public plasmaWordIndex(File databaseRoot, int bufferkb, serverLog log) throws IOException {
this.databaseRoot = databaseRoot;
plasmaWordIndexClassicDB fileDB = new plasmaWordIndexClassicDB(databaseRoot, log);
- this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, 1000000, log);
+ this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, bufferkb, log);
}
public int maxURLinWordCache() {
@@ -76,6 +76,10 @@ public class plasmaWordIndex {
return ramCache.wordCacheRAMSize();
}
+ public int singletonsSize() {
+ return ramCache.singletonsSize();
+ }
+
public void setMaxWords(int maxWords) {
ramCache.setMaxWords(maxWords);
}
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index 0d6da66fb..32f43f2d1 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -82,11 +82,11 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
}
- public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, long singletonBufferSize, serverLog log) {
+ public plasmaWordIndexCache(File databaseRoot, plasmaWordIndexInterface backend, int singletonbufferkb, serverLog log) {
// creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed
this.databaseRoot = databaseRoot;
- this.singletonBufferSize = singletonBufferSize;
+ this.singletonBufferSize = singletonbufferkb * 1024;
this.cache = new TreeMap();
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new HashMap();
@@ -132,7 +132,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
synchronized (cache) {
//Iterator i = cache.entrySet().iterator();
- Iterator i = hashScore.scores(false);
+ Iterator i = hashScore.scores(true);
//Map.Entry entry;
String wordHash;
plasmaWordIndexEntryContainer container;
@@ -318,6 +318,10 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
return cache.size();
}
+ public int singletonsSize() {
+ return singletons.size();
+ }
+
public void setMaxWords(int maxWords) {
this.maxWords = maxWords;
}
@@ -341,7 +345,14 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
true);
}
- private int flushFromMem(String key) {
+ private int flushFromMem(String key, boolean reintegrate) {
+ // this method flushes indexes out from the RAM to the disc.
+ // at first we check the singleton database and act accordingly:
+ // if we are to flush an index, but also find an entry in the singletons, we
+ // decide upon the 'reintegrate' flag:
+ // true: do not flush to disc, but re-integrate the singleton into the RAM
+ // false: flush the singleton together with the container to disc
+
plasmaWordIndexEntryContainer container = null;
long time;
synchronized (cache) {
@@ -358,12 +369,13 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
// now decide where to flush that container
Object[] singleton = readSingleton(key);
if (singleton == null) {
+ // not found in singletons
if (container.size() == 1) {
- // store to singleton
+ // it is a singleton: store to singleton
storeSingleton(key, container.getOne(), time);
return 1;
} else {
- // store to back-end
+ // store to back-end; this should be a rare case
return backend.addEntries(container, time);
}
} else {
@@ -376,17 +388,28 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
// it is superfluous to flush this, simple do nothing
return 0;
} else {
- // we flush to the backend, but remove the entry from the singletons
+ // we flush to the backend, and remove the entry from the singletons
removeSingleton(key);
return backend.addEntries(container, java.lang.Math.max(time, oldTime));
}
} else {
- // now we have more than one entry,
+ // now we have more than one entry
// we must remove the key from the singleton database
removeSingleton(key);
- // add this to the backend
+ // ... and add the old singleton entry to the container
container.add(oldEntry);
- return backend.addEntries(container, java.lang.Math.max(time, oldTime));
+ if (reintegrate) {
+ // put singleton together with container back to ram
+ synchronized (cache) {
+ cache.put(key, container);
+ hashScore.setScore(key, container.size());
+ hashDate.put(key, new Long(time));
+ }
+ return -1;
+ } else {
+ // add this to the backend
+ return backend.addEntries(container, java.lang.Math.max(time, oldTime));
+ }
}
}
}
@@ -441,31 +464,35 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
break;
}
//log.logDebug("flushing high-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
- total += flushFromMem(key);
+ total += flushFromMem(key, false);
}
// flush singletons
- while ((total < 200) && (hashScore.size() >= maxWords)) {
- key = (String) hashScore.getMinObject();
+ Iterator i = hashScore.scores(true);
+ ArrayList al = new ArrayList();
+ while ((i.hasNext()) && (total < 200)) {
+ key = (String) i.next();
createTime = (Long) hashDate.get(key);
count = hashScore.getScore(key);
if (count > 1) {
//log.logDebug("flush of singleton-key " + key + ": count too high (count=" + count + ")");
break;
}
- if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 9000)) {
- //log.logDebug("singleton-key " + key + " is too fresh, interruptiong flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
- break;
+ if ((createTime != null) && ((System.currentTimeMillis() - createTime.longValue()) < 90000)) {
+ //log.logDebug("singleton-key " + key + " is too fresh, interrupting flush (count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size() + ")");
+ continue;
}
//log.logDebug("flushing singleton-key " + key + ", count=" + count + ", cachesize=" + cache.size() + ", singleton-size=" + singletons.size());
- total += flushFromMem(key);
+ al.add(key);
+ total++;
}
+ for (int k = 0; k < al.size(); k++) flushFromMem((String) al.get(k), true);
}
return total;
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
- flushFromMem(wordHash);
+ flushFromMem(wordHash, false);
flushFromSingleton(wordHash);
return backend.getIndex(wordHash, deleteIfEmpty);
}
@@ -486,13 +513,13 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
backend.deleteIndex(wordHash);
}
- public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
- flushFromMem(wordHash);
+ public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
+ flushFromMem(wordHash, false);
flushFromSingleton(wordHash);
return backend.removeEntries(wordHash, urlHashes, deleteComplete);
}
- public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
+ public synchronized int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
flushFromMemToLimit();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java
index d4f005aa5..fe4167461 100644
--- a/source/de/anomic/yacy/yacySearch.java
+++ b/source/de/anomic/yacy/yacySearch.java
@@ -101,16 +101,15 @@ public class yacySearch extends Thread {
int c;
while (i.hasNext()) {
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, (String) i.next());
- c = seedcount;
- while ((dhtEnum.hasMoreElements()) && (c > 0)) {
+ c = 0;
+ while ((dhtEnum.hasMoreElements()) && (c < seedcount)) {
seed = (yacySeed) dhtEnum.nextElement();
- ranking.addScore(seed.hash, c);
- c--;
+ ranking.addScore(seed.hash, c++);
}
}
if (ranking.size() < seedcount) seedcount = ranking.size();
yacySeed[] result = new yacySeed[seedcount];
- Iterator e = ranking.scores(false);
+ Iterator e = ranking.scores(true);
c = 0;
while ((e.hasNext()) && (c < result.length))
result[c++] = yacyCore.seedDB.getConnected((String) e.next());
diff --git a/yacy.parser b/yacy.parser
index 8b244b04b..1b369b90f 100644
--- a/yacy.parser
+++ b/yacy.parser
@@ -1,2 +1,2 @@
#plasmaParser configuration file
-#Wed May 11 17:48:25 CEST 2005
+#Thu May 12 01:40:28 CEST 2005