diff --git a/build.properties b/build.properties
index 003816b2d..d5bdf3820 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.432
+releaseVersion=0.433
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
diff --git a/htroot/PerformanceQueues_p.html b/htroot/PerformanceQueues_p.html
index bffa4268d..51df4bc59 100644
--- a/htroot/PerformanceQueues_p.html
+++ b/htroot/PerformanceQueues_p.html
@@ -86,6 +86,13 @@ Changes take effect immediately
If this is a big number, it shows that the caching works efficiently.
+
+
Maximum Age of Word in cache:
+
#[maxAgeOfWordCache]#
+
+ This is the maximum age, in minutes, of any word index currently held in the RAM cache.
+
+
Maximum number of Word Caches, low limit:
diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java
index 61bbff583..dafe81f37 100644
--- a/htroot/PerformanceQueues_p.java
+++ b/htroot/PerformanceQueues_p.java
@@ -253,6 +253,7 @@ public class PerformanceQueues_p {
// table cache settings
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
+ prop.put("maxAgeOfWordCache", "" + (switchboard.wordIndex.maxAgeOfWordCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 681031682..c9ec88d97 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -94,6 +94,10 @@ public final class plasmaWordIndex {
return ramCache.maxURLinWordCache();
}
+ public long maxAgeOfWordCache() { // delegates to ramCache; presumably milliseconds (PerformanceQueues_p divides by 1000 / 60 to show minutes)
+ return ramCache.maxAgeOfWordCache();
+ }
+
public int wordCacheRAMSize() {
return ramCache.wordCacheRAMSize();
}
@@ -123,7 +127,7 @@ public final class plasmaWordIndex {
}
}
} else {
- while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
+ while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheReferenceLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {
diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java
index 4adb29094..fc225a76e 100644
--- a/source/de/anomic/plasma/plasmaWordIndexCache.java
+++ b/source/de/anomic/plasma/plasmaWordIndexCache.java
@@ -60,7 +60,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// environment constants
private static final String indexArrayFileName = "indexDump1.array";
- public static final int ramCacheLimit = 50;
+ public static final int ramCacheReferenceLimit = 50; // when the maximum hashScore score exceeds this, that word is chosen for flushing first
+ public static final long ramCacheAgeLimit = 60 * 60 * 2 * 1000; // milliseconds; 2 hours: an oldest entry beyond this age is flushed as out-dated
// class variables
private final File databaseRoot;
@@ -205,7 +206,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// cache settings
public int maxURLinWordCache() {
- return hashScore.getScore(hashScore.getMaxObject());
+ return hashScore.getMaxScore(); // reads the highest score directly instead of looking up the max object first
+ }
+
+ public long maxAgeOfWordCache() {
+ return System.currentTimeMillis() - longEmit(hashDate.getMinScore()); // milliseconds from the time encoded by the smallest hashDate score until now
}
public int wordCacheRAMSize() {
@@ -249,18 +254,28 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (cache.size() == 0) return null;
try {
synchronized (cache) {
- String hash = (String) hashScore.getMaxObject();
- if (hash == null) return null;
+ String hash = null;
int count = hashScore.getMaxScore();
- //long time = longTime(hashDate.getScore(hash));
- if (count > ramCacheLimit) {
+ if ((count > ramCacheReferenceLimit) &&
+ ((hash = (String) hashScore.getMaxObject()) != null)) {
// flush high-score entries
return hash;
- } else {
- // flush oldest entries
- hash = (String) hashDate.getMinObject();
+ }
+ long oldestTime = longEmit(hashDate.getMinScore());
+ if (((System.currentTimeMillis() - oldestTime) > ramCacheAgeLimit) &&
+ ((hash = (String) hashDate.getMinObject()) != null)) {
+ // flush out-dated entries
return hash;
}
+ // not an urgent case
+ if (Runtime.getRuntime().freeMemory() < 10000000) {
+ // low-memory case
+ hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
+ } else {
+ // not-efficient-so-far case
+ hash = (String) hashDate.getMinObject(); // flush oldest entries (makes indexing faster)
+ }
+ return hash;
}
} catch (Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
@@ -272,6 +287,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return (int) ((longTime - startTime) / 1000);
}
+ private long longEmit(int intTime) { // maps a seconds-since-startTime value back to absolute milliseconds
+ return startTime + (intTime * 1000L);
+ }
+
/*
private long longTime(int intTime) {
return ((long) intTime) * ((long) 1000) + startTime;
diff --git a/source/de/anomic/plasma/plasmaWordIndexEntry.java b/source/de/anomic/plasma/plasmaWordIndexEntry.java
index 5f5e127c2..cbad0d318 100644
--- a/source/de/anomic/plasma/plasmaWordIndexEntry.java
+++ b/source/de/anomic/plasma/plasmaWordIndexEntry.java
@@ -108,13 +108,18 @@ public final class plasmaWordIndexEntry implements Cloneable {
public static final int AP_H5 = 5; // h5-tag
public static final int AP_H6 = 6; // h6-tag
public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam)
- public static final int AP_URL = 8; // word inside an url
- public static final int AP_IMG = 9; // tag inside image references
- public static final int AP_TAG = 10; // for tagged indexeing (i.e. using mp3 tags)
+ public static final int AP_DOM = 8; // word inside a URL: in the domain
+ public static final int AP_PATH = 9; // word inside a URL: in the path
+ public static final int AP_IMG = 10; // tag inside image references
public static final int AP_ANCHOR = 11; // anchor description
- public static final int AP_BOLD = 12;
- public static final int AP_ITALICS = 13;
- public static final int AP_INVISIBLE = 14; // good for spam detection
+ public static final int AP_BOLD = 12; // may be interpreted as emphasized
+ public static final int AP_ITALICS = 13; // may be interpreted as emphasized
+ public static final int AP_WEAK = 14; // for text that is small or barely visible
+ public static final int AP_INVISIBLE = 15; // good for spam detection
+ public static final int AP_TAG = 16; // for tagged indexing (i.e. using mp3 tags)
+ public static final int AP_AUTHOR = 17; // word appears in author name
+ public static final int AP_OPUS = 18; // word appears in name of opus, which may be an album name (in mp3 tags)
+ public static final int AP_TRACK = 19; // word appears in track name (i.e. in mp3 tags)
// URL attributes
public static final int UA_LOCAL = 0; // URL was crawled locally
diff --git a/yacy.init b/yacy.init
index 56136aad8..28bc2e054 100644
--- a/yacy.init
+++ b/yacy.init
@@ -417,8 +417,8 @@ xpstopw=true
# the prereq-value is a memory pre-requisite: that much bytes must
# be available/free in the heap; othervise the loop is not executed
# and another idlesleep is performed
-20_dhtdistribution_idlesleep=20000
-20_dhtdistribution_busysleep=5000
+20_dhtdistribution_idlesleep=50000
+20_dhtdistribution_busysleep=2000
20_dhtdistribution_memprereq=8388608
20_dhtdistribution_threads=1
30_peerping_idlesleep=120000
@@ -428,7 +428,7 @@ xpstopw=true
40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=4194304
50_localcrawl_idlesleep=10000
-50_localcrawl_busysleep=200
+50_localcrawl_busysleep=100
50_localcrawl_memprereq=1048576
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
@@ -442,8 +442,8 @@ xpstopw=true
70_cachemanager_idlesleep=5000
70_cachemanager_busysleep=0
70_cachemanager_memprereq=1048576
-80_indexing_idlesleep=5000
-80_indexing_busysleep=300
+80_indexing_idlesleep=2000
+80_indexing_busysleep=100
80_indexing_memprereq=2097152
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=0