refinements in ram cache flush procedure and default timing

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@1768 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent d31a4e0b4f
commit 87e90b9d8c

@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
releaseVersion=0.432
releaseVersion=0.433
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}

@ -86,6 +86,13 @@ Changes take effect immediately</td>
If this is a big number, it shows that the caching works efficiently.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum Age of Word in cache:</td>
<td class=small>#[maxAgeOfWordCache]#</td>
<td class=small>
This is the maximum age of a word index that is in the RAM cache in minutes.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches, low limit:</td>
<td class=small><input name="wordCacheMaxLow" type="text" size="20" maxlength="100" value="#[wordCacheMaxLow]#"></td>

@ -253,6 +253,7 @@ public class PerformanceQueues_p {
// table cache settings
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxAgeOfWordCache", "" + (switchboard.wordIndex.maxAgeOfWordCache() / 1000 / 60)); // minutes
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));

@ -94,6 +94,10 @@ public final class plasmaWordIndex {
return ramCache.maxURLinWordCache();
}
// pure delegation: returns the result of ramCache.maxAgeOfWordCache() unchanged
// NOTE(review): the unit is whatever ramCache reports (presumably milliseconds) -- confirm against the cache implementation
public long maxAgeOfWordCache() {
return ramCache.maxAgeOfWordCache();
}
// pure delegation: returns the result of ramCache.wordCacheRAMSize() unchanged
public int wordCacheRAMSize() {
return ramCache.wordCacheRAMSize();
}
@ -123,7 +127,7 @@ public final class plasmaWordIndex {
}
}
} else {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheLimit) {
while (ramCache.maxURLinWordCache() > plasmaWordIndexCache.ramCacheReferenceLimit) {
flushCache(1);
}
if (ramCache.size() > ramCache.getMaxWordsLow()) {

@ -60,7 +60,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// environment constants
private static final String indexArrayFileName = "indexDump1.array";
public static final int ramCacheLimit = 50;
public static final int ramCacheReferenceLimit = 50;
public static final long ramCacheAgeLimit = 60 * 60 * 2 * 1000; // milliseconds; 2 Hours
// class variables
private final File databaseRoot;
@ -205,7 +206,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
// cache settings
public int maxURLinWordCache() {
return hashScore.getScore(hashScore.getMaxObject());
return hashScore.getMaxScore();
}
// returns the difference between the current system time (System.currentTimeMillis())
// and the time obtained by converting hashDate.getMinScore() with longEmit()
// NOTE(review): the result for an empty hashDate depends on what getMinScore() returns then -- confirm
public long maxAgeOfWordCache() {
return System.currentTimeMillis() - longEmit(hashDate.getMinScore());
}
public int wordCacheRAMSize() {
@ -249,18 +254,28 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (cache.size() == 0) return null;
try {
synchronized (cache) {
String hash = (String) hashScore.getMaxObject();
if (hash == null) return null;
String hash = null;
int count = hashScore.getMaxScore();
//long time = longTime(hashDate.getScore(hash));
if (count > ramCacheLimit) {
if ((count > ramCacheReferenceLimit) &&
((hash = (String) hashScore.getMaxObject()) != null)) {
// flush high-score entries
return hash;
} else {
// flush oldest entries
hash = (String) hashDate.getMinObject();
}
long oldestTime = longEmit(hashDate.getMinScore());
if (((System.currentTimeMillis() - oldestTime) > ramCacheAgeLimit) &&
((hash = (String) hashDate.getMinObject()) != null)) {
// flush out-dated entries
return hash;
}
// not an urgent case
if (Runtime.getRuntime().freeMemory() < 10000000) {
// low-memory case
hash = (String) hashScore.getMaxObject(); // flush high-score entries (saves RAM)
} else {
// not-efficient-so-far case
hash = (String) hashDate.getMinObject(); // flush oldest entries (makes indexing faster)
}
return hash;
}
} catch (Exception e) {
log.logSevere("flushFromMem: " + e.getMessage(), e);
@ -272,6 +287,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return (int) ((longTime - startTime) / 1000);
}
// expands an int time value to a long: intTime * 1000 + startTime
// intTime is widened to long before the multiplication, so the product is computed in long arithmetic
private long longEmit(int intTime) {
return (((long) intTime) * (long) 1000) + startTime;
}
/*
private long longTime(int intTime) {
return ((long) intTime) * ((long) 1000) + startTime;

@ -108,13 +108,18 @@ public final class plasmaWordIndexEntry implements Cloneable {
public static final int AP_H5 = 5; // h5-tag
public static final int AP_H6 = 6; // h6-tag
public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam)
public static final int AP_URL = 8; // word inside an url
public static final int AP_IMG = 9; // tag inside image references
public static final int AP_TAG = 10; // for tagged indexeing (i.e. using mp3 tags)
public static final int AP_DOM = 8; // word inside an url: in Domain
public static final int AP_PATH = 9; // word inside an url: in path
public static final int AP_IMG = 10; // tag inside image references
public static final int AP_ANCHOR = 11; // anchor description
public static final int AP_BOLD = 12;
public static final int AP_ITALICS = 13;
public static final int AP_INVISIBLE = 14; // good for spam detection
public static final int AP_BOLD = 12; // may be interpreted as emphasized
public static final int AP_ITALICS = 13; // may be interpreted as emphasized
public static final int AP_WEAK = 14; // for text that is small or barely visible
public static final int AP_INVISIBLE = 15; // good for spam detection
public static final int AP_TAG = 16; // for tagged indexing (i.e. using mp3 tags)
public static final int AP_AUTHOR = 17; // word appears in author name
public static final int AP_OPUS = 18; // word appears in name of opus, which may be an album name (in mp3 tags)
public static final int AP_TRACK = 19; // word appears in track name (i.e. in mp3 tags)
// URL attributes
public static final int UA_LOCAL = 0; // URL was crawled locally

@ -417,8 +417,8 @@ xpstopw=true
# the prereq-value is a memory pre-requisite: that many bytes must
# be available/free in the heap; otherwise the loop is not executed
# and another idlesleep is performed
20_dhtdistribution_idlesleep=20000
20_dhtdistribution_busysleep=5000
20_dhtdistribution_idlesleep=50000
20_dhtdistribution_busysleep=2000
20_dhtdistribution_memprereq=8388608
20_dhtdistribution_threads=1
30_peerping_idlesleep=120000
@ -428,7 +428,7 @@ xpstopw=true
40_peerseedcycle_busysleep=1200000
40_peerseedcycle_memprereq=4194304
50_localcrawl_idlesleep=10000
50_localcrawl_busysleep=200
50_localcrawl_busysleep=100
50_localcrawl_memprereq=1048576
50_localcrawl_isPaused=false
61_globalcrawltrigger_idlesleep=10000
@ -442,8 +442,8 @@ xpstopw=true
70_cachemanager_idlesleep=5000
70_cachemanager_busysleep=0
70_cachemanager_memprereq=1048576
80_indexing_idlesleep=5000
80_indexing_busysleep=300
80_indexing_idlesleep=2000
80_indexing_busysleep=100
80_indexing_memprereq=2097152
82_crawlstack_idlesleep=5000
82_crawlstack_busysleep=0

Loading…
Cancel
Save