added high/low priority for index adding

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@899 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 3966b38360
commit 839db8869c

@ -87,14 +87,19 @@ Changes take effect immediately</td>
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches:</td>
<td class=small><input name="wordCacheMax" type="text" size="20" maxlength="100" value="#[wordCacheMax]#"></td>
<td class=small>
<td class=small>Maximum number of Word Caches, low limit:</td>
<td class=small><input name="wordCacheMaxLow" type="text" size="20" maxlength="100" value="#[wordCacheMaxLow]#"></td>
<td class=small rowspan="2">
This is the number of word indexes that shall be held in the
ram cache during indexing. When YaCy is shut down, this cache must be
flushed to disc; this may last some minutes.
flushed to disc; this may last some minutes. The low limit applies to crawling tasks; the high limit applies to
search and DHT transmission tasks.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches, high limit:</td>
<td class=small><input name="wordCacheMaxHigh" type="text" size="20" maxlength="100" value="#[wordCacheMaxHigh]#"></td>
</tr>
<tr valign="top" class="TableCellLight">
<td class=small colspan="3">
<input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size">

@ -171,9 +171,11 @@ public class PerformanceQueues_p {
prop.put("table", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
int wordCacheMax = Integer.parseInt((String) post.get("wordCacheMax", "10000"));
switchboard.setConfig("wordCacheMax", Integer.toString(wordCacheMax));
switchboard.wordIndex.setMaxWords(wordCacheMax);
int wordCacheMaxLow = Integer.parseInt((String) post.get("wordCacheMaxLow", "8000"));
int wordCacheMaxHigh = Integer.parseInt((String) post.get("wordCacheMaxHigh", "10000"));
switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow));
switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh));
switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
int maxWaitingWordFlush = Integer.parseInt((String) post.get("maxWaitingWordFlush", "180"));
switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush));
}
@ -232,7 +234,8 @@ public class PerformanceQueues_p {
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000"));
// Publish the two word-cache limits to the page template. The fallback values
// must match the ones used where the settings are parsed ("8000" for the low
// limit, "10000" for the high limit); otherwise the form would display a low
// limit that differs from the one in effect when the config key is unset.
prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "8000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);

@ -120,7 +120,7 @@ public final class transferRWI {
wordHash = estring.substring(0, p);
wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p));
sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry));
sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true);
urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) {

@ -79,7 +79,7 @@ public final class plasmaSearch {
}
public void addWords(plasmaWordIndexEntryContainer container) {
wordIndex.addEntries(container);
wordIndex.addEntries(container, true);
}
public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser,
@ -108,7 +108,7 @@ public final class plasmaSearch {
wordHash = plasmaWordIndexEntry.word2hash(word);
entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0,
age, quality, language, doctype, true);
this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry));
this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
//System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries");
return condenser.getWords().size();

@ -306,8 +306,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
int wordCacheMax = Integer.parseInt((String) getConfig("wordCacheMax", "10000"));
wordIndex.setMaxWords(wordCacheMax);
int wordCacheMaxLow = Integer.parseInt((String) getConfig("wordCacheMaxLow", "8000"));
int wordCacheMaxHigh = Integer.parseInt((String) getConfig("wordCacheMaxHigh", "10000"));
wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
searchManager = new plasmaSearch(urlPool.loadedURL, wordIndex);
// start a cache manager

@ -93,12 +93,12 @@ public final class plasmaWordIndex {
return ramCache.assortmentsCacheFillStatusCml();
}
public void setMaxWords(int maxWords) {
ramCache.setMaxWords(maxWords);
public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
}
public int addEntries(plasmaWordIndexEntryContainer entries) {
return ramCache.addEntries(entries, System.currentTimeMillis());
public int addEntries(plasmaWordIndexEntryContainer entries, boolean highPriority) {
return ramCache.addEntries(entries, System.currentTimeMillis(), highPriority);
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {

@ -74,8 +74,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
private final TreeMap cache;
private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate;
private long startTime;
private int maxWords;
private long startTime;
private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities
private final serverLog log;
private final plasmaWordIndexAssortmentCluster assortmentCluster;
private int assortmentBufferSize; //kb
@ -115,7 +115,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis();
this.maxWords = 10000;
this.maxWordsLow = 8000;
this.maxWordsHigh = 10000;
this.backend = backend;
this.log = log;
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log);
@ -254,8 +255,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return cache.size();
}
public void setMaxWords(int maxWords) {
this.maxWords = maxWords;
public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
this.maxWordsLow = maxWordsLow;
this.maxWordsHigh = maxWordsHigh;
}
public int[] assortmentsSizes() {
@ -319,7 +321,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
flushFromMem();
try {
pausetime = 1 + java.lang.Math.min(1000, 5 * maxWords/(cache.size() + 1));
pausetime = 1 + java.lang.Math.min(1000, 5 * maxWordsHigh/(cache.size() + 1));
if (cache.size() == 0) pausetime = 2000;
this.sleep(pausetime);
} catch (InterruptedException e) {}
@ -395,11 +397,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return container.size();
} else {
// *** should care about another option here ***
return backend.addEntries(feedback, time);
return backend.addEntries(feedback, time, true);
}
} else {
// store to back-end; this should be a rare case
return backend.addEntries(container, time);
return backend.addEntries(container, time, true);
}
}
@ -420,7 +422,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
// we have a non-empty entry-container
// integrate it to the backend
return backend.addEntries(container, container.updated()) > 0;
return backend.addEntries(container, container.updated(), true) > 0;
}
}
@ -464,17 +466,22 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return removed;
}
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) {
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
// this puts the entries into the cache, not into the assortment directly
// check cache space
if (cache.size() > 0) try {
// pause to get space in the cache (while it is flushed)
if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000));
long pausetime;
if (highPriority) {
if (cache.size() + 1000 >= this.maxWordsHigh) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsHigh + 1000));
pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsHigh + 1));
} else {
if (cache.size() + 1000 >= this.maxWordsLow) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsLow + 1000));
pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsLow + 1));
}
// slow down if we reach cache limit
long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1));
//System.out.println("Pausetime=" + pausetime);
Thread.sleep(pausetime);
} catch (InterruptedException e) {}

@ -166,7 +166,7 @@ public class plasmaWordIndexClassicCacheMigration {
new String(row[j + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort));
}
container.add(entries, System.currentTimeMillis());
fresh.addEntries(container);
fresh.addEntries(container, false);
i = null;
remove(hash);
return true;

@ -226,7 +226,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
}
}
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) {
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache
if ((container == null) || (container.size() == 0)) return 0;

@ -55,7 +55,7 @@ public interface plasmaWordIndexInterface {
public void deleteIndex(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime);
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
public void close(int waitingSeconds);

@ -492,12 +492,15 @@ javastart_Xms=Xms10m
# performance properties for the word index cache
# wordCacheMax is the number of word indexes that shall be held in the
# wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes.
# The low value applies to crawling tasks; the high value applies to
# remote index transmissions and search requests.
# maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush
wordCacheMax = 10000
wordCacheMaxLow = 12000
wordCacheMaxHigh = 16000
maxWaitingWordFlush = 180
# Specifies if yacy can be used as transparent http proxy.

Loading…
Cancel
Save