added high/low priority for index adding

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@899 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 3966b38360
commit 839db8869c

@ -87,14 +87,19 @@ Changes take effect immediately</td>
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark"> <tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches:</td> <td class=small>Maximum number of Word Caches, low limit:</td>
<td class=small><input name="wordCacheMax" type="text" size="20" maxlength="100" value="#[wordCacheMax]#"></td> <td class=small><input name="wordCacheMaxLow" type="text" size="20" maxlength="100" value="#[wordCacheMaxLow]#"></td>
<td class=small> <td class=small rowspan="2">
This is the number of word indexes that shall be held in the This is the number of word indexes that shall be held in the
ram cache during indexing. When YaCy is shut down, this cache must be ram cache during indexing. When YaCy is shut down, this cache must be
flushed to disc; this may last some minutes. flushed to disc; this may last some minutes. The low limit is valid for crawling tasks, the high limit is valid
for search and DHT transmission tasks.
</td> </td>
</tr> </tr>
<tr valign="top" class="TableCellDark">
<td class=small>Maximum number of Word Caches, high limit:</td>
<td class=small><input name="wordCacheMaxHigh" type="text" size="20" maxlength="100" value="#[wordCacheMaxHigh]#"></td>
</tr>
<tr valign="top" class="TableCellLight"> <tr valign="top" class="TableCellLight">
<td class=small colspan="3"> <td class=small colspan="3">
<input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size"> <input type="submit" name="cacheSizeSubmit" value="Enter New Cache Size">

@ -171,9 +171,11 @@ public class PerformanceQueues_p {
prop.put("table", c); prop.put("table", c);
if ((post != null) && (post.containsKey("cacheSizeSubmit"))) { if ((post != null) && (post.containsKey("cacheSizeSubmit"))) {
int wordCacheMax = Integer.parseInt((String) post.get("wordCacheMax", "10000")); int wordCacheMaxLow = Integer.parseInt((String) post.get("wordCacheMaxLow", "8000"));
switchboard.setConfig("wordCacheMax", Integer.toString(wordCacheMax)); int wordCacheMaxHigh = Integer.parseInt((String) post.get("wordCacheMaxHigh", "10000"));
switchboard.wordIndex.setMaxWords(wordCacheMax); switchboard.setConfig("wordCacheMaxLow", Integer.toString(wordCacheMaxLow));
switchboard.setConfig("wordCacheMaxHigh", Integer.toString(wordCacheMaxHigh));
switchboard.wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
int maxWaitingWordFlush = Integer.parseInt((String) post.get("maxWaitingWordFlush", "180")); int maxWaitingWordFlush = Integer.parseInt((String) post.get("maxWaitingWordFlush", "180"));
switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush)); switchboard.setConfig("maxWaitingWordFlush", Integer.toString(maxWaitingWordFlush));
} }
@ -232,7 +234,8 @@ public class PerformanceQueues_p {
prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize()); prop.put("wordCacheRAMSize", switchboard.wordIndex.wordCacheRAMSize());
prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache()); prop.put("maxURLinWordCache", "" + switchboard.wordIndex.maxURLinWordCache());
prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180")); prop.put("maxWaitingWordFlush", switchboard.getConfig("maxWaitingWordFlush", "180"));
prop.put("wordCacheMax", switchboard.getConfig("wordCacheMax", "10000")); prop.put("wordCacheMaxLow", switchboard.getConfig("wordCacheMaxLow", "10000"));
prop.put("wordCacheMaxHigh", switchboard.getConfig("wordCacheMaxHigh", "10000"));
prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000")); prop.put("onlineCautionDelay", switchboard.getConfig("onlineCautionDelay", "30000"));
prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess); prop.put("onlineCautionDelayCurrent", System.currentTimeMillis() - switchboard.proxyLastAccess);

@ -120,7 +120,7 @@ public final class transferRWI {
wordHash = estring.substring(0, p); wordHash = estring.substring(0, p);
wordhashes[i] = wordHash; wordhashes[i] = wordHash;
entry = new plasmaWordIndexEntry(estring.substring(p)); entry = new plasmaWordIndexEntry(estring.substring(p));
sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry)); sb.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), true);
urlHash = entry.getUrlHash(); urlHash = entry.getUrlHash();
if ((!(unknownURL.contains(urlHash))) && if ((!(unknownURL.contains(urlHash))) &&
(!(sb.urlPool.loadedURL.exists(urlHash)))) { (!(sb.urlPool.loadedURL.exists(urlHash)))) {

@ -79,7 +79,7 @@ public final class plasmaSearch {
} }
public void addWords(plasmaWordIndexEntryContainer container) { public void addWords(plasmaWordIndexEntryContainer container) {
wordIndex.addEntries(container); wordIndex.addEntries(container, true);
} }
public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser, public int addPageIndex(URL url, String urlHash, Date urlModified, plasmaCondenser condenser,
@ -108,7 +108,7 @@ public final class plasmaSearch {
wordHash = plasmaWordIndexEntry.word2hash(word); wordHash = plasmaWordIndexEntry.word2hash(word);
entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0, entry = new plasmaWordIndexEntry(urlHash, count, p++, 0, 0,
age, quality, language, doctype, true); age, quality, language, doctype, true);
this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry)); this.wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
} }
//System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries"); //System.out.println("DEBUG: plasmaSearch.addPageIndex: added " + condenser.getWords().size() + " words, flushed " + c + " entries");
return condenser.getWords().size(); return condenser.getWords().size();

@ -306,8 +306,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL); urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log); wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, log);
int wordCacheMax = Integer.parseInt((String) getConfig("wordCacheMax", "10000")); int wordCacheMaxLow = Integer.parseInt((String) getConfig("wordCacheMaxLow", "8000"));
wordIndex.setMaxWords(wordCacheMax); int wordCacheMaxHigh = Integer.parseInt((String) getConfig("wordCacheMaxHigh", "10000"));
wordIndex.setMaxWords(wordCacheMaxLow, wordCacheMaxHigh);
searchManager = new plasmaSearch(urlPool.loadedURL, wordIndex); searchManager = new plasmaSearch(urlPool.loadedURL, wordIndex);
// start a cache manager // start a cache manager

@ -93,12 +93,12 @@ public final class plasmaWordIndex {
return ramCache.assortmentsCacheFillStatusCml(); return ramCache.assortmentsCacheFillStatusCml();
} }
public void setMaxWords(int maxWords) { public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
ramCache.setMaxWords(maxWords); ramCache.setMaxWords(maxWordsLow, maxWordsHigh);
} }
public int addEntries(plasmaWordIndexEntryContainer entries) { public int addEntries(plasmaWordIndexEntryContainer entries, boolean highPriority) {
return ramCache.addEntries(entries, System.currentTimeMillis()); return ramCache.addEntries(entries, System.currentTimeMillis(), highPriority);
} }
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) { public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {

@ -75,7 +75,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
private final kelondroMScoreCluster hashScore; private final kelondroMScoreCluster hashScore;
private final kelondroMScoreCluster hashDate; private final kelondroMScoreCluster hashDate;
private long startTime; private long startTime;
private int maxWords; private int maxWordsLow, maxWordsHigh; // we have 2 cache limits for different priorities
private final serverLog log; private final serverLog log;
private final plasmaWordIndexAssortmentCluster assortmentCluster; private final plasmaWordIndexAssortmentCluster assortmentCluster;
private int assortmentBufferSize; //kb private int assortmentBufferSize; //kb
@ -115,7 +115,8 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
this.hashScore = new kelondroMScoreCluster(); this.hashScore = new kelondroMScoreCluster();
this.hashDate = new kelondroMScoreCluster(); this.hashDate = new kelondroMScoreCluster();
this.startTime = System.currentTimeMillis(); this.startTime = System.currentTimeMillis();
this.maxWords = 10000; this.maxWordsLow = 8000;
this.maxWordsHigh = 10000;
this.backend = backend; this.backend = backend;
this.log = log; this.log = log;
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log); this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, log);
@ -254,8 +255,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return cache.size(); return cache.size();
} }
public void setMaxWords(int maxWords) { public void setMaxWords(int maxWordsLow, int maxWordsHigh) {
this.maxWords = maxWords; this.maxWordsLow = maxWordsLow;
this.maxWordsHigh = maxWordsHigh;
} }
public int[] assortmentsSizes() { public int[] assortmentsSizes() {
@ -319,7 +321,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else { } else {
flushFromMem(); flushFromMem();
try { try {
pausetime = 1 + java.lang.Math.min(1000, 5 * maxWords/(cache.size() + 1)); pausetime = 1 + java.lang.Math.min(1000, 5 * maxWordsHigh/(cache.size() + 1));
if (cache.size() == 0) pausetime = 2000; if (cache.size() == 0) pausetime = 2000;
this.sleep(pausetime); this.sleep(pausetime);
} catch (InterruptedException e) {} } catch (InterruptedException e) {}
@ -395,11 +397,11 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return container.size(); return container.size();
} else { } else {
// *** should care about another option here *** // *** should care about another option here ***
return backend.addEntries(feedback, time); return backend.addEntries(feedback, time, true);
} }
} else { } else {
// store to back-end; this should be a rare case // store to back-end; this should be a rare case
return backend.addEntries(container, time); return backend.addEntries(container, time, true);
} }
} }
@ -420,7 +422,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else { } else {
// we have a non-empty entry-container // we have a non-empty entry-container
// integrate it to the backend // integrate it to the backend
return backend.addEntries(container, container.updated()) > 0; return backend.addEntries(container, container.updated(), true) > 0;
} }
} }
@ -464,17 +466,22 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return removed; return removed;
} }
public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime) { public synchronized int addEntries(plasmaWordIndexEntryContainer container, long updateTime, boolean highPriority) {
// this puts the entries into the cache, not into the assortment directly // this puts the entries into the cache, not into the assortment directly
// check cache space // check cache space
if (cache.size() > 0) try { if (cache.size() > 0) try {
// pause to get space in the cache (while it is flushed) // pause to get space in the cache (while it is flushed)
if (cache.size() + 1000 >= this.maxWords) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWords + 1000)); long pausetime;
if (highPriority) {
if (cache.size() + 1000 >= this.maxWordsHigh) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsHigh + 1000));
pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsHigh + 1));
} else {
if (cache.size() + 1000 >= this.maxWordsLow) Thread.sleep(java.lang.Math.min(1000, cache.size() - this.maxWordsLow + 1000));
pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWordsLow + 1));
}
// slow down if we reach cache limit // slow down if we reach cache limit
long pausetime = java.lang.Math.min(10, 2 * cache.size() / (maxWords + 1));
//System.out.println("Pausetime=" + pausetime);
Thread.sleep(pausetime); Thread.sleep(pausetime);
} catch (InterruptedException e) {} } catch (InterruptedException e) {}

@ -166,7 +166,7 @@ public class plasmaWordIndexClassicCacheMigration {
new String(row[j + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort)); new String(row[j + 2], plasmaCrawlLURL.urlHashLength, plasmaWordIndexEntry.attrSpaceShort));
} }
container.add(entries, System.currentTimeMillis()); container.add(entries, System.currentTimeMillis());
fresh.addEntries(container); fresh.addEntries(container, false);
i = null; i = null;
remove(hash); remove(hash);
return true; return true;

@ -226,7 +226,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
} }
} }
public int addEntries(plasmaWordIndexEntryContainer container, long creationTime) { public int addEntries(plasmaWordIndexEntryContainer container, long creationTime, boolean highPriority) {
//System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug //System.out.println("* adding " + newEntries.size() + " cached word index entries for word " + wordHash); // debug
// fetch the index cache // fetch the index cache
if ((container == null) || (container.size() == 0)) return 0; if ((container == null) || (container.size() == 0)) return 0;

@ -55,7 +55,7 @@ public interface plasmaWordIndexInterface {
public void deleteIndex(String wordHash); public void deleteIndex(String wordHash);
public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete); public int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete);
public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime); public int addEntries(plasmaWordIndexEntryContainer newEntries, long creationTime, boolean highPriority);
public void close(int waitingSeconds); public void close(int waitingSeconds);

@ -492,12 +492,15 @@ javastart_Xms=Xms10m
# performance properties for the word index cache # performance properties for the word index cache
# wordCacheMax is the number of word indexes that shall be held in the # wordCacheMaxLow/High is the number of word indexes that shall be held in the
# ram cache during indexing. When YaCy is shut down, this cache must be # ram cache during indexing. When YaCy is shut down, this cache must be
# flushed to disc; this may last some minutes. # flushed to disc; this may last some minutes.
# The low value applies to crawling tasks; the high value applies to
# remote index transmissions and search requests.
# maxWaitingWordFlush gives the number of seconds that the shutdown # maxWaitingWordFlush gives the number of seconds that the shutdown
# may last for the word flush # may last for the word flush
wordCacheMax = 10000 wordCacheMaxLow = 12000
wordCacheMaxHigh = 16000
maxWaitingWordFlush = 180 maxWaitingWordFlush = 180
# Specifies if yacy can be used as transparent http proxy. # Specifies if yacy can be used as transparent http proxy.

Loading…
Cancel
Save