Changed some parameters, which may improve memory usage and increase indexing speed

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2457 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent b7f4a1521b
commit 985dcbde7f

@ -406,10 +406,12 @@ public class kelondroCollectionIndex {
}
public void close() throws IOException {
this.index.close();
Iterator i = arrays.values().iterator();
while (i.hasNext()) {
((kelondroFixedWidthArray) i.next()).close();
synchronized (index) {
this.index.close();
Iterator i = arrays.values().iterator();
while (i.hasNext()) {
((kelondroFixedWidthArray) i.next()).close();
}
}
}

@ -637,15 +637,19 @@ public final class plasmaCrawlStacker {
}
private void deleteDB() {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.delete();
if (this.newdb) {
kelondroFlexTable.delete(cacheStacksPath, "urlPreNotice1.table");
} else {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.delete();
}
}
private void openDB() {
if (!(cacheStacksPath.exists())) cacheStacksPath.mkdir(); // make the path
if (this.newdb) {
String newCacheName = "urPreNotice1.table";
String newCacheName = "urlPreNotice1.table";
cacheStacksPath.mkdirs();
try {
this.urlEntryCache = new kelondroFlexTable(cacheStacksPath, newCacheName, bufferkb * 0x400, preloadTime, plasmaCrawlNURL.rowdef, kelondroBase64Order.enhancedCoder);
@ -654,8 +658,6 @@ public final class plasmaCrawlStacker {
System.exit(-1);
}
} else {
File cacheFile = new File(cacheStacksPath, "urlPreNotice.db");
cacheFile.getParentFile().mkdirs();
this.urlEntryCache = kelondroTree.open(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef);
@ -729,6 +731,7 @@ public final class plasmaCrawlStacker {
try {
synchronized(this.urlEntryHashCache) {
urlHash = (String) this.urlEntryHashCache.removeFirst();
if (urlHash == null) throw new IOException("urlHash is null");
entry = this.urlEntryCache.remove(urlHash.getBytes());
}
} finally {

@ -169,7 +169,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load slots
public static int crawlSlots = 10;
public static int indexingSlots = 100;
public static int stackCrawlSlots = 10000;
public static int stackCrawlSlots = 1000000;
public static int maxCRLDump = 500000;
public static int maxCRGDump = 200000;
@ -413,7 +413,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log, getConfigBool("useCollectionIndex", false));
// set a high maximum cache size to current size; this is adopted later automatically
int wordCacheMaxCount = Math.max(20000, (int) getConfigLong("wordCacheMaxCount", 20000));
int wordCacheMaxCount = Math.max(80000, (int) getConfigLong("wordCacheMaxCount", 80000));
setConfig("wordCacheMaxCount", Integer.toString(wordCacheMaxCount));
wordIndex.setMaxWordCount(wordCacheMaxCount);

@ -200,9 +200,10 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public void flushCacheSome() {
synchronized (this) { ramCache.shiftK2W(); }
int flushCount = ramCache.wSize() / 420;
//int flushCount = ramCache.wSize() / 420;
int flushCount = ramCache.wSize() / 3000; // for testings
if (flushCount > 100) flushCount = 100;
if (flushCount < 10) flushCount = Math.min(10, ramCache.wSize());
if (flushCount < 5) flushCount = Math.min(5, ramCache.wSize());
flushCache(flushCount);
}

Loading…
Cancel
Save