- more asserts

- bugfix for performanceMemory
- refactoring of index ram cache: renamed indexRAMCacheRI to indexRAMRI, to make space for a cached indexRI, which should be named indexRAMCacheRI

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2925 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 114a76a86e
commit ee4715a21c

@ -41,7 +41,7 @@ import de.anomic.kelondro.kelondroRow;
import de.anomic.server.logging.serverLog; import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB; import de.anomic.yacy.yacySeedDB;
public final class indexRAMCacheRI implements indexRI { public final class indexRAMRI implements indexRI {
// environment constants // environment constants
public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes public static final long wCacheMaxAge = 1000 * 60 * 30; // milliseconds; 30 minutes
@ -66,7 +66,7 @@ public final class indexRAMCacheRI implements indexRI {
//minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-'; //minKey = ""; for (int i = 0; i < yacySeedDB.commonHashLength; i++) maxKey += '-';
} }
public indexRAMCacheRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) { public indexRAMRI(File databaseRoot, kelondroRow payloadrow, int wCacheReferenceLimitInit, String dumpname, serverLog log) {
// creates a new index cache // creates a new index cache
// the cache has a back-end where indexes that do not fit in the cache are flushed // the cache has a back-end where indexes that do not fit in the cache are flushed
@ -235,10 +235,9 @@ public final class indexRAMCacheRI implements indexRI {
} }
public synchronized int indexSize(String wordHash) { public synchronized int indexSize(String wordHash) {
int size = 0;
indexContainer cacheIndex = (indexContainer) cache.get(wordHash); indexContainer cacheIndex = (indexContainer) cache.get(wordHash);
if (cacheIndex != null) size += cacheIndex.size(); if (cacheIndex == null) return 0;
return size; return cacheIndex.size();
} }
public synchronized Iterator wordContainers(String startWordHash, boolean rot) { public synchronized Iterator wordContainers(String startWordHash, boolean rot) {

@ -35,6 +35,8 @@ import java.util.Map;
import java.util.TreeMap; import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroFlexTable;
import de.anomic.kelondro.kelondroIndex; import de.anomic.kelondro.kelondroIndex;
import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroTree;
import de.anomic.net.URL; import de.anomic.net.URL;
@ -433,21 +435,29 @@ public class indexURL {
public int cacheNodeChunkSize() { public int cacheNodeChunkSize() {
if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheNodeChunkSize(); if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheNodeChunkSize();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).cacheNodeChunkSize();
if (urlIndexFile instanceof kelondroFlexTable) return ((kelondroFlexTable) urlIndexFile).cacheNodeChunkSize();
return 0; return 0;
} }
public int[] cacheNodeStatus() { public int[] cacheNodeStatus() {
if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheNodeStatus(); if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheNodeStatus();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).cacheNodeStatus();
if (urlIndexFile instanceof kelondroFlexTable) return ((kelondroFlexTable) urlIndexFile).cacheNodeStatus();
return new int[]{0,0,0,0,0,0,0,0,0,0}; return new int[]{0,0,0,0,0,0,0,0,0,0};
} }
public int cacheObjectChunkSize() { public int cacheObjectChunkSize() {
if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheObjectChunkSize(); if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheObjectChunkSize();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).cacheObjectChunkSize();
if (urlIndexFile instanceof kelondroFlexTable) return ((kelondroFlexTable) urlIndexFile).cacheObjectChunkSize();
return 0; return 0;
} }
public long[] cacheObjectStatus() { public long[] cacheObjectStatus() {
if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheObjectStatus(); if (urlIndexFile instanceof kelondroTree) return ((kelondroTree) urlIndexFile).cacheObjectStatus();
if (urlIndexFile instanceof kelondroCache) return ((kelondroCache) urlIndexFile).cacheObjectStatus();
if (urlIndexFile instanceof kelondroFlexTable) return ((kelondroFlexTable) urlIndexFile).cacheObjectStatus();
return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; return new long[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
} }

@ -92,7 +92,7 @@ public class indexURLEntry implements Cloneable, indexEntry {
// more needed attributes: // more needed attributes:
// - boolean: appearance attributes: title, appears in header, anchor-descr, image-tag, hervorhebungen, meta-tags, word in link etc // - boolean: appearance attributes: title, appears in header, anchor-descr, image-tag, hervorhebungen, meta-tags, word in link etc
// - boolean: URL attributes // - boolean: URL attributes
assert (urlHash.length() == 12) : "urlhash = " + urlHash;
if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk"; if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk";
this.entry = urlEntryRow.newEntry(); this.entry = urlEntryRow.newEntry();
this.entry.setCol(col_urlhash, urlHash, null); this.entry.setCol(col_urlhash, urlHash, null);
@ -247,6 +247,9 @@ public class indexURLEntry implements Cloneable, indexEntry {
} }
static void normalize(indexURLEntry t, indexEntry min, indexEntry max) { static void normalize(indexURLEntry t, indexEntry min, indexEntry max) {
assert (t.urlHash().length() == 12) : "turlhash = " + t.urlHash();
assert (min.urlHash().length() == 12) : "minurlhash = " + min.urlHash();
assert (max.urlHash().length() == 12) : "maxurlhash = " + max.urlHash();
if (1 + max.worddistance() - min.worddistance() == 0) System.out.println("min = " + min.toPropertyForm(true) + "\nmax=" + max.toPropertyForm(true)); if (1 + max.worddistance() - min.worddistance() == 0) System.out.println("min = " + min.toPropertyForm(true) + "\nmax=" + max.toPropertyForm(true));
//System.out.println("Normalize:\nentry = " + t.toPropertyForm(true)); //System.out.println("Normalize:\nentry = " + t.toPropertyForm(true));
//System.out.println("min = " + min.toPropertyForm(true)); //System.out.println("min = " + min.toPropertyForm(true));

@ -2083,6 +2083,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
urlentry = acc.nextElement(); urlentry = acc.nextElement();
plasmaCrawlLURLEntry.Components comp = urlentry.comp(); plasmaCrawlLURLEntry.Components comp = urlentry.comp();
urlhash = urlentry.hash(); urlhash = urlentry.hash();
assert (urlhash != null);
assert (urlhash.length() == 12) : "urlhash = " + urlhash;
host = comp.url().getHost(); host = comp.url().getHost();
if (host.endsWith(".yacyh")) { if (host.endsWith(".yacyh")) {
// translate host into current IP // translate host into current IP

@ -42,7 +42,7 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder; import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexEntry; import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexRAMCacheRI; import de.anomic.index.indexRAMRI;
import de.anomic.index.indexRI; import de.anomic.index.indexRI;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
@ -64,7 +64,7 @@ public final class plasmaWordIndex implements indexRI {
private final File oldDatabaseRoot; private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true); private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
private final indexRAMCacheRI dhtOutCache, dhtInCache; private final indexRAMRI dhtOutCache, dhtInCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private int assortmentBufferSize; // kb private int assortmentBufferSize; // kb
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
@ -76,8 +76,8 @@ public final class plasmaWordIndex implements indexRI {
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException { public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, boolean dummy, int bufferkb, long preloadTime, serverLog log, boolean useCollectionIndex) throws IOException {
this.oldDatabaseRoot = oldDatabaseRoot; this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrow, log); this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, payloadrow, log);
this.dhtOutCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log); this.dhtOutCache = new indexRAMRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump1.array", log);
this.dhtInCache = new indexRAMCacheRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log); this.dhtInCache = new indexRAMRI(oldDatabaseRoot, payloadrow, (useCollectionIndex) ? 1024 : 64, "indexDump2.array", log);
// create assortment cluster path // create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath); File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
@ -234,7 +234,7 @@ public final class plasmaWordIndex implements indexRI {
flushCacheSome(dhtInCache, busy); flushCacheSome(dhtInCache, busy);
} }
private void flushCacheSome(indexRAMCacheRI ram, boolean busy) { private void flushCacheSome(indexRAMRI ram, boolean busy) {
int flushCount; int flushCount;
if (ram.size() > ram.getMaxWordCount()) { if (ram.size() > ram.getMaxWordCount()) {
flushCount = ram.size() + 100 - ram.getMaxWordCount(); flushCount = ram.size() + 100 - ram.getMaxWordCount();
@ -246,7 +246,7 @@ public final class plasmaWordIndex implements indexRI {
flushCache(ram, flushCount); flushCache(ram, flushCount);
} }
private void flushCache(indexRAMCacheRI ram, int count) { private void flushCache(indexRAMRI ram, int count) {
if (count <= 0) return; if (count <= 0) return;
busyCacheFlush = true; busyCacheFlush = true;
String wordHash; String wordHash;

@ -504,6 +504,7 @@ public final class yacyClient {
// get one single search result // get one single search result
urlEntry = urlManager.newEntry((String) result.get("resource" + n)); urlEntry = urlManager.newEntry((String) result.get("resource" + n));
if (urlEntry == null) continue; if (urlEntry == null) continue;
assert (urlEntry.hash().length() == 12) : "urlEntry.hash() = " + urlEntry.hash();
plasmaCrawlLURLEntry.Components comp = urlEntry.comp(); plasmaCrawlLURLEntry.Components comp = urlEntry.comp();
if (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, comp.url())) continue; // block with backlist if (blacklist.isListed(plasmaURLPattern.BLACKLIST_SEARCH, comp.url())) continue; // block with backlist
urlManager.store(urlEntry); urlManager.store(urlEntry);

Loading…
Cancel
Save