From a9b22647dc9d7cfa405a6b3cc55d2708e71e0cf3 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 8 May 2005 00:11:59 +0000 Subject: [PATCH] fixed bug in indexDump.stack - generation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@88 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../de/anomic/kelondro/kelondroRecords.java | 3 +- .../anomic/plasma/plasmaWordIndexCache.java | 29 +++++++++++-------- yacy.parser | 2 +- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index f9c000bf0..8057d53b3 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -141,8 +141,7 @@ public class kelondroRecords { // FHandles: number of integer properties // txtProps: number of text properties - if (file.exists()) - throw new IOException("kelondroRecords: tree file " + file + " already exist"); + if (file.exists()) throw new IOException("kelondroRecords: file " + file + " already exist"); this.filename = file.getCanonicalPath(); kelondroRA raf = new kelondroFileRA(this.filename); //kelondroRA raf = new kelondroBufferedRA(new kelondroFileRA(this.filename), 5000000, 1000); diff --git a/source/de/anomic/plasma/plasmaWordIndexCache.java b/source/de/anomic/plasma/plasmaWordIndexCache.java index 37010ae7e..8594e423f 100644 --- a/source/de/anomic/plasma/plasmaWordIndexCache.java +++ b/source/de/anomic/plasma/plasmaWordIndexCache.java @@ -51,7 +51,7 @@ import de.anomic.yacy.yacySeedDB; public class plasmaWordIndexCache implements plasmaWordIndexInterface { - private static final String indexDumpFileName = "indexDump.stack"; + private static final String indexDumpFileName = "indexDump0.stack"; static String minKey, maxKey; @@ -91,24 +91,27 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface { log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)"); File indexDumpFile = new File(databaseRoot, indexDumpFileName); if (indexDumpFile.exists()) indexDumpFile.delete(); - kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.attrSpaceLong}); + kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.wordHashLength, plasmaWordIndexEntry.attrSpaceLong}); long startTime = System.currentTimeMillis(); long messageTime = System.currentTimeMillis() + 5000; long wordsPerSecond = 0, wordcount = 0, urlcount = 0; synchronized (cache) { - Iterator i = cache.entrySet().iterator(); - Map.Entry entry; + //Iterator i = cache.entrySet().iterator(); + Iterator i = hashScore.scores(false); + //Map.Entry entry; String wordHash; plasmaWordIndexEntryContainer container; long creationTime; plasmaWordIndexEntry wordEntry; - byte[][] row = new byte[4][]; + byte[][] row = new byte[5][]; while (i.hasNext()) { // get entries - entry = (Map.Entry) i.next(); - wordHash = (String) entry.getKey(); + //entry = (Map.Entry) i.next(); + wordHash = (String) i.next(); + //wordHash = (String) entry.getKey(); creationTime = getCreationTime(wordHash); - container = (plasmaWordIndexEntryContainer) entry.getValue(); + container = (plasmaWordIndexEntryContainer) cache.get(wordHash); + //container = (plasmaWordIndexEntryContainer) entry.getValue(); // put entries on stack if (container != null) { @@ -118,7 +121,8 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface { row[0] = wordHash.getBytes(); row[1] = kelondroRecords.long2bytes(container.size(), 4); row[2] = kelondroRecords.long2bytes(creationTime, 8); - row[3] = wordEntry.toEncodedForm(true).getBytes(); + row[3] = wordEntry.getUrlHash().getBytes(); + row[4] = wordEntry.toEncodedForm(true).getBytes(); dumpStack.push(row); urlcount++; } @@ -147,7 +151,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface { synchronized (cache) { Iterator i = dumpStack.iterator(); kelondroRecords.Node node; - String wordHash; + String wordHash, urlHash; plasmaWordIndexEntryContainer container; long creationTime; plasmaWordIndexEntry wordEntry; @@ -158,7 +162,8 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface { row = node.getValues(); wordHash = new String(row[0]); creationTime = kelondroRecords.bytes2long(row[2]); - wordEntry = new plasmaWordIndexEntry(wordHash, new String(row[3])); + urlHash = new String(row[3]); + wordEntry = new plasmaWordIndexEntry(urlHash, new String(row[4])); // store to cache addEntry(wordHash, wordEntry, creationTime); @@ -166,7 +171,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface { // write a log if (System.currentTimeMillis() > messageTime) { - urlsPerSecond = urlCount * 1000 / (1 + System.currentTimeMillis() - startTime); + urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime); log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining"); messageTime = System.currentTimeMillis() + 5000; } diff --git a/yacy.parser b/yacy.parser index 59e7dde02..a0e4bc054 100644 --- a/yacy.parser +++ b/yacy.parser @@ -1,2 +1,2 @@ #plasmaParser configuration file -#Sat May 07 22:32:33 CEST 2005 +#Sun May 08 00:07:28 CEST 2005