fixed bug in indexDump.stack generation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@88 6c8d7289-2bf4-0310-a012-ef5d649a1542
Branch: pull/1/head
Author: orbiter
Parent: 6c8e0fdbf5
Commit: a9b22647dc
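In short, the dump stack previously stored only four columns per entry (word hash, container size, creation time, encoded entry), so the URL hash was lost on dump and the word hash ended up being passed where the URL hash belongs on restore. The sketch below is only an illustration of the new five-column row layout with placeholder values; DumpRowSketch and its long2bytes helper are stand-ins written for this note, not YaCy code (the real widths come from plasmaWordIndexEntry.wordHashLength and attrSpaceLong, and the real encoder is kelondroRecords.long2bytes).

import java.nio.charset.StandardCharsets;

public class DumpRowSketch {
    public static void main(String[] args) {
        // placeholder values; real hashes come from the word index cache
        String wordHash = "AAAAAAAAAAAA";
        String urlHash  = "BBBBBBBBBBBB";
        String encoded  = "encoded-entry-attributes"; // stands in for wordEntry.toEncodedForm(true)

        byte[][] row = new byte[5][];                       // was new byte[4][] before the fix
        row[0] = wordHash.getBytes(StandardCharsets.UTF_8); // word hash (wordHashLength bytes)
        row[1] = long2bytes(1L, 4);                         // container size
        row[2] = long2bytes(System.currentTimeMillis(), 8); // creation time
        row[3] = urlHash.getBytes(StandardCharsets.UTF_8);  // NEW column: URL hash stored explicitly
        row[4] = encoded.getBytes(StandardCharsets.UTF_8);  // encoded index entry (attrSpaceLong bytes)

        for (int c = 0; c < row.length; c++) {
            System.out.println("column " + c + ": " + row[c].length + " bytes");
        }
    }

    // simplified stand-in for kelondroRecords.long2bytes: fixed-width big-endian encoding (assumed)
    private static byte[] long2bytes(long value, int length) {
        byte[] b = new byte[length];
        for (int i = length - 1; i >= 0; i--) {
            b[i] = (byte) (value & 0xFF);
            value >>>= 8;
        }
        return b;
    }
}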

@@ -141,8 +141,7 @@ public class kelondroRecords {
         // FHandles: number of integer properties
         // txtProps: number of text properties
-        if (file.exists())
-            throw new IOException("kelondroRecords: tree file " + file + " already exist");
+        if (file.exists()) throw new IOException("kelondroRecords: file " + file + " already exist");
         this.filename = file.getCanonicalPath();
         kelondroRA raf = new kelondroFileRA(this.filename);
         //kelondroRA raf = new kelondroBufferedRA(new kelondroFileRA(this.filename), 5000000, 1000);

@@ -51,7 +51,7 @@ import de.anomic.yacy.yacySeedDB;
 public class plasmaWordIndexCache implements plasmaWordIndexInterface {
-    private static final String indexDumpFileName = "indexDump.stack";
+    private static final String indexDumpFileName = "indexDump0.stack";
     static String minKey, maxKey;
@@ -91,24 +91,27 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
         log.logSystem("creating dump for index cache, " + cache.size() + " words (and much more urls)");
         File indexDumpFile = new File(databaseRoot, indexDumpFileName);
         if (indexDumpFile.exists()) indexDumpFile.delete();
-        kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.attrSpaceLong});
+        kelondroStack dumpStack = new kelondroStack(indexDumpFile, 0, new int[]{plasmaWordIndexEntry.wordHashLength, 4, 8, plasmaWordIndexEntry.wordHashLength, plasmaWordIndexEntry.attrSpaceLong});
         long startTime = System.currentTimeMillis();
         long messageTime = System.currentTimeMillis() + 5000;
         long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
         synchronized (cache) {
-            Iterator i = cache.entrySet().iterator();
-            Map.Entry entry;
+            //Iterator i = cache.entrySet().iterator();
+            Iterator i = hashScore.scores(false);
+            //Map.Entry entry;
             String wordHash;
             plasmaWordIndexEntryContainer container;
             long creationTime;
             plasmaWordIndexEntry wordEntry;
-            byte[][] row = new byte[4][];
+            byte[][] row = new byte[5][];
             while (i.hasNext()) {
                 // get entries
-                entry = (Map.Entry) i.next();
-                wordHash = (String) entry.getKey();
+                //entry = (Map.Entry) i.next();
+                wordHash = (String) i.next();
+                //wordHash = (String) entry.getKey();
                 creationTime = getCreationTime(wordHash);
-                container = (plasmaWordIndexEntryContainer) entry.getValue();
+                container = (plasmaWordIndexEntryContainer) cache.get(wordHash);
+                //container = (plasmaWordIndexEntryContainer) entry.getValue();
                 // put entries on stack
                 if (container != null) {
@@ -118,7 +121,8 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
                     row[0] = wordHash.getBytes();
                     row[1] = kelondroRecords.long2bytes(container.size(), 4);
                     row[2] = kelondroRecords.long2bytes(creationTime, 8);
-                    row[3] = wordEntry.toEncodedForm(true).getBytes();
+                    row[3] = wordEntry.getUrlHash().getBytes();
+                    row[4] = wordEntry.toEncodedForm(true).getBytes();
                     dumpStack.push(row);
                     urlcount++;
                 }
@@ -147,7 +151,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
         synchronized (cache) {
             Iterator i = dumpStack.iterator();
             kelondroRecords.Node node;
-            String wordHash;
+            String wordHash, urlHash;
             plasmaWordIndexEntryContainer container;
             long creationTime;
             plasmaWordIndexEntry wordEntry;
@@ -158,7 +162,8 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
                 row = node.getValues();
                 wordHash = new String(row[0]);
                 creationTime = kelondroRecords.bytes2long(row[2]);
-                wordEntry = new plasmaWordIndexEntry(wordHash, new String(row[3]));
+                urlHash = new String(row[3]);
+                wordEntry = new plasmaWordIndexEntry(urlHash, new String(row[4]));
                 // store to cache
                 addEntry(wordHash, wordEntry, creationTime);
@@ -166,7 +171,7 @@ public class plasmaWordIndexCache implements plasmaWordIndexInterface {
                 // write a log
                 if (System.currentTimeMillis() > messageTime) {
-                    urlsPerSecond = urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
+                    urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
                     log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpStack.size() - urlCount) / urlsPerSecond) + " seconds remaining");
                     messageTime = System.currentTimeMillis() + 5000;
                 }
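For illustration only, here is a matching sketch of the restore step under the new layout; RestoreRowSketch and its helpers are stand-ins written for this note (the real code iterates kelondroStack nodes and reads node.getValues()), but the column order and the plasmaWordIndexEntry(urlHash, encodedForm) reconstruction mirror the diff above.

import java.nio.charset.StandardCharsets;

public class RestoreRowSketch {
    public static void main(String[] args) {
        // stands in for one row returned by node.getValues() while iterating the dump stack
        byte[][] row = new byte[][] {
            "AAAAAAAAAAAA".getBytes(StandardCharsets.UTF_8),            // word hash
            new byte[] {0, 0, 0, 1},                                    // container size
            bytesOfLong(System.currentTimeMillis(), 8),                 // creation time
            "BBBBBBBBBBBB".getBytes(StandardCharsets.UTF_8),            // URL hash (new column)
            "encoded-entry-attributes".getBytes(StandardCharsets.UTF_8) // encoded entry
        };

        String wordHash    = new String(row[0], StandardCharsets.UTF_8);
        long creationTime  = bytes2long(row[2]);
        String urlHash     = new String(row[3], StandardCharsets.UTF_8);
        String encodedForm = new String(row[4], StandardCharsets.UTF_8);

        // Before the fix the entry was rebuilt as new plasmaWordIndexEntry(wordHash, encodedForm),
        // i.e. the word hash was passed where the URL hash belongs; with five columns both hashes
        // survive the dump/restore round trip.
        System.out.println("word " + wordHash + " -> entry(urlHash=" + urlHash
                + ", encoded=" + encodedForm + ", created=" + creationTime + ")");
    }

    // fixed-width big-endian helpers, simplified stand-ins for kelondroRecords long2bytes/bytes2long
    private static byte[] bytesOfLong(long value, int length) {
        byte[] b = new byte[length];
        for (int i = length - 1; i >= 0; i--) { b[i] = (byte) (value & 0xFF); value >>>= 8; }
        return b;
    }

    private static long bytes2long(byte[] b) {
        long v = 0;
        for (byte x : b) v = (v << 8) | (x & 0xFF);
        return v;
    }
}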

@@ -1,2 +1,2 @@
 #plasmaParser configuration file
-#Sat May 07 22:32:33 CEST 2005
+#Sun May 08 00:07:28 CEST 2005
