orbiter 19 years ago
parent bf0d820659
commit 93a5ace330

@ -1,7 +1,6 @@
package de.anomic.plasma.dbImport; package de.anomic.plasma.dbImport;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
import de.anomic.index.indexContainer; import de.anomic.index.indexContainer;
@ -59,12 +58,7 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{
// initializing the import assortment db // initializing the import assortment db
this.log.logInfo("Initializing source assortment file " + theImportAssortmentFile); this.log.logInfo("Initializing source assortment file " + theImportAssortmentFile);
try { this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, assortmentNr, this.cacheSize/1024, preloadTime, this.log);
this.assortmentFile = new plasmaWordIndexAssortment(importAssortmentPath, assortmentNr, this.cacheSize/1024, preloadTime, this.log);
} catch (IOException e) {
e.printStackTrace();
System.exit(-1);
}
this.importStartSize = this.assortmentFile.size(); this.importStartSize = this.assortmentFile.size();
} }
@ -92,7 +86,8 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{
public void run() { public void run() {
try { try {
// getting a content iterator // getting a content iterator
Iterator contentIterator = this.assortmentFile.wordContainers(null, true, false); Iterator contentIterator = this.assortmentFile.wordContainers();
this.log.logFine("Started import of file " + this.assortmentFile.getName());
while (contentIterator.hasNext()) { while (contentIterator.hasNext()) {
this.wordEntityCount++; this.wordEntityCount++;

@ -60,7 +60,6 @@ import de.anomic.index.indexContainer;
import de.anomic.index.indexRWIEntryNew; import de.anomic.index.indexRWIEntryNew;
import de.anomic.index.indexRWIEntryOld; import de.anomic.index.indexRWIEntryOld;
import de.anomic.kelondro.kelondroBase64Order; import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroCache;
import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
@ -76,7 +75,7 @@ public final class plasmaWordIndexAssortment {
// class variables // class variables
private File assortmentFile; private File assortmentFile;
private serverLog log; private serverLog log;
private kelondroCache assortments; private kelondroTree assortment;
private long bufferSize; private long bufferSize;
private static String intx(int x) { private static String intx(int x) {
@ -99,7 +98,7 @@ public final class plasmaWordIndexAssortment {
return (rowsize - yacySeedDB.commonHashLength - 12) / indexRWIEntryOld.urlEntryRow.objectsize(); return (rowsize - yacySeedDB.commonHashLength - 12) / indexRWIEntryOld.urlEntryRow.objectsize();
} }
public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) throws IOException { public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) {
if (!(storagePath.exists())) storagePath.mkdirs(); if (!(storagePath.exists())) storagePath.mkdirs();
this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db"); this.assortmentFile = new File(storagePath, assortmentFileName + intx(assortmentLength) + ".db");
//this.bufferStructureLength = 3 + 2 * assortmentLength; //this.bufferStructureLength = 3 + 2 * assortmentLength;
@ -107,17 +106,21 @@ public final class plasmaWordIndexAssortment {
this.log = log; this.log = log;
// open assortment tree file // open assortment tree file
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
assortments = new kelondroCache(kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength)), bufferSize / 2, true, false); assortment = kelondroTree.open(assortmentFile, bufferSize / 2, preloadTime, bufferStructure(assortmentLength));
long stop = System.currentTimeMillis(); long stop = System.currentTimeMillis();
if (log != null) log.logConfig("Opened Assortment, " + if (log != null) log.logConfig("Opened Assortment, " +
assortments.size() + " entries, width " + assortment.size() + " entries, width " +
assortmentLength + ", " + bufferkb + "kb buffer, " + assortmentLength + ", " + bufferkb + "kb buffer, " +
preloadTime + " ms preloadTime, " + preloadTime + " ms preloadTime, " +
(stop - start) + " ms effective, " + (stop - start) + " ms effective, " +
assortments.cacheNodeStatus()[1] + " preloaded"); assortment.cacheNodeStatus()[1] + " preloaded");
} }
/**
 * Reports the name of this assortment, which is the path of its
 * backing database file.
 *
 * @return the string form of the assortment file
 */
public String getName() {
    final File backingFile = this.assortmentFile;
    return backingFile.toString();
}
public final indexContainer row2container(kelondroRow.Entry row) { public final indexContainer row2container(kelondroRow.Entry row) {
if (row == null) return null; if (row == null) return null;
String wordHash = row.getColString(0, null); String wordHash = row.getColString(0, null);
@ -131,12 +134,12 @@ public final class plasmaWordIndexAssortment {
return container; return container;
} }
public Iterator wordContainers(String startWordHash, boolean up, boolean rot) throws IOException { public Iterator wordContainers() {
// returns an iteration of indexContainer elements // returns an iteration of indexContainer elements
try { try {
return new containerIterator(startWordHash, up, rot); return new containerIterator();
} catch (kelondroException e) { } catch (kelondroException e) {
log.logSevere("iterateAssortment/kelondro-error: " + e.getMessage() + " - reset assortment-DB " + assortmentFile, e); log.logSevere("iterateAssortment/kelondro-error: " + e.getMessage(), e);
return null; return null;
} }
} }
@ -145,8 +148,8 @@ public final class plasmaWordIndexAssortment {
private Iterator rowIterator; private Iterator rowIterator;
public containerIterator(String startWordHash, boolean up, boolean rot) throws IOException { public containerIterator() {
rowIterator = assortments.rows(up, rot, (startWordHash == null) ? null : startWordHash.getBytes()); rowIterator = assortment.contentRows(-1);
} }
public boolean hasNext() { public boolean hasNext() {
@ -165,16 +168,12 @@ public final class plasmaWordIndexAssortment {
} }
public int size() { public int size() {
try { return assortment.size();
return assortments.size();
} catch (IOException e) {
return 0;
}
} }
public void close() { public void close() {
try { try {
assortments.close(); assortment.close();
} catch (IOException e){ } catch (IOException e){
log.logSevere("unable to close assortment database: " + e.getMessage(), e); log.logSevere("unable to close assortment database: " + e.getMessage(), e);
} }

@ -1273,7 +1273,7 @@ public final class yacy {
serverLog log = new serverLog("HASHLIST"); serverLog log = new serverLog("HASHLIST");
File homeDBroot = new File(new File(homePath), "DATA/PLASMADB"); File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX"); File indexRoot = new File(new File(homePath), "DATA/INDEX");
String wordChunkStartHash = "------------"; String wordChunkStartHash = "AAAAAAAAAAAA";
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {} try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
log.logInfo("STARTING CREATION OF RWI-HASHLIST"); log.logInfo("STARTING CREATION OF RWI-HASHLIST");
File root = new File(homePath); File root = new File(homePath);
@ -1285,7 +1285,7 @@ public final class yacy {
} else if (resource.startsWith("assortment")) { } else if (resource.startsWith("assortment")) {
int a = Integer.parseInt(resource.substring(10)); int a = Integer.parseInt(resource.substring(10));
plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, 3000, null); plasmaWordIndexAssortment assortment = new plasmaWordIndexAssortment(new File(homeDBroot, "ACLUSTER"), a, 8*1024*1024, 3000, null);
indexContainerIterator = assortment.wordContainers(wordChunkStartHash, true, false); indexContainerIterator = assortment.wordContainers();
} else if (resource.equals("words")) { } else if (resource.equals("words")) {
plasmaWordIndexFileCluster fileDB = new plasmaWordIndexFileCluster(homeDBroot); plasmaWordIndexFileCluster fileDB = new plasmaWordIndexFileCluster(homeDBroot);
indexContainerIterator = fileDB.wordContainers(wordChunkStartHash, false); indexContainerIterator = fileDB.wordContainers(wordChunkStartHash, false);

Loading…
Cancel
Save