Integrated new indexing data structure 'collections' into the main class

for indexing, the plasmaWordIndex.

The new data structure is ready to use, but currently disabled.
It can be activated by setting the static flag
plasmaWordIndex.useCollectionIndex
to true. This shall be done for testing purposes.

The new index is stored in
DATA/INDEX/PUBLIC/TEXT
In the future, the directory PLASMA shall be used only by the crawler.

Attention: during testing, the data structure in INDEX may change,
and indexes created with the new data structure may become unusable.


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2348 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 4ff742e42d
commit 279b1d969d

@ -97,7 +97,7 @@ public final class IndexImport_p {
if (startImport) {
dbImporter importerThread = switchboard.dbImportManager.getNewImporter(importType);
if (importerThread != null) {
importerThread.init(new File(importPath), cacheSize, 100);
importerThread.init(new File(importPath), switchboard.indexPublicTextPath, cacheSize, 100);
importerThread.startIt();
}
prop.put("LOCATION","");

@ -38,18 +38,27 @@ import de.anomic.kelondro.kelondroOutOfLimitsException;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowCollection;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.server.logging.serverLog;
public class indexCollectionRI extends indexAbstractRI implements indexRI {
kelondroCollectionIndex collectionIndex;
public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime) throws IOException {
kelondroRow rowdef = new kelondroRow(new int[]{});
collectionIndex = new kelondroCollectionIndex(
path, filenameStub, 9 /*keyLength*/,
kelondroNaturalOrder.naturalOrder, buffersize, preloadTime,
4 /*loadfactor*/, rowdef);
public indexCollectionRI(File path, String filenameStub, long buffersize, long preloadTime) {
kelondroRow rowdef = indexURLEntry.urlEntryRow;
try {
collectionIndex = new kelondroCollectionIndex(
path,
filenameStub,
12 /*keyLength*/,
kelondroNaturalOrder.naturalOrder,
buffersize,
preloadTime,
4 /*loadfactor*/,
rowdef);
} catch (IOException e) {
serverLog.logSevere("PLASMA", "unable to open collection index at " + path.toString() + ":" + e.getMessage());
}
}
public int size() {
@ -133,7 +142,7 @@ public class indexCollectionRI extends indexAbstractRI implements indexRI {
String wordHash = newEntries.getWordHash();
try {
collectionIndex.merge(wordHash.getBytes(), (kelondroRowCollection) newEntries);
return getContainer(wordHash, true, -1); // FIXME: this is not optimal
return null; // merge does allways 'eat' up all entries unlike the assortments; they may return an overflow container
} catch (kelondroOutOfLimitsException e) {
e.printStackTrace();
return null;

@ -55,7 +55,7 @@ public class kelondroCollectionIndex {
"int chunksize-4 {b256}," +
"int chunkcount-4 {b256}," +
"int indexpos-4 {b256}," +
"short lastread-2 {b256}" +
"short lastread-2 {b256}, " +
"short lastwrote-2 {b256}"
);
}
@ -157,7 +157,7 @@ public class kelondroCollectionIndex {
private int putmergeremove(byte[] key, kelondroRowCollection collection, boolean merge, Set removekeys, boolean deletecomplete) throws IOException, kelondroOutOfLimitsException {
//if (collection.size() > maxChunks) throw new kelondroOutOfLimitsException(maxChunks, collection.size());
if ((!merge) && (collection.size() == 0)) {
if ((!merge) && (removekeys != null) && (collection != null) && (collection.size() == 0)) {
// this is not a replacement, it is a deletion
delete(key);
return 0;

@ -14,7 +14,7 @@ public abstract class AbstractImporter extends Thread implements dbImporter{
protected boolean paused = false;
protected plasmaSwitchboard sb;
protected File importPath;
protected File importPath, indexPath;
protected int cacheSize;
protected long preloadTime;
@ -33,9 +33,10 @@ public abstract class AbstractImporter extends Thread implements dbImporter{
return this.error;
}
public void init(File theImportPath) {
public void init(File theImportPath, File theIndexPath) {
if (theImportPath == null) throw new NullPointerException("The Import path must not be null.");
this.importPath = theImportPath;
this.importPath = theImportPath;
this.indexPath = theIndexPath;
// getting a job id from the import manager
this.jobID = this.sb.dbImportManager.getJobID();

@ -21,8 +21,8 @@ public class AssortmentImporter extends AbstractImporter implements dbImporter{
this.jobType = "ASSORTMENT";
}
public void init(File theImportAssortmentFile, int theCacheSize, long preloadTime) {
super.init(theImportAssortmentFile);
public void init(File theImportAssortmentFile, File theIndexFile, int theCacheSize, long preloadTime) {
super.init(theImportAssortmentFile, theIndexFile);
this.importAssortmentFile = theImportAssortmentFile;
this.cacheSize = theCacheSize;
if (this.cacheSize < 2*1024*1024) this.cacheSize = 2*1024*1024;

@ -24,6 +24,6 @@ public interface dbImporter {
public String getError();
public String getStatus();
public void init(File importPath, int cacheSize, long preloadTime);
public void init(File importPath, File indexPath, int cacheSize, long preloadTime);
public void startIt();
}

@ -45,8 +45,8 @@ public class plasmaCrawlNURLImporter extends AbstractImporter implements dbImpor
return theStatus.toString();
}
public void init(File theImportPath, int theCacheSize, long preloadTime) {
super.init(theImportPath);
public void init(File theImportPath, File theIndexPath, int theCacheSize, long preloadTime) {
super.init(theImportPath, theIndexPath);
this.cacheSize = theCacheSize;
this.preloadTime = preloadTime;

@ -51,9 +51,9 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
return theStatus.toString();
}
public void init(File theImportPath, int theCacheSize, long preloadTime) {
super.init(theImportPath);
public void init(File theImportPath, File theIndexPath, int theCacheSize, long preloadTime) {
super.init(theImportPath, theIndexPath);
this.homeWordIndex = this.sb.wordIndex;
this.homeUrlDB = this.sb.urlPool.loadedURL;
this.cacheSize = theCacheSize;
@ -75,7 +75,7 @@ public class plasmaDbImporter extends AbstractImporter implements dbImporter {
}
this.log.logFine("Initializing source word index db.");
this.importWordIndex = new plasmaWordIndex(this.importPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log);
this.importWordIndex = new plasmaWordIndex(this.importPath, this.indexPath, (this.cacheSize/2)/1024, preloadTime / 2, this.log);
this.log.logFine("Initializing import URL db.");
this.importUrlDB = new plasmaCrawlLURL(new File(this.importPath, "urlHash.db"), (this.cacheSize/2)/1024, preloadTime / 2);
this.importStartSize = this.importWordIndex.size();

@ -243,7 +243,7 @@ public class plasmaDHTChunk {
}
// create result
indexContainers = (indexContainer[]) tmpContainers.toArray(new indexContainer[tmpContainers.size()]);
//[C[16GwGuFzwffp] has 1 entries, C[16hGKMAl0w97] has 9 entries, C[17A8cDPF6SfG] has 9 entries, C[17Kdj__WWnUy] has 1 entries, C[1
if ((indexContainers == null) || (indexContainers.length == 0)) {
log.logFine("No index available for index transfer, hash start-point " + startPointHash);
this.status = chunkStatus_FAILED;
@ -269,7 +269,7 @@ public class plasmaDHTChunk {
}
public int deleteTransferIndexes() {
public synchronized int deleteTransferIndexes() {
Iterator urlIter;
indexEntry iEntry;
HashSet urlHashes;
@ -277,6 +277,10 @@ public class plasmaDHTChunk {
for (int i = 0; i < this.indexContainers.length; i++) {
// delete entries separately
if (this.indexContainers[i] == null) {
log.logFine("Deletion of partial index #" + i + " not possible, entry is null");
continue;
}
int c = this.indexContainers[i].size();
urlHashes = new HashSet(this.indexContainers[i].size());
urlIter = this.indexContainers[i].entries();

@ -180,6 +180,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// storage management
public File htCachePath;
private File plasmaPath;
public File indexPublicTextPath;
public File listsPath;
public File htDocsPath;
public File rankingPath;
@ -260,6 +261,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// load values from configs
this.plasmaPath = new File(rootPath, getConfig("dbPath", "DATA/PLASMADB"));
this.log.logConfig("Plasma DB Path: " + this.plasmaPath.toString());
this.indexPublicTextPath = new File(rootPath, getConfig("indexPublicTextPath", "DATA/INDEX/PUBLIC/TEXT"));
this.log.logConfig("Index Path: " + this.indexPublicTextPath.toString());
this.listsPath = new File(rootPath, getConfig("listsPath", "DATA/LISTS"));
this.log.logConfig("Lists Path: " + this.listsPath.toString());
this.htDocsPath = new File(rootPath, getConfig("htDocsPath", "DATA/HTDOCS"));
@ -386,7 +389,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
log.logConfig("Starting Indexing Management");
urlPool = new plasmaURLPool(plasmaPath, ramLURL, ramNURL, ramEURL, ramLURL_time);
wordIndex = new plasmaWordIndex(plasmaPath, ramRWI, ramRWI_time, log);
wordIndex = new plasmaWordIndex(plasmaPath, indexPublicTextPath, ramRWI, ramRWI_time, log);
int wordCacheMaxCount = (int) getConfigLong("wordCacheMaxCount", 10000);
wordIndex.setMaxWordCount(wordCacheMaxCount);

@ -58,6 +58,7 @@ import java.util.TreeSet;
import de.anomic.net.URL;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.index.indexCollectionRI;
import de.anomic.index.indexContainer;
import de.anomic.index.indexContainerOrder;
import de.anomic.index.indexEntry;
@ -78,28 +79,37 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
private static final String indexAssortmentClusterPath = "ACLUSTER";
private static final int assortmentCount = 64;
private static final boolean useCollectionIndex = false;
private final File databaseRoot;
private final indexRAMCacheRI ramCache;
private final plasmaWordIndexAssortmentCluster assortmentCluster;
private int assortmentBufferSize; //kb
private final plasmaWordIndexFileCluster backend;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
private final File oldDatabaseRoot;
private final kelondroOrder indexOrder = new kelondroNaturalOrder(true);
private final indexRAMCacheRI ramCache;
private final indexCollectionRI collections; // new database structure to replace AssortmentCluster and FileCluster
private int assortmentBufferSize; // kb
private final plasmaWordIndexAssortmentCluster assortmentCluster; // old database structure, to be replaced by CollectionRI
private final plasmaWordIndexFileCluster backend; // old database structure, to be replaced by CollectionRI
public plasmaWordIndex(File databaseRoot, int bufferkb, long preloadTime, serverLog log) {
this.databaseRoot = databaseRoot;
this.backend = new plasmaWordIndexFileCluster(databaseRoot, log);
this.ramCache = new indexRAMCacheRI(databaseRoot, log);
public plasmaWordIndex(File oldDatabaseRoot, File newIndexRoot, int bufferkb, long preloadTime, serverLog log) {
this.oldDatabaseRoot = oldDatabaseRoot;
this.backend = new plasmaWordIndexFileCluster(oldDatabaseRoot, log);
this.ramCache = new indexRAMCacheRI(oldDatabaseRoot, log);
// create new assortment cluster path
File assortmentClusterPath = new File(databaseRoot, indexAssortmentClusterPath);
// create assortment cluster path
File assortmentClusterPath = new File(oldDatabaseRoot, indexAssortmentClusterPath);
if (!(assortmentClusterPath.exists())) assortmentClusterPath.mkdirs();
this.assortmentBufferSize = bufferkb;
this.assortmentCluster = new plasmaWordIndexAssortmentCluster(assortmentClusterPath, assortmentCount, assortmentBufferSize, preloadTime, log);
// create collections storage path
if (!(newIndexRoot.exists())) newIndexRoot.mkdirs();
if (useCollectionIndex)
collections = new indexCollectionRI(newIndexRoot, "test_generation0", bufferkb * 1024, preloadTime);
else
collections = null;
}
public File getRoot() {
return databaseRoot;
return oldDatabaseRoot;
}
public int maxURLinWCache() {
@ -203,9 +213,16 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
private synchronized void flushCache(String wordHash) {
indexContainer c = ramCache.deleteContainer(wordHash);
if (c != null) {
indexContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
if (feedback != null) {
backend.addEntries(feedback, System.currentTimeMillis(), true);
if (useCollectionIndex) {
indexContainer feedback = collections.addEntries(c, c.updated(), false);
if (feedback != null) {
throw new RuntimeException("indexCollectionRI shall not return feedback entries; feedback = " + feedback.toString());
}
} else {
indexContainer feedback = assortmentCluster.addEntries(c, c.updated(), false);
if (feedback != null) {
backend.addEntries(feedback, System.currentTimeMillis(), true);
}
}
}
}
@ -292,15 +309,25 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
// get from cache
indexContainer container = ramCache.getContainer(wordHash, true, -1);
// We must not use the container from cache to store everything we find,
// as that container remains linked to in the cache and might be changed later
// while the returned container is still in use.
// create a clone from the container
if (container != null) container = container.topLevelClone();
// get from collection index
if (useCollectionIndex) {
if (container == null) {
container = collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime);
} else {
container.add(collections.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1);
}
}
// get from assortments
if (container == null) {
container = assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime);
} else {
// We must not use the container from cache to store everything we find,
// as that container remains linked to in the cache and might be changed later
// while the returned container is still in use.
// create a clone from the container
container = container.topLevelClone();
// add containers from assortment cluster
container.add(assortmentCluster.getContainer(wordHash, true, (maxTime < 0) ? -1 : maxTime), -1);
}
@ -357,6 +384,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
entity.close();
}
} catch (IOException e) {}
if (useCollectionIndex) size += collections.size();
size += assortmentCluster.indexSize(wordHash);
size += ramCache.indexSize(wordHash);
return size;
@ -364,6 +392,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
/**
 * Closes the storage layers of this word index, one after another.
 * The RAM cache receives the caller-supplied bound; the collection index
 * (only when useCollectionIndex is set), the assortment cluster and the
 * file backend are closed with fixed arguments.
 *
 * @param waitingBoundSeconds value passed on to the RAM cache's close call
 */
public synchronized void close(int waitingBoundSeconds) {
    // RAM cache goes first
    ramCache.close(waitingBoundSeconds);

    // the collection index is only touched when the new structure is enabled
    if (useCollectionIndex) {
        collections.close(-1);
    }

    // old structures: assortments, then the word-file backend
    assortmentCluster.close(-1);
    backend.close(10);
}
@ -371,7 +400,8 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public synchronized indexContainer deleteContainer(String wordHash) {
indexContainer c = ramCache.deleteContainer(wordHash);
if (c == null) c = new indexRowSetContainer(wordHash);
c.add(assortmentCluster.deleteContainer(wordHash, -1), -1);
if (useCollectionIndex) c.add(collections.deleteContainer(wordHash), -1);
c.add(assortmentCluster.deleteContainer(wordHash), -1);
c.add(backend.deleteContainer(wordHash), -1);
return c;
}
@ -379,6 +409,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public boolean removeEntry(String wordHash, String urlHash, boolean deleteComplete) {
synchronized (this) {
if (ramCache.removeEntry(wordHash, urlHash, deleteComplete)) return true;
if (useCollectionIndex) {if (collections.removeEntry(wordHash, urlHash, deleteComplete)) return true;}
if (assortmentCluster.removeEntry(wordHash, urlHash, deleteComplete)) return true;
return backend.removeEntry(wordHash, urlHash, deleteComplete);
}
@ -389,6 +420,10 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
synchronized (this) {
removed += ramCache.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
if (useCollectionIndex) {
removed += collections.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
}
removed += assortmentCluster.removeEntries(wordHash, urlHashes, deleteComplete);
if (removed == urlHashes.size()) return removed;
removed += backend.removeEntries(wordHash, urlHashes, deleteComplete);
@ -405,9 +440,9 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
}
public static final int RL_RAMCACHE = 0;
public static final int RL_COLLECTIONS = 1; // the 'new' index structure
public static final int RL_ASSORTMENTS = 2;
public static final int RL_WORDFILES = 3;
public static final int RL_COLLECTIONS = 1; // the new index structure
public static final int RL_ASSORTMENTS = 2; // (to be) outdated structure
public static final int RL_WORDFILES = 3; // (to be) outdated structure
public synchronized TreeSet indexContainerSet(String startHash, int resourceLevel, boolean rot, int count) throws IOException {
@ -446,16 +481,56 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
if (resourceLevel == plasmaWordIndex.RL_RAMCACHE) {
return ramCache.wordContainers(startWordHash, false);
}
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
if ((resourceLevel == plasmaWordIndex.RL_COLLECTIONS) && (useCollectionIndex)) {
return new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true);
}
if (resourceLevel == plasmaWordIndex.RL_ASSORTMENTS) {
if (useCollectionIndex) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true),
assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true);
} else {
return new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false),
assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true);
}
}
if (resourceLevel == plasmaWordIndex.RL_WORDFILES) {
return new kelondroMergeIterator(
if (useCollectionIndex) {
return new kelondroMergeIterator(
new kelondroMergeIterator(
new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false),
collections.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true),
assortmentCluster.wordContainers(startWordHash, true, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true),
backend.wordContainers(startWordHash, false),
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true);
} else {
return new kelondroMergeIterator(
new kelondroMergeIterator(
ramCache.wordContainers(startWordHash, false),
assortmentCluster.wordContainers(startWordHash, true, false),
@ -466,6 +541,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
new indexContainerOrder(kelondroNaturalOrder.naturalOrder),
indexRowSetContainer.containerMergeMethod,
true);
}
}
return null;
}
@ -505,11 +581,11 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public Object migrateWords2Assortment(String wordhash) throws IOException {
// returns the number of entries that had been added to the assortments
// can be negative if some assortments have been moved to the backend
File db = plasmaWordIndexFile.wordHash2path(databaseRoot, wordhash);
File db = plasmaWordIndexFile.wordHash2path(oldDatabaseRoot, wordhash);
if (!(db.exists())) return "not available";
plasmaWordIndexFile entity = null;
try {
entity = new plasmaWordIndexFile(databaseRoot, wordhash, true);
entity = new plasmaWordIndexFile(oldDatabaseRoot, wordhash, true);
int size = entity.size();
if (size > assortmentCluster.clusterCapacity) {
// this will be too big to integrate it
@ -671,8 +747,9 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
public static void main(String[] args) {
// System.out.println(kelondroMSetTools.fastStringComparator(true).compare("RwGeoUdyDQ0Y", "rwGeoUdyDQ0Y"));
// System.out.println(new Date(reverseMicroDateDays(microDateDays(System.currentTimeMillis()))));
plasmaWordIndex index = new plasmaWordIndex(new File("D:\\dev\\proxy\\DATA\\PLASMADB"), 555, 1000, new serverLog("TESTAPP"));
File plasmadb = new File("D:\\dev\\proxy\\DATA\\PLASMADB");
File indexdb = new File("D:\\dev\\proxy\\DATA\\INDEX\\PRIVATE\\TEXT");
plasmaWordIndex index = new plasmaWordIndex(plasmadb, indexdb, 555, 1000, new serverLog("TESTAPP"));
try {
Iterator containerIter = index.wordContainers("5A8yhZMh_Kmv", plasmaWordIndex.RL_WORDFILES, true);
while (containerIter.hasNext()) {

@ -646,9 +646,10 @@ public final class yacy {
// run with "java -classpath classes yacy -migratewords"
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
serverLog log = new serverLog("WORDMIGRATION");
log.logInfo("STARTING MIGRATION");
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, 20000, 10000, log);
plasmaWordIndex wordIndexCache = new plasmaWordIndex(dbroot, indexRoot, 20000, 10000, log);
enumerateFiles words = new enumerateFiles(new File(dbroot, "WORDS"), true, false, true, true);
String wordhash;
File wordfile;
@ -686,6 +687,7 @@ public final class yacy {
// run with "java -classpath classes yacy -minimizeUrlDB"
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
File dbroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
serverLog log = new serverLog("URL-CLEANUP");
try {
log.logInfo("STARTING URL CLEANUP");
@ -702,7 +704,7 @@ public final class yacy {
int cacheMem = (int)((rt.maxMemory()-rt.totalMemory())/1024)-(2*cache + 8*1024);
if (cacheMem < 2048) throw new OutOfMemoryError("Not enough memory available to start clean up.");
plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, cacheMem, 10000, log);
plasmaWordIndex wordIndex = new plasmaWordIndex(dbroot, indexRoot, cacheMem, 10000, log);
Iterator indexContainerIterator = wordIndex.wordContainers("------------", plasmaWordIndex.RL_WORDFILES, false);
long urlCounter = 0, wordCounter = 0;
@ -1137,6 +1139,7 @@ public final class yacy {
plasmaWordIndex WordIndex = null;
serverLog log = new serverLog("HASHLIST");
File homeDBroot = new File(new File(homePath), "DATA/PLASMADB");
File indexRoot = new File(new File(homePath), "DATA/INDEX/PUBLIC/TEXT");
String wordChunkStartHash = "------------";
try {serverLog.configureLogging(new File(homePath, "DATA/LOG/yacy.logging"));} catch (Exception e) {}
log.logInfo("STARTING CREATION OF RWI-HASHLIST");
@ -1144,7 +1147,7 @@ public final class yacy {
try {
Iterator indexContainerIterator = null;
if (resource.equals("all")) {
WordIndex = new plasmaWordIndex(homeDBroot, 8*1024*1024, 3000, log);
WordIndex = new plasmaWordIndex(homeDBroot, indexRoot, 8*1024*1024, 3000, log);
indexContainerIterator = WordIndex.wordContainers(wordChunkStartHash, plasmaWordIndex.RL_WORDFILES, false);
} else if (resource.equals("assortments")) {
plasmaWordIndexAssortmentCluster assortmentCluster = new plasmaWordIndexAssortmentCluster(new File(homeDBroot, "ACLUSTER"), 64, 16*1024*1024, 3000, log);

@ -185,9 +185,12 @@ parseableExt=html,htm,txt,php,shtml,asp,aspx,jsp
# other peer users
promoteSearchPageGreeting =
# the path to the PLASMA database, especially the reverse word index
# the path to the PLASMA database of the web spider
dbPath=DATA/PLASMADB
# the path to the public reverse word index for text files (web pages)
indexPublicTextPath=DATA/INDEX/PUBLIC/TEXT
# the path to the LISTS files. Most lists are used to filter web content
listsPath=DATA/LISTS

Loading…
Cancel
Save