The collection index will no longer be supported.

Existing indexes based on the old index collections must be migrated with YaCy 0.8
- removed index collection classes and all migration tools
- added an 'incell' reference collection feature in URL analysis


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5966 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent a2f48863fc
commit a7e392f31b

@ -53,12 +53,13 @@ import java.util.zip.GZIPOutputStream;
import de.anomic.kelondro.index.HandleSet;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.MetadataRepository;
import de.anomic.kelondro.text.ReferenceContainerArray;
import de.anomic.kelondro.text.MetadataRepository.Export;
import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.MemoryControl;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.yacy.yacyURL;
public class URLAnalysis {
@ -393,12 +394,11 @@ public class URLAnalysis {
System.out.println("finished");
}
public static void incollection(String collectionPath, String statisticPath) {
public static void incell(File cellPath, String statisticPath) {
try {
IntegerHandleIndex idx = IndexCollection.referenceHashes(
new File(collectionPath),
"collection",
12,
IntegerHandleIndex idx = ReferenceContainerArray.referenceHashes(
cellPath,
plasmaWordIndex.wordReferenceFactory,
Base64Order.enhancedCoder,
WordReferenceRow.urlEntryRow);
System.out.println("COLLECTION INDEX REFERENCE COLLECTION starting dump of statistics");
@ -475,11 +475,11 @@ public class URLAnalysis {
} else if (args[0].equals("-sort") && args.length >= 2) {
// generate file <file>.x.sort with sorted lists and split the file in smaller pieces
for (int i = 1; i < args.length; i++) sortsplit(args[i]);
} else if (args[0].equals("-incollection") && args.length >= 2) {
// generate a dump of all referenced URL hashes from a given RICOLLECTION
} else if (args[0].equals("-incell") && args.length >= 2) {
// generate a dump of all referenced URL hashes from a given RICELL
// example:
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -incollection DATA/INDEX/freeworld/TEXT/RICOLLECTION used.dump
incollection(args[1], args[2]);
// java -Xmx1000m -cp classes de.anomic.data.URLAnalysis -incell DATA/INDEX/freeworld/TEXT/RICELL used.dump
incell(new File(args[1]), args[2]);
} else if (args[0].equals("-diffurlcol") && args.length >= 3) {
// make a diff-file that contains hashes from the url database that do not occur in the collection reference dump
// example:

@ -346,5 +346,5 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
this.maxRamEntries = maxWords;
this.cleanCache();
}
}

File diff suppressed because it is too large Load Diff

@ -1,345 +0,0 @@
// IndexCollectionMigration.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.03.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2009-03-13 11:34:51 +0100 (Fr, 13 Mrz 2009) $
// $LastChangedRevision: 5709 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.text;
import java.io.File;
import java.io.IOException;
import java.util.Set;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MergeIterator;
import de.anomic.kelondro.order.Order;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.text.Index;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.ReferenceContainerOrder;
import de.anomic.kelondro.text.referencePrototype.WordReferenceRow;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.Log;
// Migration facade over the word index: fronts the new RICELL-based IndexCell
// while incrementally draining a legacy RICOLLECTION-based IndexCollection.
// Every read/write operation first deletes the affected container from the old
// 'collections' store (if it still exists) and re-adds it to 'cell', so the
// legacy index is emptied as a side effect of normal operation.
public final class IndexCollectionMigration<ReferenceType extends Reference> extends AbstractBufferedIndex<ReferenceType> implements Index<ReferenceType>, BufferedIndex<ReferenceType> {
// target index in the new RICELL format; all data eventually ends up here
private final IndexCell<ReferenceType> cell;
// legacy RICOLLECTION index; null when absent or already fully migrated
private IndexCollection<ReferenceType> collections;
// background merger shared with the cell
private final IODispatcher<ReferenceType> merger;
// Opens the new cell, migrates any old RICACHE dump blobs into it, and opens
// the legacy collection store read-mostly for incremental migration.
// Deletes the RICACHE directory after mounting its blobs, and deletes the
// RICOLLECTION directory immediately if it is already empty.
public IndexCollectionMigration (
final File indexPrimaryTextLocation,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder wordOrdering,
final Row payloadrow,
final int entityCacheMaxSize,
final long targetFileSize,
final long maxFileSize,
final IODispatcher<ReferenceType> merger,
final int writeBufferSize,
final Log log) throws IOException {
super(factory);
this.merger = merger;
final File celldir = new File(indexPrimaryTextLocation, "RICELL");
this.cell = new IndexCell<ReferenceType>(
celldir,
factory,
wordOrdering,
WordReferenceRow.urlEntryRow,
entityCacheMaxSize,
targetFileSize,
maxFileSize,
this.merger,
writeBufferSize);
final File textindexcache = new File(indexPrimaryTextLocation, "RICACHE");
if (textindexcache.exists()) {
// migrate the "index.dhtout.blob" into RICELL directory
File f = new File(textindexcache, "index.dhtout.blob");
if (f.exists()) {
File n = this.cell.newContainerBLOBFile();
f.renameTo(n);
this.cell.mountBLOBFile(n);
}
f = new File(textindexcache, "index.dhtin.blob");
if (f.exists()) {
File n = this.cell.newContainerBLOBFile();
f.renameTo(n);
this.cell.mountBLOBFile(n);
}
// delete everything else
// NOTE(review): renameTo/mountBLOBFile return values are ignored; a failed
// rename would silently lose the dht blob before the directory is deleted.
String[] l = textindexcache.list();
for (String s: l) {
f = new File(textindexcache, s);
FileUtils.deletedelete(f);
}
FileUtils.deletedelete(textindexcache);
}
// open collections, this is for migration only.
final File textindexcollections = new File(indexPrimaryTextLocation, "RICOLLECTION");
if (textindexcollections.exists()) {
this.collections = new IndexCollection<ReferenceType>(
textindexcollections,
"collection",
factory,
12,
Base64Order.enhancedCoder,
7,
WordReferenceRow.urlEntryRow,
false);
if (this.collections.size() == 0) {
// delete everything here
this.collections.close();
this.collections = null;
String[] l = textindexcollections.list();
File f;
for (String s: l) {
f = new File(textindexcollections, s);
FileUtils.deletedelete(f);
}
FileUtils.deletedelete(textindexcollections);
}
} else {
this.collections = null;
}
}
/* methods for interface Index */
// Adds a container; if a legacy container for the same term exists it is
// removed from the old store, merged with the new entries, and written to the cell.
public void add(final ReferenceContainer<ReferenceType> entries) throws IOException {
assert (entries.row().objectsize == WordReferenceRow.urlEntryRow.objectsize);
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(entries.getTermHash());
if (e != null) {
e.merge(entries);
cell.add(e);
} else {
cell.add(entries);
}
} else {
cell.add(entries);
}
}
// Adds a single entry; migrates the legacy container for the word hash first, if any.
public void add(final byte[] wordHash, final ReferenceType entry) throws IOException {
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) {
e.add(entry);
cell.add(e);
} else {
cell.add(wordHash, entry);
}
} else {
cell.add(wordHash, entry);
}
}
// Existence check with migration side effect: a hit in the legacy store is
// moved into the cell before answering true. IOExceptions during that move
// are only printed, not propagated.
public boolean has(final byte[] wordHash) {
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) {
try {
cell.add(e);
} catch (IOException e1) {
e1.printStackTrace();
}
return true;
} else {
return cell.has(wordHash);
}
} else {
return cell.has(wordHash);
}
}
// Counts references for a word hash, migrating any legacy container first so
// the cell's count includes the old entries.
public int count(byte[] wordHash) {
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) {
try {
cell.add(e);
} catch (IOException e1) {
e1.printStackTrace();
}
return cell.count(wordHash);
} else {
return cell.count(wordHash);
}
} else {
return cell.count(wordHash);
}
}
// Fetches a container (optionally restricted to a URL selection), migrating
// the legacy container into the cell first. Returns null on null input.
public ReferenceContainer<ReferenceType> get(final byte[] wordHash, final Set<String> urlselection) throws IOException {
if (wordHash == null) {
// wrong input
return null;
}
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) cell.add(e);
}
return this.cell.get(wordHash, urlselection);
}
// Deletes the container for a word hash from both stores and returns the
// merged result (or whichever side had data; null if neither).
public ReferenceContainer<ReferenceType> delete(final byte[] wordHash) throws IOException {
ReferenceContainer<ReferenceType> cc = cell.delete(wordHash);
if (cc == null) {
if (collections == null) return null;
return collections.delete(wordHash);
} else {
if (collections == null) return cc;
ReferenceContainer<ReferenceType> cd = collections.delete(wordHash);
if (cd == null) return cc;
return cc.merge(cd);
}
}
// Removes a single URL reference; the legacy container is migrated into the
// cell first so the removal sees all entries.
public boolean remove(final byte[] wordHash, final String urlHash) throws IOException {
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) cell.add(e);
}
return cell.remove(wordHash, urlHash);
}
// Bulk removal variant of remove(byte[], String); same migrate-then-remove pattern.
public int remove(final byte[] wordHash, final Set<String> urlHashes) throws IOException {
if (this.collections != null) {
ReferenceContainer<ReferenceType> e = this.collections.delete(wordHash);
if (e != null) cell.add(e);
}
return cell.remove(wordHash, urlHashes);
}
// Iterates containers starting at startHash; with rot=true the iterator wraps around.
// NOTE(review): when rot && !ram and 'collections' is null this dereferences
// collections.size() and would throw a NullPointerException — confirm callers.
public synchronized CloneableIterator<ReferenceContainer<ReferenceType>> references(final byte[] startHash, final boolean rot, final boolean ram) throws IOException {
final CloneableIterator<ReferenceContainer<ReferenceType>> i = wordContainers(startHash, ram);
if (rot) {
return new RotateIterator<ReferenceContainer<ReferenceType>>(i, Base64Order.zero(startHash.length), cell.size() + ((ram) ? 0 : collections.size()));
}
return i;
}
// Merged view over cell and legacy containers (cell wins on merge); ram=true
// restricts to the cell's RAM buffer only.
private synchronized CloneableIterator<ReferenceContainer<ReferenceType>> wordContainers(final byte[] startWordHash, final boolean ram) throws IOException {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, cell.ordering().clone());
ReferenceContainer<ReferenceType> emptyContainer = ReferenceContainer.emptyContainer(factory, startWordHash, 0);
containerOrder.rotate(emptyContainer);
if (ram) {
return cell.references(startWordHash, true);
}
if (collections == null) return cell.references(startWordHash, false);
return new MergeIterator<ReferenceContainer<ReferenceType>>(
cell.references(startWordHash, false),
collections.references(startWordHash, false),
containerOrder,
ReferenceContainer.containerMergeMethod,
true);
}
// Clears both stores; IOExceptions are printed and swallowed.
public void clear() {
try {
cell.clear();
} catch (IOException e1) {
e1.printStackTrace();
}
if (collections != null) try {
collections.clear();
} catch (IOException e) {
e.printStackTrace();
}
}
// Closes both stores.
public void close() {
cell.close();
if (collections != null) collections.close();
}
// Size estimate: the larger of the two stores (entries may exist in both).
public int size() {
return (collections == null) ? cell.size() : java.lang.Math.max(collections.size(), cell.size());
}
// Minimum memory needed: fixed indexing overhead plus both stores' requirements.
public int minMem() {
return 1024*1024 /* indexing overhead */ + cell.minMem() + ((collections == null) ? 0 : collections.minMem());
}
/*
* methods for cache management
*/
// The cache-management methods below delegate straight to the cell buffer.
public int getBufferMaxReferences() {
return cell.getBufferMaxReferences();
}
public long getBufferMinAge() {
return cell.getBufferMinAge();
}
public long getBufferMaxAge() {
return cell.getBufferMaxAge();
}
public long getBufferSizeBytes() {
return cell.getBufferSizeBytes();
}
public void setBufferMaxWordCount(final int maxWords) {
cell.setBufferMaxWordCount(maxWords);
}
// Backend size: legacy store size while it exists, otherwise the cell's backend.
public int getBackendSize() {
return (collections == null) ? cell.getBackendSize() : collections.size();
}
public int getBufferSize() {
return cell.getBufferSize();
}
public ByteOrder ordering() {
return cell.ordering();
}
// Non-throwing iterator variant; merges cell and legacy containers.
// NOTE(review): the 'rot' flag is ignored on the merge path (both sub-iterators
// are created with rot=false) — confirm this is intended.
public CloneableIterator<ReferenceContainer<ReferenceType>> references(byte[] startWordHash, boolean rot) {
final Order<ReferenceContainer<ReferenceType>> containerOrder = new ReferenceContainerOrder<ReferenceType>(factory, this.cell.ordering().clone());
if (this.collections == null) return this.cell.references(startWordHash, rot);
//else
return new MergeIterator<ReferenceContainer<ReferenceType>>(
this.cell.references(startWordHash, false),
this.collections.references(startWordHash, false),
containerOrder,
ReferenceContainer.containerMergeMethod,
true);
}
public void cleanupBuffer(int time) {
this.cell.cleanupBuffer(time);
}
}

@ -32,6 +32,7 @@ import java.util.List;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBArray;
import de.anomic.kelondro.index.IntegerHandleIndex;
import de.anomic.kelondro.index.Row;
import de.anomic.kelondro.index.RowSet;
import de.anomic.kelondro.order.ByteOrder;
@ -283,5 +284,44 @@ public final class ReferenceContainerArray<ReferenceType extends Reference> {
return donesomething;
}
// Scans the reference-container BLOB dumps in a cell directory and counts,
// for every referenced URL hash, how many RWI entries point at it.
// Progress is logged to stdout at most every 30 seconds.
// @param heapLocation RICELL directory containing the container BLOB files
// @param factory      decodes references of the concrete ReferenceType
// @param termOrder    byte order used for the URL-hash keys of the result index
// @param payloadrow   row layout of the reference entries (supplies the key length)
// @return handle index mapping URL hash -> occurrence count
// @throws IOException if a blob file cannot be read
public static <ReferenceType extends Reference> IntegerHandleIndex referenceHashes(
final File heapLocation,
final ReferenceFactory<ReferenceType> factory,
final ByteOrder termOrder,
final Row payloadrow) throws IOException {
System.out.println("CELL REFERENCE COLLECTION startup");
IntegerHandleIndex references = new IntegerHandleIndex(payloadrow.primaryKeyLength, termOrder, 0, 1000000);
String[] files = heapLocation.list();
for (String f: files) {
// NOTE(review): this filter uses '&&', so a file is skipped only when it is
// short AND does not start with "index" AND does not end with ".blob". If the
// intent is to process only the *.blob dump files, '||' may have been meant
// (any long non-blob file currently reaches blobFileEntries) — confirm.
if (f.length() < 22 && !f.startsWith("index") && !f.endsWith(".blob")) continue;
File fl = new File(heapLocation, f);
System.out.println("CELL REFERENCE COLLECTION opening blob " + fl);
CloneableIterator<ReferenceContainer<ReferenceType>> ei = new ReferenceContainerCache.blobFileEntries<ReferenceType>(fl, factory, payloadrow);
ReferenceContainer<ReferenceType> container;
final long start = System.currentTimeMillis();
// start 27s in the past so the first progress line appears after ~3s
long lastlog = start - 27000;
int count = 0;
while (ei.hasNext()) {
container = ei.next();
if (container == null) continue;
// count every URL hash referenced by this term container
Iterator<ReferenceType> refi = container.entries();
while (refi.hasNext()) {
references.inc(refi.next().metadataHash().getBytes(), 1);
}
count++;
// write a log
if (System.currentTimeMillis() - lastlog > 30000) {
System.out.println("CELL REFERENCE COLLECTION scanned " + count + " RWI index entries. ");
//Log.logInfo("COLLECTION INDEX REFERENCE COLLECTION", "scanned " + count + " RWI index entries. " + (((System.currentTimeMillis() - start) * (array.size() + array.free() - count) / count) / 60000) + " minutes remaining for this array");
lastlog = System.currentTimeMillis();
}
}
}
System.out.println("CELL REFERENCE COLLECTION finished");
return references;
}
}

@ -34,7 +34,7 @@ import de.anomic.kelondro.order.MicroDate;
import de.anomic.kelondro.text.Reference;
import de.anomic.yacy.yacySeedDB;
public final class CitationReferenceRow /*implements Reference, Cloneable*/ {
public final class CitationReferenceRow implements Reference /*, Cloneable*/ {
// this object stores citation attributes to URL references
@ -131,10 +131,10 @@ public final class CitationReferenceRow /*implements Reference, Cloneable*/ {
return this.entry;
}
public String urlHash() {
public String metadataHash() {
return this.entry.getColString(col_urlhash, null);
}
public int virtualAge() {
return (int) this.entry.getColLong(col_lastModified); // this is the time in MicoDateDays format
}
@ -170,12 +170,6 @@ public final class CitationReferenceRow /*implements Reference, Cloneable*/ {
public String toString() {
return toPropertyForm();
}
public boolean isNewer(final Reference other) {
if (other == null) return true;
if (this.lastModified() > other.lastModified()) return true;
return false;
}
public boolean isOlder(final Reference other) {
if (other == null) return false;
@ -184,6 +178,31 @@ public final class CitationReferenceRow /*implements Reference, Cloneable*/ {
}
public int hashCode() {
return this.urlHash().hashCode();
return this.metadataHash().hashCode();
}
public int distance() {
throw new UnsupportedOperationException();
}
public void join(Reference oe) {
throw new UnsupportedOperationException();
}
public int maxposition() {
throw new UnsupportedOperationException();
}
public int minposition() {
throw new UnsupportedOperationException();
}
public int position(int p) {
throw new UnsupportedOperationException();
}
public int positions() {
throw new UnsupportedOperationException();
}
}

@ -538,6 +538,74 @@ public final class Condenser {
}
/*
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
// returns an enumeration of StringBuilder Objects
StringBuilder buffer = null;
sentencesFromInputStreamEnum e;
StringBuilder s;
int off;
public unsievedWordsEnum(final InputStream is) throws UnsupportedEncodingException {
e = new sentencesFromInputStreamEnum(is);
s = new StringBuilder(0);
off = 0;
buffer = nextElement0();
}
public void pre(final boolean x) {
e.pre(x);
}
private StringBuilder nextElement0() {
StringBuilder r;
StringBuilder sb;
char c;
while (s.length() - off <= 0) {
if (e.hasNext()) {
r = e.next();
if (r == null) return null;
r = trim(r);
sb = new StringBuilder(r.length() * 2);
for (int i = 0; i < r.length(); i++) {
c = r.charAt(i);
if (invisible(c)) sb = sb.append(' '); // TODO: Bugfix needed for UTF-8
else if (htmlFilterContentScraper.punctuation(c)) sb = sb.append(' ').append(c).append(' ');
else sb = sb.append(c);
}
s = trim(sb);
off = 0;
//System.out.println("PARSING-LINE '" + r + "'->'" + s + "'");
} else {
return null;
}
}
final int p = s.indexOf(" ", off);
if (p < 0) {
r = new StringBuilder(s.substring(off));
s = new StringBuilder(0);
off = 0;
return r;
}
r = trim(new StringBuilder(s.substring(off, p)));
off = p + 1;
while (off < s.length() && s.charAt(off) <= ' ') off++;
return r;
}
public boolean hasMoreElements() {
return buffer != null;
}
public StringBuilder nextElement() {
final StringBuilder r = buffer;
buffer = nextElement0();
return r;
}
}
*/
private static class unsievedWordsEnum implements Enumeration<StringBuilder> {
// returns an enumeration of StringBuilder Objects
StringBuilder buffer = null;

@ -38,8 +38,9 @@ import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.MicroDate;
import de.anomic.kelondro.table.EcoTable;
import de.anomic.kelondro.text.IndexCollection;
import de.anomic.kelondro.text.IndexCell;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.citationPrototype.CitationReferenceRow;
import de.anomic.kelondro.text.referencePrototype.WordReference;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.kelondro.util.MemoryControl;
@ -142,7 +143,7 @@ public class plasmaRankingCRProcess {
return true;
}
private static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCollection<WordReference> seq) throws IOException {
private static boolean accumulate_upd(final File f, final ObjectIndex acc, final IndexCell<CitationReferenceRow> seq) throws IOException {
// open file
AttrSeq source_cr = null;
try {
@ -174,7 +175,7 @@ public class plasmaRankingCRProcess {
Vita = (int) acc_entry.getColLong("Vita", 0);
// update counters and dates
seq.put(key.getBytes(), new_entry.getSeqCollection()); // FIXME: old and new collection must be joined
//seq.add(key.getBytes(), new_entry.getSeqCollection());
UCount++; // increase update counter
PCount += (new_flags.get(1)) ? 1 : 0;
@ -194,7 +195,7 @@ public class plasmaRankingCRProcess {
for (int i = 1; i < acc.row().columns(); i++) {
acc_entry.setCol(i, new_entry.getAttr(acc.row().column(i).nickname, 0));
}
seq.put(key.getBytes(), new_entry.getSeqCollection());
//seq.put(key.getBytes(), new_entry.getSeqCollection());
FUDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // first update date
FDDate = MicroDate.microDateHoursInt(System.currentTimeMillis()); // very difficult to compute; this is only a quick-hack
LUDate = (int) new_entry.getAttr("VDate", 0);
@ -242,11 +243,16 @@ public class plasmaRankingCRProcess {
// open target file
AttrSeq acc = null;
ObjectIndex newacc = null;
IndexCollection<WordReference> newseq = null;
IndexCell<WordReference> newseq = null;
if (newdb) {
final File path = to_file.getParentFile(); // path to storage place
newacc = new EcoTable(new File(path, CRG_accname), CRG_accrow, EcoTable.tailCacheUsageAuto, 0, 0);
newseq = new IndexCollection<WordReference>(path, CRG_seqname, plasmaWordIndex.wordReferenceFactory, 12, Base64Order.enhancedCoder, 9, CRG_colrow, false);
newseq = new IndexCell<WordReference>(
path,
plasmaWordIndex.wordReferenceFactory,
Base64Order.enhancedCoder,
CRG_colrow,
10000, 1000000000L, 20, null, 1000000);
} else {
if (!(to_file.exists())) {
acc = new AttrSeq("Global Ranking Accumulator File",
@ -266,6 +272,7 @@ public class plasmaRankingCRProcess {
// open file
source_file = new File(from_dir, files[i]);
if (newdb) {
/*
if (accumulate_upd(source_file, newacc, newseq)) {
// move CR file to temporary folder
source_file.renameTo(new File(tmp_dir, files[i]));
@ -273,6 +280,7 @@ public class plasmaRankingCRProcess {
// error case: the CR-file is not valid; move to error path
source_file.renameTo(new File(err_dir, files[i]));
}
*/
} else {
if (accumulate_upd(source_file, acc)) {
// move CR file to temporary folder
@ -374,8 +382,10 @@ public class plasmaRankingCRProcess {
public static int genrcix(final File cr_path_in, final File rci_path_out) throws IOException {
//kelondroFlexTable acc = new kelondroFlexTable(cr_path_in, CRG_accname, kelondroBase64Order.enhancedCoder, 128 * 1024 * 1024, -1, CRG_accrow, true);
final IndexCollection<WordReference> seq = new IndexCollection<WordReference>(cr_path_in, CRG_seqname, plasmaWordIndex.wordReferenceFactory, 12, Base64Order.enhancedCoder, 9, CRG_colrow, false);
final IndexCollection<WordReference> rci = new IndexCollection<WordReference>(rci_path_out, RCI_colname, plasmaWordIndex.wordReferenceFactory, 6, Base64Order.enhancedCoder, 9, RCI_coli, false);
final IndexCell<WordReference> seq = new IndexCell<WordReference>(
cr_path_in, plasmaWordIndex.wordReferenceFactory, Base64Order.enhancedCoder, CRG_colrow, 10000, 1000000000L, 20, null, 1000000);
final IndexCell<WordReference> rci = new IndexCell<WordReference>(
rci_path_out, plasmaWordIndex.wordReferenceFactory, Base64Order.enhancedCoder, RCI_coli, 10000, 1000000000L, 20, null, 1000000);
// loop over all referees
int count = 0;
@ -406,7 +416,7 @@ public class plasmaRankingCRProcess {
rci_entry.add(refereeDom.getBytes());
// insert entry
rci.put(anchorDom.getBytes(), rci_entry);
//rci.put(anchorDom.getBytes(), rci_entry);
}
count++;
if ((count % 1000) == 0) {

@ -46,7 +46,6 @@ import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.text.BufferedIndex;
import de.anomic.kelondro.text.IndexCell;
import de.anomic.kelondro.text.IndexCollectionMigration;
import de.anomic.kelondro.text.ReferenceContainer;
import de.anomic.kelondro.text.IODispatcher;
import de.anomic.kelondro.text.MetadataRepository;
@ -151,37 +150,18 @@ public final class plasmaWordIndex {
}
}
// check if the peer has migrated the index
if (new File(indexPrimaryTextLocation, "RICOLLECTION").exists()) {
this.merger = new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1, writeBufferSize);
if (this.merger != null) this.merger.start();
this.index = new IndexCollectionMigration<WordReference>(
indexPrimaryTextLocation,
wordReferenceFactory,
wordOrder,
WordReferenceRow.urlEntryRow,
entityCacheMaxSize,
targetFileSize,
maxFileSize,
this.merger,
writeBufferSize,
log);
} else {
this.merger = new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1, writeBufferSize);
this.merger.start();
this.index = new IndexCell<WordReference>(
new File(indexPrimaryTextLocation, "RICELL"),
wordReferenceFactory,
wordOrder,
WordReferenceRow.urlEntryRow,
entityCacheMaxSize,
targetFileSize,
maxFileSize,
this.merger,
writeBufferSize);
}
this.merger = new IODispatcher<WordReference>(plasmaWordIndex.wordReferenceFactory, 1, 1, writeBufferSize);
this.merger.start();
this.index = new IndexCell<WordReference>(
new File(indexPrimaryTextLocation, "RICELL"),
wordReferenceFactory,
wordOrder,
WordReferenceRow.urlEntryRow,
entityCacheMaxSize,
targetFileSize,
maxFileSize,
this.merger,
writeBufferSize);
// migrate LURL-db files into new subdirectory METADATA
File textdir = new File(this.secondaryRoot, "TEXT");

Loading…
Cancel
Save