* strict type and size check in kelondroRow handling

* adapted all code to use the declaration form of kelondroRow
* fixed a bug in kelondroRow which caused wrong parsing of encoding type
* the bug caused bad database behaviour in the new indexCollection data structure.
  Because of this bug, all existing test databases are now void; a new database is created
* the kelondroFlexTable and indexCollection data structures now store a declaration of the row definition
  in a properties file alongside the database files.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2375 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 19 years ago
parent 314021453f
commit 740d49751d

@ -170,7 +170,7 @@ public class dbtest {
profiler.start(); profiler.start();
// create the database access // create the database access
kelondroRow testRow = new kelondroRow(new int[]{keylength, keylength, valuelength}); kelondroRow testRow = new kelondroRow("byte[] key-" + keylength + ", byte[] dummy-" + keylength + ", value-" + valuelength);
if (dbe.equals("kelondroTree")) { if (dbe.equals("kelondroTree")) {
File tablefile = new File(tablename + ".kelondro.db"); File tablefile = new File(tablename + ".kelondro.db");
if (tablefile.exists()) { if (tablefile.exists()) {

@ -96,7 +96,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
File indexDumpFile = new File(databaseRoot, indexArrayFileName); File indexDumpFile = new File(databaseRoot, indexArrayFileName);
if (indexDumpFile.exists()) indexDumpFile.delete(); if (indexDumpFile.exists()) indexDumpFile.delete();
kelondroFixedWidthArray dumpArray = null; kelondroFixedWidthArray dumpArray = null;
dumpArray = new kelondroFixedWidthArray(indexDumpFile, new kelondroRow(plasmaWordIndexAssortment.bufferStructureBasis), 0, false); dumpArray = new kelondroFixedWidthArray(indexDumpFile, plasmaWordIndexAssortment.bufferStructureBasis, 0, false);
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
long messageTime = System.currentTimeMillis() + 5000; long messageTime = System.currentTimeMillis() + 5000;
long wordsPerSecond = 0, wordcount = 0, urlcount = 0; long wordsPerSecond = 0, wordcount = 0, urlcount = 0;
@ -119,8 +119,8 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI {
while (ci.hasNext()) { while (ci.hasNext()) {
iEntry = (indexEntry) ci.next(); iEntry = (indexEntry) ci.next();
row.setCol(0, container.getWordHash().getBytes()); row.setCol(0, container.getWordHash().getBytes());
row.setCol(1, kelondroNaturalOrder.encodeLong(container.size(), 4)); row.setCol(1, container.size());
row.setCol(2, kelondroNaturalOrder.encodeLong(container.updated(), 8)); row.setCol(2, container.updated());
row.setCol(3, iEntry.urlHash().getBytes()); row.setCol(3, iEntry.urlHash().getBytes());
row.setCol(4, iEntry.toEncodedByteArrayForm(false)); row.setCol(4, iEntry.toEncodedByteArrayForm(false));
dumpArray.set((int) urlcount++, row); dumpArray.set((int) urlcount++, row);

@ -95,19 +95,19 @@ public class indexURLEntry implements Cloneable, indexEntry {
if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk"; if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk";
this.entry = urlEntryRow.newEntry(); this.entry = urlEntryRow.newEntry();
this.entry.setColString(col_urlhash, urlHash, null); this.entry.setCol(col_urlhash, urlHash, null);
this.entry.setColLong(col_quality, quality); this.entry.setCol(col_quality, quality);
this.entry.setColLong(col_lastModified, lastmodified); this.entry.setCol(col_lastModified, lastmodified);
this.entry.setColLong(col_hitcount, hitcount); this.entry.setCol(col_hitcount, hitcount);
this.entry.setColString(col_language, language, null); this.entry.setCol(col_language, language, null);
this.entry.setColByte(col_doctype, (byte) doctype); this.entry.setCol(col_doctype, (byte) doctype);
this.entry.setColByte(col_localflag, (byte) ((local) ? indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL)); this.entry.setCol(col_localflag, (byte) ((local) ? indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL));
this.entry.setColLong(col_posintext, posintext); this.entry.setCol(col_posintext, posintext);
this.entry.setColLong(col_posinphrase, posinphrase); this.entry.setCol(col_posinphrase, posinphrase);
this.entry.setColLong(col_posofphrase, posofphrase); this.entry.setCol(col_posofphrase, posofphrase);
this.entry.setColLong(col_worddistance, worddistance); this.entry.setCol(col_worddistance, worddistance);
this.entry.setColLong(col_wordcount, wordcount); this.entry.setCol(col_wordcount, wordcount);
this.entry.setColLong(col_phrasecount, phrasecount); this.entry.setCol(col_phrasecount, phrasecount);
} }
public indexURLEntry(String urlHash, String code) { public indexURLEntry(String urlHash, String code) {
@ -208,11 +208,11 @@ public class indexURLEntry implements Cloneable, indexEntry {
public static indexURLEntry combineDistance(indexURLEntry ie1, indexEntry ie2) { public static indexURLEntry combineDistance(indexURLEntry ie1, indexEntry ie2) {
// returns a modified entry of the first argument // returns a modified entry of the first argument
ie1.entry.setColLong(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext())); ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext()));
ie1.entry.setColLong(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext()));
ie1.entry.setColLong(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/); ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/);
ie1.entry.setColLong(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase()));
ie1.entry.setColLong(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2); ie1.entry.setCol(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2);
return ie1; return ie1;
} }
@ -225,27 +225,27 @@ public class indexURLEntry implements Cloneable, indexEntry {
} }
public static final void min(indexURLEntry t, indexEntry other) { public static final void min(indexURLEntry t, indexEntry other) {
if (t.hitcount() > other.hitcount()) t.entry.setColLong(col_hitcount, other.hitcount()); if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
if (t.wordcount() > other.wordcount()) t.entry.setColLong(col_wordcount, other.wordcount()); if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
if (t.phrasecount() > other.phrasecount()) t.entry.setColLong(col_phrasecount, other.phrasecount()); if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
if (t.posintext() > other.posintext()) t.entry.setColLong(col_posintext, other.posintext()); if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() > other.posinphrase()) t.entry.setColLong(col_posinphrase, other.posinphrase()); if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() > other.posofphrase()) t.entry.setColLong(col_posofphrase, other.posofphrase()); if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
if (t.worddistance() > other.worddistance()) t.entry.setColLong(col_worddistance, other.worddistance()); if (t.worddistance() > other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
if (t.lastModified() > other.lastModified()) t.entry.setColLong(col_lastModified, other.lastModified()); if (t.lastModified() > other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
if (t.quality() > other.quality()) t.entry.setColLong(col_quality, other.quality()); if (t.quality() > other.quality()) t.entry.setCol(col_quality, other.quality());
} }
public static final void max(indexURLEntry t, indexEntry other) { public static final void max(indexURLEntry t, indexEntry other) {
if (t.hitcount() < other.hitcount()) t.entry.setColLong(col_hitcount, other.hitcount()); if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount());
if (t.wordcount() < other.wordcount()) t.entry.setColLong(col_wordcount, other.wordcount()); if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount());
if (t.phrasecount() < other.phrasecount()) t.entry.setColLong(col_phrasecount, other.phrasecount()); if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount());
if (t.posintext() < other.posintext()) t.entry.setColLong(col_posintext, other.posintext()); if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext());
if (t.posinphrase() < other.posinphrase()) t.entry.setColLong(col_posinphrase, other.posinphrase()); if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase());
if (t.posofphrase() < other.posofphrase()) t.entry.setColLong(col_posofphrase, other.posofphrase()); if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase());
if (t.worddistance() < other.worddistance()) t.entry.setColLong(col_worddistance, other.worddistance()); if (t.worddistance() < other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance());
if (t.lastModified() < other.lastModified()) t.entry.setColLong(col_lastModified, other.lastModified()); if (t.lastModified() < other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified());
if (t.quality() < other.quality()) t.entry.setColLong(col_quality, other.quality()); if (t.quality() < other.quality()) t.entry.setCol(col_quality, other.quality());
} }
@ -258,15 +258,15 @@ public class indexURLEntry implements Cloneable, indexEntry {
} }
static void normalize(indexURLEntry t, indexEntry min, indexEntry max) { static void normalize(indexURLEntry t, indexEntry min, indexEntry max) {
t.entry.setColLong(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount())); t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount()));
t.entry.setColLong(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount())); t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount()));
t.entry.setColLong(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount())); t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount()));
t.entry.setColLong(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext())); t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext()));
t.entry.setColLong(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase())); t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase()));
t.entry.setColLong(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase())); t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase()));
t.entry.setColLong(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance()));
t.entry.setColLong(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified())); t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified()));
t.entry.setColLong(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality())); t.entry.setCol(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality()));
} }
public void normalize(indexEntry min, indexEntry max) { public void normalize(indexEntry min, indexEntry max) {

@ -40,23 +40,23 @@ public class kelondroBytesIntMap {
public int geti(byte[] key) throws IOException { public int geti(byte[] key) throws IOException {
kelondroRow.Entry indexentry = ki.get(key); kelondroRow.Entry indexentry = ki.get(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1); return (int) indexentry.getColLong(1);
} }
public int puti(byte[] key, int i) throws IOException { public int puti(byte[] key, int i) throws IOException {
kelondroRow.Entry newentry = ki.row().newEntry(); kelondroRow.Entry newentry = ki.row().newEntry();
newentry.setCol(0, key); newentry.setCol(0, key);
newentry.setColLongB256(1, i); newentry.setCol(1, i);
kelondroRow.Entry oldentry = ki.put(newentry); kelondroRow.Entry oldentry = ki.put(newentry);
if (oldentry == null) return -1; if (oldentry == null) return -1;
return (int) oldentry.getColLongB256(1); return (int) oldentry.getColLong(1);
} }
public int removei(byte[] key) throws IOException { public int removei(byte[] key) throws IOException {
if (ki.size() == 0) return -1; if (ki.size() == 0) return -1;
kelondroRow.Entry indexentry = ki.remove(key); kelondroRow.Entry indexentry = ki.remove(key);
if (indexentry == null) return -1; if (indexentry == null) return -1;
return (int) indexentry.getColLongB256(1); return (int) indexentry.getColLong(1);
} }
public int size() throws IOException { public int size() throws IOException {

@ -32,6 +32,8 @@ import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import de.anomic.server.serverFileUtils;
public class kelondroCollectionIndex { public class kelondroCollectionIndex {
private kelondroIndex index; private kelondroIndex index;
@ -60,19 +62,28 @@ public class kelondroCollectionIndex {
); );
} }
private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber, int serialNumber) { private static String fillZ(String s, int len) {
while (s.length() < len) s = "0" + s;
return s;
}
String lf = Integer.toHexString(loadfactor).toUpperCase(); private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber, int serialNumber) {
while (lf.length() < 2) lf = "0" + lf; String lf = fillZ(Integer.toHexString(loadfactor).toUpperCase(), 2);
String cs = Integer.toHexString(chunksize).toUpperCase(); String cs = fillZ(Integer.toHexString(chunksize).toUpperCase(), 4);
while (cs.length() < 4) cs = "0" + cs; String pn = fillZ(Integer.toHexString(partitionNumber).toUpperCase(), 2);
String pn = Integer.toHexString(partitionNumber).toUpperCase(); String sn = fillZ(Integer.toHexString(serialNumber).toUpperCase(), 2);
while (pn.length() < 2) pn = "0" + pn;
String sn = Integer.toHexString(serialNumber).toUpperCase();
while (sn.length() < 2) sn = "0" + sn;
return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + "." + sn + ".kca"); // kelondro collection array return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + "." + sn + ".kca"); // kelondro collection array
} }
/**
 * Builds the File handle of the properties file that belongs to an array cluster.
 * The file name has the form
 * filenameStub + "." + LF + "." + CS + ".properties",
 * where LF is the load factor and CS the chunk size, both written as upper-case
 * hexadecimal and left-padded with '0' by fillZ to at least 2 and 4 digits.
 * This method only composes the path; it does not create or touch the file.
 *
 * @param path         directory that contains the cluster files
 * @param filenameStub common file name prefix of the cluster
 * @param loadfactor   load factor encoded into the file name
 * @param chunksize    chunk size encoded into the file name
 * @return the File object for the properties file
 */
private static File propertyFile(File path, String filenameStub, int loadfactor, int chunksize) {
String lf = fillZ(Integer.toHexString(loadfactor).toUpperCase(), 2);
String cs = fillZ(Integer.toHexString(chunksize).toUpperCase(), 4);
return new File(path, filenameStub + "." + lf + "." + cs + ".properties"); // properties file of the collection array cluster
}
/*
*/
public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder, public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder,
long buffersize, long preloadTime, long buffersize, long preloadTime,
int loadfactor, kelondroRow rowdef) throws IOException { int loadfactor, kelondroRow rowdef) throws IOException {
@ -85,6 +96,21 @@ public class kelondroCollectionIndex {
// create index table // create index table
index = new kelondroFlexTable(path, filenameStub + ".index", indexOrder, buffersize, preloadTime, indexRow(keyLength), true); index = new kelondroFlexTable(path, filenameStub + ".index", indexOrder, buffersize, preloadTime, indexRow(keyLength), true);
// save/check property file for this array
File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize());
Map props = new HashMap();
if (propfile.exists()) {
props = serverFileUtils.loadHashMap(propfile);
String stored_rowdef = (String) props.get("rowdef");
if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef))))) {
System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" +
rowdef + "' for array cluster '" + path + "/" + filenameStub + "'");
System.exit(-1);
}
}
props.put("rowdef", rowdef.toString());
serverFileUtils.saveMap(propfile, props, "CollectionIndex properties");
// open array files // open array files
this.arrays = new HashMap(); // all entries will be dynamically created with getArray() this.arrays = new HashMap(); // all entries will be dynamically created with getArray()
} }
@ -176,9 +202,9 @@ public class kelondroCollectionIndex {
} else { } else {
// overwrite the old collection // overwrite the old collection
// read old information // read old information
int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration int oldchunksize = (int) oldindexrow.getColLong(idx_col_chunksize); // needed only for migration
int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount); int oldchunkcount = (int) oldindexrow.getColLong(idx_col_chunkcount);
int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos); int oldrownumber = (int) oldindexrow.getColLong(idx_col_indexpos);
int oldPartitionNumber = arrayIndex(oldchunkcount); int oldPartitionNumber = arrayIndex(oldchunkcount);
int oldSerialNumber = 0; int oldSerialNumber = 0;
@ -234,8 +260,8 @@ public class kelondroCollectionIndex {
array.set(oldrownumber, arrayEntry); array.set(oldrownumber, arrayEntry);
// update the index entry // update the index entry
oldindexrow.setColLongB256(idx_col_chunkcount, collection.size()); oldindexrow.setCol(idx_col_chunkcount, collection.size());
oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); oldindexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(oldindexrow); index.put(oldindexrow);
} else { } else {
// we need a new slot, that means we must first delete the old entry // we need a new slot, that means we must first delete the old entry
@ -271,11 +297,11 @@ public class kelondroCollectionIndex {
// store the new row number in the index // store the new row number in the index
kelondroRow.Entry indexEntry = index.row().newEntry(); kelondroRow.Entry indexEntry = index.row().newEntry();
indexEntry.setCol(idx_col_key, key); indexEntry.setCol(idx_col_key, key);
indexEntry.setColLongB256(idx_col_chunksize, this.rowdef.objectsize()); indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize());
indexEntry.setColLongB256(idx_col_chunkcount, collection.size()); indexEntry.setCol(idx_col_chunkcount, collection.size());
indexEntry.setColLongB256(idx_col_indexpos, (long) newRowNumber); indexEntry.setCol(idx_col_indexpos, (long) newRowNumber);
indexEntry.setColLongB256(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
indexEntry.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(indexEntry); index.put(indexEntry);
} }
@ -301,9 +327,9 @@ public class kelondroCollectionIndex {
// call this only within a synchronized(index) environment // call this only within a synchronized(index) environment
// read values // read values
int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize); int chunksize = (int) indexrow.getColLong(idx_col_chunksize);
int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount); int chunkcount = (int) indexrow.getColLong(idx_col_chunkcount);
int rownumber = (int) indexrow.getColLongB256(idx_col_indexpos); int rownumber = (int) indexrow.getColLong(idx_col_indexpos);
int partitionnumber = arrayIndex(chunkcount); int partitionnumber = arrayIndex(chunkcount);
int serialnumber = 0; int serialnumber = 0;
@ -320,18 +346,18 @@ public class kelondroCollectionIndex {
// store the row number in the index; this may be a double-entry, but better than nothing // store the row number in the index; this may be a double-entry, but better than nothing
kelondroRow.Entry indexEntry = index.row().newEntry(); kelondroRow.Entry indexEntry = index.row().newEntry();
indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0)); indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0));
indexEntry.setColLongB256(idx_col_chunksize, this.rowdef.objectsize()); indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize());
indexEntry.setColLongB256(idx_col_chunkcount, collection.size()); indexEntry.setCol(idx_col_chunkcount, collection.size());
indexEntry.setColLongB256(idx_col_indexpos, (long) rownumber); indexEntry.setCol(idx_col_indexpos, (long) rownumber);
indexEntry.setColLongB256(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
indexEntry.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis()));
index.put(indexEntry); index.put(indexEntry);
throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed"); throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed");
} }
int chunkcountInArray = collection.size(); int chunkcountInArray = collection.size();
if (chunkcountInArray != chunkcount) { if (chunkcountInArray != chunkcount) {
// fix the entry in index // fix the entry in index
indexrow.setColLong(idx_col_chunkcount, chunkcountInArray); indexrow.setCol(idx_col_chunkcount, chunkcountInArray);
index.put(indexrow); index.put(indexrow);
array.logFailure("INCONSISTENCY in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed"); array.logFailure("INCONSISTENCY in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed");
} }

@ -108,18 +108,18 @@ public class kelondroColumn {
if (p < 0) { if (p < 0) {
// if the cell was defined with a type, we dont need to give an explicit with definition // if the cell was defined with a type, we dont need to give an explicit with definition
if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width definition given"); if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width definition given");
p = celldef.indexOf(' '); int q = celldef.indexOf(' ');
if (p < 0) { if (q < 0) {
this.nickname = celldef; this.nickname = celldef;
celldef = ""; celldef = "";
} else { } else {
this.nickname = celldef.substring(0, p); this.nickname = celldef.substring(0, p);
celldef = celldef.substring(p + 1); celldef = celldef.substring(q + 1);
} }
} else { } else {
this.nickname = celldef.substring(0, p);
int q = celldef.indexOf(' '); int q = celldef.indexOf(' ');
if (q < 0) { if (q < 0) {
this.nickname = celldef.substring(0, p);
try { try {
this.cellwidth = Integer.parseInt(celldef.substring(p + 1)); this.cellwidth = Integer.parseInt(celldef.substring(p + 1));
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
@ -127,7 +127,6 @@ public class kelondroColumn {
} }
celldef = ""; celldef = "";
} else { } else {
this.nickname = celldef.substring(0, q);
try { try {
this.cellwidth = Integer.parseInt(celldef.substring(p + 1, q)); this.cellwidth = Integer.parseInt(celldef.substring(p + 1, q));
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
@ -157,8 +156,8 @@ public class kelondroColumn {
String expf = celldef.substring(1, p); String expf = celldef.substring(1, p);
celldef = celldef.substring(p + 1).trim(); celldef = celldef.substring(p + 1).trim();
if (expf.equals("b64e")) this.encoder = encoder_b64e; if (expf.equals("b64e")) this.encoder = encoder_b64e;
else if (expf.equals("b256")) this.encoder = encoder_b64e; else if (expf.equals("b256")) this.encoder = encoder_b256;
else if (expf.equals("bytes")) this.encoder = encoder_b64e; else if (expf.equals("bytes")) this.encoder = encoder_bytes;
else { else {
if (this.celltype == celltype_undefined) this.encoder = encoder_bytes; if (this.celltype == celltype_undefined) this.encoder = encoder_bytes;
else if (this.celltype == celltype_boolean) this.encoder = encoder_bytes; else if (this.celltype == celltype_boolean) this.encoder = encoder_bytes;
@ -181,6 +180,12 @@ public class kelondroColumn {
} }
} }
/**
 * Overwrites the nickname, cell type and encoder of this column.
 * The cell width is not changed, and the given values are stored as-is
 * without any validation.
 *
 * @param nickname new column nickname
 * @param celltype new cell type code
 * @param encoder  new encoder code
 */
public void setAttributes(String nickname, int celltype, int encoder) {
this.celltype = celltype;
this.encoder = encoder;
this.nickname = nickname;
}
public int celltype() { public int celltype() {
return this.celltype; return this.celltype;
} }
@ -206,21 +211,28 @@ public class kelondroColumn {
switch (celltype) { switch (celltype) {
case celltype_boolean: case celltype_boolean:
s.append("boolean "); s.append("boolean ");
s.append(nickname);
break; break;
case celltype_binary: case celltype_binary:
s.append("byte[] "); s.append("byte[] ");
s.append(nickname);
s.append('-');
s.append(cellwidth);
break; break;
case celltype_string: case celltype_string:
s.append("String "); s.append("String ");
s.append(nickname);
s.append('-');
s.append(cellwidth);
break; break;
case celltype_cardinal: case celltype_cardinal:
s.append("Cardinal "); s.append("Cardinal ");
s.append(nickname);
s.append('-');
s.append(cellwidth);
break; break;
} }
s.append(nickname);
s.append('-');
s.append(cellwidth);
s.append(' ');
switch (encoder) { switch (encoder) {
case encoder_b64e: case encoder_b64e:
s.append(" {b64e}"); s.append(" {b64e}");
@ -231,4 +243,13 @@ public class kelondroColumn {
} }
return new String(s); return new String(s);
} }
/**
 * Compares this column definition with another one.
 * Two columns are considered equal when cell type, cell width and encoder
 * are identical and the nicknames are equal strings.
 *
 * NOTE(review): the parameter type is kelondroColumn, so this is an overload
 * and not an override of Object.equals(Object); callers that go through
 * Object.equals (e.g. collections) will not reach this method.
 * A null otherCol causes a NullPointerException.
 *
 * @param otherCol the column to compare with; must not be null
 * @return true if all four attributes match, false otherwise
 */
public boolean equals(kelondroColumn otherCol) {
return
(this.celltype == otherCol.celltype) &&
(this.cellwidth == otherCol.cellwidth) &&
(this.encoder == otherCol.encoder) &&
(this.nickname.equals(otherCol.nickname));
}
} }

@ -76,7 +76,7 @@ public class kelondroDyn extends kelondroTree {
int nodesize, char fillChar, kelondroOrder objectOrder, int nodesize, char fillChar, kelondroOrder objectOrder,
boolean exitOnFail) { boolean exitOnFail) {
// creates a new dynamic tree // creates a new dynamic tree
super(file, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(new int[] { key + counterlen, nodesize }), objectOrder, 1, 8, exitOnFail); super(file, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize), objectOrder, 1, 8, exitOnFail);
this.keylen = row().width(0) - counterlen; this.keylen = row().width(0) - counterlen;
this.reclen = row().width(1); this.reclen = row().width(1);
this.fillChar = fillChar; this.fillChar = fillChar;

@ -102,8 +102,8 @@ public class kelondroDynTree {
Iterator i = table.dynKeys(true, false); Iterator i = table.dynKeys(true, false);
String onekey = (String) i.next(); String onekey = (String) i.next();
kelondroTree onetree = getTree(onekey); kelondroTree onetree = getTree(onekey);
int[] columns = new int[onetree.row().columns()]; kelondroColumn[] columns = new kelondroColumn[onetree.row().columns()];
for (int j = 0; j < columns.length; j++) columns[j] = onetree.row().width(j); for (int j = 0; j < columns.length; j++) columns[j] = onetree.row().column(j);
this.rowdef = new kelondroRow(columns); this.rowdef = new kelondroRow(columns);
closeTree(onekey); closeTree(onekey);
} }
@ -327,7 +327,7 @@ public class kelondroDynTree {
kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, '_'); kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, '_');
System.out.println("opened: table keylength=" + dt.table.row().width(0) + ", sectorsize=" + dt.table.row().width(1) + ", " + dt.table.size() + " entries."); System.out.println("opened: table keylength=" + dt.table.row().width(0) + ", sectorsize=" + dt.table.row().width(1) + ", " + dt.table.size() + " entries.");
} else { } else {
kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow(new int[] {10,20,30}), '_', true); kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow("byte[] a-10, byte[] b-20, byte[] c-30"), '_', true);
String name; String name;
kelondroTree t; kelondroTree t;
kelondroRow.Entry line; kelondroRow.Entry line;

@ -134,7 +134,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro
public static void main(String[] args) { public static void main(String[] args) {
File f = new File("d:\\\\mc\\privat\\fixtest.db"); File f = new File("d:\\\\mc\\privat\\fixtest.db");
f.delete(); f.delete();
kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, new kelondroRow(new int[]{12, 4}), 6, true); kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, new kelondroRow("byte[] a-12, byte[] b-4"), 6, true);
try { try {
k.set(3, k.row().newEntry(new byte[][]{ k.set(3, k.row().newEntry(new byte[][]{
"test123".getBytes(), "abcd".getBytes()})); "test123".getBytes(), "abcd".getBytes()}));

@ -79,7 +79,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
} }
private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException { private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException {
kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0); kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), 0);
ri.setOrdering(objectOrder, 0); ri.setOrdering(objectOrder, 0);
Iterator content = super.col[0].contentNodes(-1); Iterator content = super.col[0].contentNodes(-1);
kelondroRecords.Node node; kelondroRecords.Node node;
@ -90,7 +90,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
i = node.handle().hashCode(); i = node.handle().hashCode();
indexentry = ri.rowdef.newEntry(); indexentry = ri.rowdef.newEntry();
indexentry.setCol(0, node.getValueRow()); indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i); indexentry.setCol(1, i);
ri.add(indexentry); ri.add(indexentry);
if ((i % 10000) == 0) { if ((i % 10000) == 0) {
System.out.print('.'); System.out.print('.');
@ -117,7 +117,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
i = node.handle().hashCode(); i = node.handle().hashCode();
indexentry = index.row().newEntry(); indexentry = index.row().newEntry();
indexentry.setCol(0, node.getValueRow()); indexentry.setCol(0, node.getValueRow());
indexentry.setColLongB256(1, i); indexentry.setCol(1, i);
index.put(indexentry); index.put(indexentry);
if ((i % 10000) == 0) { if ((i % 10000) == 0) {
System.out.print('.'); System.out.print('.');
@ -176,7 +176,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr
public Object next() { public Object next() {
kelondroRow.Entry idxEntry = (kelondroRow.Entry) indexIterator.next(); kelondroRow.Entry idxEntry = (kelondroRow.Entry) indexIterator.next();
int idx = (int) idxEntry.getColLongB256(1); int idx = (int) idxEntry.getColLong(1);
try { try {
return get(idx); return get(idx);
} catch (IOException e) { } catch (IOException e) {

@ -27,6 +27,10 @@ package de.anomic.kelondro;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import de.anomic.server.serverFileUtils;
public class kelondroFlexWidthArray implements kelondroArray { public class kelondroFlexWidthArray implements kelondroArray {
@ -44,7 +48,7 @@ public class kelondroFlexWidthArray implements kelondroArray {
check += '_'; check += '_';
} }
// check if tabel directory exists // check if table directory exists
File tabledir = new File(path, tablename + ".table"); File tabledir = new File(path, tablename + ".table");
if (tabledir.exists()) { if (tabledir.exists()) {
if (!(tabledir.isDirectory())) throw new IOException("path " + tabledir.toString() + " must be a directory"); if (!(tabledir.isDirectory())) throw new IOException("path " + tabledir.toString() + " must be a directory");
@ -53,6 +57,21 @@ public class kelondroFlexWidthArray implements kelondroArray {
tabledir.mkdir(); tabledir.mkdir();
} }
// save/check property file for this array
File propfile = new File(tabledir, "properties");
Map props = new HashMap();
if (propfile.exists()) {
props = serverFileUtils.loadHashMap(propfile);
String stored_rowdef = (String) props.get("rowdef");
if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef))))) {
System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" +
rowdef + "' for flex table '" + path + "'");
System.exit(-1);
}
}
props.put("rowdef", rowdef.toString());
serverFileUtils.saveMap(propfile, props, "FlexWidthArray properties");
// open existing files // open existing files
String[] files = tabledir.list(); String[] files = tabledir.list();
for (int i = 0; i < files.length; i++) { for (int i = 0; i < files.length; i++) {
@ -78,9 +97,9 @@ public class kelondroFlexWidthArray implements kelondroArray {
q--; q--;
} }
// create new array file // create new array file
int columns[] = new int[q - p + 1]; kelondroColumn[] columns = new kelondroColumn[q - p + 1];
for (int j = p; j <= q; j++) { for (int j = p; j <= q; j++) {
columns[j - p] = rowdef.width(j); columns[j - p] = rowdef.column(j);
check = check.substring(0, j) + "X" + check.substring(j + 1); check = check.substring(0, j) + "X" + check.substring(j + 1);
} }
col[p] = new kelondroFixedWidthArray(new File(tabledir, colfilename(p, q)), new kelondroRow(columns), 16, true); col[p] = new kelondroFixedWidthArray(new File(tabledir, colfilename(p, q)), new kelondroRow(columns), 16, true);
@ -197,7 +216,7 @@ public class kelondroFlexWidthArray implements kelondroArray {
public static void main(String[] args) { public static void main(String[] args) {
File f = new File("d:\\\\mc\\privat\\"); File f = new File("d:\\\\mc\\privat\\");
try { try {
kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow(new int[]{12, 4}), true); kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true);
k.set(3, k.row().newEntry(new byte[][]{ k.set(3, k.row().newEntry(new byte[][]{
"test123".getBytes(), "abcd".getBytes()})); "test123".getBytes(), "abcd".getBytes()}));
@ -205,7 +224,7 @@ public class kelondroFlexWidthArray implements kelondroArray {
"test456".getBytes(), "efgh".getBytes()})); "test456".getBytes(), "efgh".getBytes()}));
k.close(); k.close();
k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow(new int[]{12, 4}), true); k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true);
System.out.println(k.get(2).toString()); System.out.println(k.get(2).toString());
System.out.println(k.get(3).toString()); System.out.println(k.get(3).toString());
System.out.println(k.get(4).toString()); System.out.println(k.get(4).toString());

@ -143,7 +143,7 @@ public class kelondroHashtable {
private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder.encodeLong(0, 5).getBytes(); private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder.encodeLong(0, 5).getBytes();
public kelondroHashtable(File file, kelondroRow rowdef, int offset, int maxsize, int maxrehash, boolean exitOnFail) { public kelondroHashtable(File file, kelondroRow rowdef, int offset, int maxsize, int maxrehash, boolean exitOnFail) {
// this creates a new hashtable // this creates a new hashtable
// the key element is not part of the columns array // the key element is not part of the columns array
// this is unlike the kelondroTree, where the key is part of a row // this is unlike the kelondroTree, where the key is part of a row
// the offset is a number of bits that is omitted in the folded tree hierarchy // the offset is a number of bits that is omitted in the folded tree hierarchy
@ -180,9 +180,9 @@ public class kelondroHashtable {
} }
private kelondroRow extCol(kelondroRow rowdef) { private kelondroRow extCol(kelondroRow rowdef) {
int[] newCol = new int[rowdef.columns() + 1]; kelondroColumn[] newCol = new kelondroColumn[rowdef.columns() + 1];
newCol[0] = 4; newCol[0] = new kelondroColumn("Cardinal key-4 {b256}");
for (int i = 0; i < rowdef.columns(); i++) newCol[i + 1] = rowdef.width(i); for (int i = 0; i < rowdef.columns(); i++) newCol[i + 1] = rowdef.column(i);
return new kelondroRow(newCol); return new kelondroRow(newCol);
} }
@ -219,7 +219,7 @@ public class kelondroHashtable {
// write row // write row
kelondroRow.Entry newhkrow = hashArray.row().newEntry(); kelondroRow.Entry newhkrow = hashArray.row().newEntry();
newhkrow.setColLongB256(0, hash.key()); newhkrow.setCol(0, hash.key());
newhkrow.setCol(1, rowentry.bytes()); newhkrow.setCol(1, rowentry.bytes());
hashArray.set(rowNumber, newhkrow); hashArray.set(rowNumber, newhkrow);
return hashArray.row().newEntry(oldhkrow.getColBytes(1)); return hashArray.row().newEntry(oldhkrow.getColBytes(1));
@ -233,7 +233,7 @@ public class kelondroHashtable {
rowNumber = hash.node(); rowNumber = hash.node();
if (rowNumber >= hashArray.size()) return new Object[]{new Integer(rowNumber), null}; if (rowNumber >= hashArray.size()) return new Object[]{new Integer(rowNumber), null};
hkrow = hashArray.get(rowNumber); hkrow = hashArray.get(rowNumber);
rowKey = (int) hkrow.getColLongB256(0); rowKey = (int) hkrow.getColLong(0);
if (rowKey == 0) return new Object[]{new Integer(rowNumber), null}; if (rowKey == 0) return new Object[]{new Integer(rowNumber), null};
hash.rehash(); hash.rehash();
} while (rowKey != hash.key()); } while (rowKey != hash.key());

@ -29,7 +29,7 @@ package de.anomic.kelondro;
public class kelondroIntBytesMap extends kelondroRowBufferedSet { public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public kelondroIntBytesMap(int payloadSize, int initSize) { public kelondroIntBytesMap(int payloadSize, int initSize) {
super(new kelondroRow(new int[]{4, payloadSize}), initSize); super(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize);
// initialize ordering // initialize ordering
super.setOrdering(kelondroNaturalOrder.naturalOrder, 0); super.setOrdering(kelondroNaturalOrder.naturalOrder, 0);
@ -43,7 +43,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public byte[] putb(int ii, byte[] value) { public byte[] putb(int ii, byte[] value) {
kelondroRow.Entry newentry = rowdef.newEntry(); kelondroRow.Entry newentry = rowdef.newEntry();
newentry.setCol(0, kelondroNaturalOrder.encodeLong((long) ii, 4)); newentry.setCol(0, (long) ii);
newentry.setCol(1, value); newentry.setCol(1, value);
kelondroRow.Entry oldentry = super.put(newentry); kelondroRow.Entry oldentry = super.put(newentry);
if (oldentry == null) return null; if (oldentry == null) return null;
@ -52,7 +52,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet {
public void addb(int ii, byte[] value) { public void addb(int ii, byte[] value) {
kelondroRow.Entry newentry = rowdef.newEntry(); kelondroRow.Entry newentry = rowdef.newEntry();
newentry.setCol(0, kelondroNaturalOrder.encodeLong((long) ii, 4)); newentry.setCol(0, (long) ii);
newentry.setCol(1, value); newentry.setCol(1, value);
add(newentry); add(newentry);
} }

@ -376,23 +376,23 @@ public class kelondroRecords {
this.OHBYTEC = entryFile.readShort(POS_OHBYTEC); this.OHBYTEC = entryFile.readShort(POS_OHBYTEC);
this.OHHANDLEC = entryFile.readShort(POS_OHHANDLEC); this.OHHANDLEC = entryFile.readShort(POS_OHHANDLEC);
int[] COLWIDTHS = new int[entryFile.readShort(POS_COLUMNS)]; kelondroColumn[] COLDEFS = new kelondroColumn[entryFile.readShort(POS_COLUMNS)];
this.HANDLES = new Handle[entryFile.readInt(POS_INTPROPC)]; this.HANDLES = new Handle[entryFile.readInt(POS_INTPROPC)];
this.TXTPROPS = new byte[entryFile.readInt(POS_TXTPROPC)][]; this.TXTPROPS = new byte[entryFile.readInt(POS_TXTPROPC)][];
this.TXTPROPW = entryFile.readInt(POS_TXTPROPW); this.TXTPROPW = entryFile.readInt(POS_TXTPROPW);
if (COLWIDTHS.length == 0) throw new kelondroException(filename, "init: zero columns; strong failure"); if (COLDEFS.length == 0) throw new kelondroException(filename, "init: zero columns; strong failure");
// calculate dynamic run-time seek pointers // calculate dynamic run-time seek pointers
POS_HANDLES = POS_COLWIDTHS + COLWIDTHS.length * 4; POS_HANDLES = POS_COLWIDTHS + COLDEFS.length * 4;
POS_TXTPROPS = POS_HANDLES + HANDLES.length * 4; POS_TXTPROPS = POS_HANDLES + HANDLES.length * 4;
POS_NODES = POS_TXTPROPS + TXTPROPS.length * TXTPROPW; POS_NODES = POS_TXTPROPS + TXTPROPS.length * TXTPROPW;
// read configuration arrays // read configuration arrays
for (int i = 0; i < COLWIDTHS.length; i++) { for (int i = 0; i < COLDEFS.length; i++) {
COLWIDTHS[i] = entryFile.readInt(POS_COLWIDTHS + 4 * i); COLDEFS[i] = new kelondroColumn("col-" + i, kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, entryFile.readInt(POS_COLWIDTHS + 4 * i), "");
} }
this.ROW = new kelondroRow(COLWIDTHS); this.ROW = new kelondroRow(COLDEFS);
for (int i = 0; i < HANDLES.length; i++) { for (int i = 0; i < HANDLES.length; i++) {
HANDLES[i] = new Handle(entryFile.readInt(POS_HANDLES + 4 * i)); HANDLES[i] = new Handle(entryFile.readInt(POS_HANDLES + 4 * i));
} }
@ -940,6 +940,21 @@ public class kelondroRecords {
return this.ROW; return this.ROW;
} }
/**
 * Adopts the column attributes of the given row definition for the row
 * definition held by this object.
 *
 * For every column of the current definition, the nickname, cell type and
 * encoder of the column at the same position in <code>rowdef</code> are
 * handed to <code>setAttributes</code>. No cell width is passed along, and
 * columns of <code>rowdef</code> beyond the current column count are not
 * looked at.
 *
 * @param rowdef the row definition whose column attributes are taken over;
 *               it must have at least as many columns as the current one
 * @throws kelondroException if <code>rowdef</code> has fewer columns than
 *                           the current row definition
 */
public final void assignRowdef(kelondroRow rowdef) {
    // a definition with fewer columns cannot describe every existing column
    final boolean tooFewColumns = rowdef.columns() < ROW.columns();
    if (tooFewColumns) {
        final String reason = "new rowdef '" + rowdef.toString() + "' is not compatible with old rowdef '" + ROW.toString() + "', they have a different number of columns";
        throw new kelondroException(this.filename, reason);
    }

    // take over nickname, cell type and encoder, position by position
    int index = 0;
    while (index < ROW.columns()) {
        final kelondroColumn source = rowdef.column(index);
        ROW.column(index).setAttributes(source.nickname(), source.celltype(), source.encoder());
        index++;
    }
}
private final long seekpos(Handle handle) { private final long seekpos(Handle handle) {
assert (handle.index >= 0): "handle index too low: " + handle.index; assert (handle.index >= 0): "handle index too low: " + handle.index;
assert (handle.index < USAGE.allCount()): "handle index too high:" + handle.index; assert (handle.index < USAGE.allCount()): "handle index too high:" + handle.index;

@ -48,9 +48,9 @@ public class kelondroRow {
this.colstart[i] = this.objectsize; this.colstart[i] = this.objectsize;
this.objectsize += this.row[i].cellwidth(); this.objectsize += this.row[i].cellwidth();
} }
} }
/*
public kelondroRow(int[] rowi) { public kelondroRow(int[] rowi) {
this.row = new kelondroColumn[rowi.length]; this.row = new kelondroColumn[rowi.length];
this.colstart = new int[rowi.length]; this.colstart = new int[rowi.length];
@ -61,7 +61,7 @@ public class kelondroRow {
this.objectsize += this.row[i].cellwidth(); this.objectsize += this.row[i].cellwidth();
} }
} }
*/
public kelondroRow(String structure) { public kelondroRow(String structure) {
// define row with row syntax // define row with row syntax
// example: // example:
@ -120,6 +120,10 @@ public class kelondroRow {
return this.objectsize; return this.objectsize;
} }
/**
 * Returns the column definition stored at the given position of this row.
 *
 * @param col index into the column array of this row definition
 * @return the column object held at that index (the stored reference itself)
 */
public kelondroColumn column(int col) {
    final kelondroColumn definition = this.row[col];
    return definition;
}
public int width(int row) { public int width(int row) {
return this.row[row].cellwidth(); return this.row[row].cellwidth();
} }
@ -252,11 +256,11 @@ public class kelondroRow {
} }
} }
public void setColByte(int column, byte c) { public void setCol(int column, byte c) {
rowinstance[colstart[column]] = c; rowinstance[colstart[column]] = c;
} }
public void setColString(int column, String cell, String encoding) { public void setCol(int column, String cell, String encoding) {
if (encoding == null) if (encoding == null)
setCol(column, cell.getBytes()); setCol(column, cell.getBytes());
else else
@ -267,32 +271,22 @@ public class kelondroRow {
} }
} }
public void setColLong(int column, long cell) { public void setCol(int column, long cell) {
// uses the column definition to choose the right encoding // uses the column definition to choose the right encoding
switch (row[column].encoder()) { switch (row[column].encoder()) {
case kelondroColumn.encoder_none: case kelondroColumn.encoder_none:
throw new kelondroException("ROW", "setColLong has celltype none, no encoder given"); throw new kelondroException("ROW", "setColLong has celltype none, no encoder given");
case kelondroColumn.encoder_b64e: case kelondroColumn.encoder_b64e:
setColLongB64E(column, cell); kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth());
break; break;
case kelondroColumn.encoder_b256: case kelondroColumn.encoder_b256:
setColLongB256(column, cell); kelondroNaturalOrder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth());
break; break;
case kelondroColumn.encoder_bytes: case kelondroColumn.encoder_bytes:
throw new kelondroException("ROW", "setColLong of celltype bytes not applicable"); throw new kelondroException("ROW", "setColLong of celltype bytes not applicable");
} }
} }
public void setColLongB256(int column, long cell) {
// temporary method, should be replaced by setColLong if all row declarations are complete
kelondroNaturalOrder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth());
}
public void setColLongB64E(int column, long cell) {
// temporary method, should be replaced by setColLong if all row declarations are complete
kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth());
}
public String getColString(int column, String encoding) { public String getColString(int column, String encoding) {
int length = row[column].cellwidth(); int length = row[column].cellwidth();
int offset = colstart[column]; int offset = colstart[column];
@ -316,25 +310,15 @@ public class kelondroRow {
case kelondroColumn.encoder_none: case kelondroColumn.encoder_none:
throw new kelondroException("ROW", "getColLong has celltype none, no encoder given"); throw new kelondroException("ROW", "getColLong has celltype none, no encoder given");
case kelondroColumn.encoder_b64e: case kelondroColumn.encoder_b64e:
return getColLongB64E(column); return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, colstart[column], row[column].cellwidth());
case kelondroColumn.encoder_b256: case kelondroColumn.encoder_b256:
return getColLongB256(column); return kelondroNaturalOrder.decodeLong(rowinstance, colstart[column], row[column].cellwidth());
case kelondroColumn.encoder_bytes: case kelondroColumn.encoder_bytes:
throw new kelondroException("ROW", "getColLong of celltype bytes not applicable"); throw new kelondroException("ROW", "getColLong of celltype bytes not applicable");
} }
throw new kelondroException("ROW", "getColLong did not find appropriate encoding"); throw new kelondroException("ROW", "getColLong did not find appropriate encoding");
} }
public long getColLongB256(int column) {
// temporary method, should be replaced by getColLong if all row declarations are complete
return kelondroNaturalOrder.decodeLong(rowinstance, colstart[column], row[column].cellwidth());
}
public long getColLongB64E(int column) {
// temporary method, should be replaced by getColLong if all row declarations are complete
return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, colstart[column], row[column].cellwidth());
}
public byte getColByte(int column) { public byte getColByte(int column) {
return rowinstance[colstart[column]]; return rowinstance[colstart[column]];
} }
@ -345,41 +329,6 @@ public class kelondroRow {
return c; return c;
} }
/*
public byte[] toEncodedBytesForm() {
byte[] b = new byte[objectsize];
int encoder, cellwidth;
int p = 0;
for (int i = 0; i < row.length; i++) {
encoder = row[i].encoder();
cellwidth = row[i].cellwidth();
switch (row[i].celltype()) {
case kelondroColumn.celltype_undefined:
throw new kelondroException("ROW", "toEncodedForm of celltype undefined not possible");
case kelondroColumn.celltype_boolean:
throw new kelondroException("ROW", "toEncodedForm of celltype boolean not yet implemented");
case kelondroColumn.celltype_binary:
System.arraycopy(rowinstance, colstart[i], b, p, cellwidth);
p += cellwidth;
continue;
case kelondroColumn.celltype_string:
System.arraycopy(rowinstance, colstart[i], b, p, cellwidth);
p += cellwidth;
continue;
case kelondroColumn.celltype_cardinal:
if (encoder == kelondroColumn.encoder_b64e) {
long c = bytes2long(rowinstance, colstart[i], cellwidth);
System.arraycopy(kelondroBase64Order.enhancedCoder.encodeLongSmart(c, cellwidth).getBytes(), 0, b, p, cellwidth);
p += cellwidth;
continue;
}
throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")");
}
}
return b;
}
*/
public String toPropertyForm(boolean includeBraces, boolean decimalCardinal) { public String toPropertyForm(boolean includeBraces, boolean decimalCardinal) {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
if (includeBraces) sb.append("{"); if (includeBraces) sb.append("{");
@ -445,4 +394,14 @@ public class kelondroRow {
return x; return x;
} }
/**
 * Tests whether this row definition contains all columns of another row
 * definition as a prefix, possibly followed by additional columns.
 *
 * @param otherRow the row definition that should be covered by this one
 * @return true if this row has at least as many columns and at least the
 *         object size of <code>otherRow</code>, and every column of
 *         <code>otherRow</code> equals the column at the same position in
 *         this row; false otherwise
 */
public boolean subsumes(kelondroRow otherRow) {
    // A row with fewer columns can never cover the other row. Without this
    // guard a row with fewer but wider columns passes the size check below
    // and the loop reads past the end of this.row.
    if (this.row.length < otherRow.row.length) return false;
    if (this.objectsize < otherRow.objectsize) return false;
    // NOTE(review): relies on kelondroColumn.equals comparing column
    // definitions by content - confirm that it is overridden accordingly.
    for (int i = 0; i < otherRow.row.length; i++) {
        if (!(this.row[i].equals(otherRow.row[i]))) return false;
    }
    return true;
}
} }

@ -84,9 +84,9 @@ public class kelondroRowCollection {
public kelondroRowCollection(kelondroRow rowdef, byte[] exportedCollectionRowinstance) { public kelondroRowCollection(kelondroRow rowdef, byte[] exportedCollectionRowinstance) {
this.rowdef = rowdef; this.rowdef = rowdef;
kelondroRow.Entry exportedCollection = exportRow(exportedCollectionRowinstance.length - exportOverheadSize).newEntry(exportedCollectionRowinstance); kelondroRow.Entry exportedCollection = exportRow(exportedCollectionRowinstance.length - exportOverheadSize).newEntry(exportedCollectionRowinstance);
this.chunkcount = (int) exportedCollection.getColLongB256(exp_chunkcount); this.chunkcount = (int) exportedCollection.getColLong(exp_chunkcount);
this.lastTimeRead = (exportedCollection.getColLongB256(exp_last_read) + 10957) * day; this.lastTimeRead = (exportedCollection.getColLong(exp_last_read) + 10957) * day;
this.lastTimeWrote = (exportedCollection.getColLongB256(exp_last_wrote) + 10957) * day; this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day;
String sortOrderKey = exportedCollection.getColString(exp_order_type, null); String sortOrderKey = exportedCollection.getColString(exp_order_type, null);
if (sortOrderKey.equals("__")) { if (sortOrderKey.equals("__")) {
this.sortOrder = null; this.sortOrder = null;
@ -94,8 +94,8 @@ public class kelondroRowCollection {
this.sortOrder = kelondroNaturalOrder.bySignature(sortOrderKey); this.sortOrder = kelondroNaturalOrder.bySignature(sortOrderKey);
if (this.sortOrder == null) this.sortOrder = kelondroBase64Order.bySignature(sortOrderKey); if (this.sortOrder == null) this.sortOrder = kelondroBase64Order.bySignature(sortOrderKey);
} }
this.sortColumn = (int) exportedCollection.getColLongB256(exp_order_col); this.sortColumn = (int) exportedCollection.getColLong(exp_order_col);
this.sortBound = (int) exportedCollection.getColLongB256(exp_order_bound); this.sortBound = (int) exportedCollection.getColLong(exp_order_bound);
this.chunkcache = exportedCollection.getColBytes(exp_collection); this.chunkcache = exportedCollection.getColBytes(exp_collection);
} }
@ -125,12 +125,12 @@ public class kelondroRowCollection {
trim(); trim();
kelondroRow row = exportRow(chunkcache.length); kelondroRow row = exportRow(chunkcache.length);
kelondroRow.Entry entry = row.newEntry(); kelondroRow.Entry entry = row.newEntry();
entry.setColLongB256(exp_chunkcount, size()); entry.setCol(exp_chunkcount, size());
entry.setColLongB256(exp_last_read, daysSince2000(this.lastTimeRead)); entry.setCol(exp_last_read, daysSince2000(this.lastTimeRead));
entry.setColLongB256(exp_last_wrote, daysSince2000(this.lastTimeWrote)); entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote));
entry.setCol(exp_order_type, (this.sortOrder == null) ? "__".getBytes() : this.sortOrder.signature().getBytes()); entry.setCol(exp_order_type, (this.sortOrder == null) ? "__".getBytes() : this.sortOrder.signature().getBytes());
entry.setColLongB256(exp_order_col, this.sortColumn); entry.setCol(exp_order_col, this.sortColumn);
entry.setColLongB256(exp_order_bound, this.sortBound); entry.setCol(exp_order_bound, this.sortBound);
entry.setCol(exp_collection, chunkcache); entry.setCol(exp_collection, chunkcache);
return entry.bytes(); return entry.bytes();
} }

@ -449,7 +449,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd
*/ */
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
kelondroRowSet c = new kelondroRowSet(new kelondroRow(new int[]{12, 12}), 0); kelondroRowSet c = new kelondroRowSet(new kelondroRow("byte[] a-12, byte[] b-12"), 0);
Random random = new Random(0); Random random = new Random(0);
byte[] key; byte[] key;
for (int i = 0; i < 100000; i++) { for (int i = 0; i < 100000; i++) {

@ -67,10 +67,6 @@ public final class kelondroStack extends kelondroRecords {
private static int root = 0; // pointer for FHandles-array: pointer to root node private static int root = 0; // pointer for FHandles-array: pointer to root node
private static int toor = 1; // pointer for FHandles-array: pointer to root node private static int toor = 1; // pointer for FHandles-array: pointer to root node
public kelondroStack(File file, int key, int value, boolean exitOnFail) {
this(file, new kelondroRow(new int[] { key, value }), exitOnFail);
}
public kelondroStack(File file, kelondroRow rowdef, boolean exitOnFail) { public kelondroStack(File file, kelondroRow rowdef, boolean exitOnFail) {
// this creates a new stack // this creates a new stack
super(file, 0, 0, thisOHBytes, thisOHHandles, rowdef, thisFHandles, rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail); super(file, 0, 0, thisOHBytes, thisOHHandles, rowdef, thisFHandles, rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail);
@ -413,7 +409,7 @@ public final class kelondroStack extends kelondroRecords {
// create <keylen> <valuelen> <filename> // create <keylen> <valuelen> <filename>
File f = new File(args[3]); File f = new File(args[3]);
if (f.exists()) f.delete(); if (f.exists()) f.delete();
kelondroRow lens = new kelondroRow(new int[]{Integer.parseInt(args[1]), Integer.parseInt(args[2])}); kelondroRow lens = new kelondroRow("byte[] key-" + Integer.parseInt(args[1]) + ", byte[] value-" + Integer.parseInt(args[2]));
kelondroStack fm = new kelondroStack(f, lens, true); kelondroStack fm = new kelondroStack(f, lens, true);
fm.close(); fm.close();
} else if (args[0].equals("-p")) { } else if (args[0].equals("-p")) {

@ -91,11 +91,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
private long lastIteratorCount = readAheadChunkSize; private long lastIteratorCount = readAheadChunkSize;
private kelondroObjectCache objectCache; private kelondroObjectCache objectCache;
public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, int key, int value, boolean exitOnFail) {
this(file, buffersize, preloadTime, objectCachePercent, new kelondroRow(new int[] { key, value }), new kelondroNaturalOrder(true), 1, 8, exitOnFail);
}
public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, boolean exitOnFail) { public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, boolean exitOnFail) {
// this creates a new tree file // this creates a new tree file
this(file, buffersize, preloadTime, objectCachePercent, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail); this(file, buffersize, preloadTime, objectCachePercent, rowdef, new kelondroNaturalOrder(true), rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail);
@ -1235,7 +1230,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
// test script // test script
File testFile = new File("test.db"); File testFile = new File("test.db");
while (testFile.exists()) testFile.delete(); while (testFile.exists()) testFile.delete();
kelondroTree fm = new kelondroTree(testFile, 0x100000, 0, 10, 4, 4, true); kelondroTree fm = new kelondroTree(testFile, 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true);
byte[] dummy = "".getBytes(); byte[] dummy = "".getBytes();
fm.put("abc0".getBytes(), dummy); fm.put("bcd0".getBytes(), dummy); fm.put("abc0".getBytes(), dummy); fm.put("bcd0".getBytes(), dummy);
fm.put("def0".getBytes(), dummy); fm.put("bab0".getBytes(), dummy); fm.put("def0".getBytes(), dummy); fm.put("bab0".getBytes(), dummy);
@ -1310,7 +1305,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
// create <keylen> <valuelen> <filename> // create <keylen> <valuelen> <filename>
File f = new File(args[3]); File f = new File(args[3]);
if (f.exists()) f.delete(); if (f.exists()) f.delete();
kelondroRow lens = new kelondroRow(new int[]{Integer.parseInt(args[1]), Integer.parseInt(args[2])}); kelondroRow lens = new kelondroRow("byte[] key-" + Integer.parseInt(args[1]) + ", byte[] value-" + Integer.parseInt(args[2]));
kelondroTree fm = new kelondroTree(f, 0x100000, 0, 10, lens, true); kelondroTree fm = new kelondroTree(f, 0x100000, 0, 10, lens, true);
fm.close(); fm.close();
} else if (args[0].equals("-u")) { } else if (args[0].equals("-u")) {
@ -1377,7 +1372,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
int steps = 0; int steps = 0;
while (true) { while (true) {
if (testFile.exists()) testFile.delete(); if (testFile.exists()) testFile.delete();
tt = new kelondroTree(testFile, 200, 0, 10, 4 ,4, true); tt = new kelondroTree(testFile, 200, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true);
steps = 10 + ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); steps = 10 + ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11);
t = s; t = s;
d = ""; d = "";
@ -1443,7 +1438,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
File f = new File("test.db"); File f = new File("test.db");
if (f.exists()) f.delete(); if (f.exists()) f.delete();
try { try {
kelondroTree tt = new kelondroTree(f, 1000, 0, 10, 4, 4, true); kelondroTree tt = new kelondroTree(f, 1000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true);
byte[] b; byte[] b;
b = testWord('B'); tt.put(b, b); //tt.print(); b = testWord('B'); tt.put(b, b); //tt.print();
b = testWord('C'); tt.put(b, b); //tt.print(); b = testWord('C'); tt.put(b, b); //tt.print();
@ -1508,7 +1503,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex {
public static kelondroTree testTree(File f, String testentities) throws IOException { public static kelondroTree testTree(File f, String testentities) throws IOException {
if (f.exists()) f.delete(); if (f.exists()) f.delete();
kelondroTree tt = new kelondroTree(f, 0, 0, 10, 4, 4, true); kelondroTree tt = new kelondroTree(f, 0, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true);
byte[] b; byte[] b;
for (int i = 0; i < testentities.length(); i++) { for (int i = 0; i < testentities.length(); i++) {
b = testWord(testentities.charAt(i)); b = testWord(testentities.charAt(i));

@ -63,10 +63,10 @@ public class plasmaCrawlBalancer {
try { try {
stack = new kelondroStack(stackFile); stack = new kelondroStack(stackFile);
} catch (IOException e) { } catch (IOException e) {
stack = new kelondroStack(stackFile, new kelondroRow(new int[] {indexURL.urlHashLength}), true); stack = new kelondroStack(stackFile, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength), true);
} }
} else { } else {
stack = new kelondroStack(stackFile, new kelondroRow(new int[] {indexURL.urlHashLength}), true); stack = new kelondroStack(stackFile, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength), true);
} }
domainStacks = new HashMap(); domainStacks = new HashMap();
} }

@ -125,29 +125,30 @@ public class plasmaCrawlEURL extends indexURL {
public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) { public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) {
super(); super();
int[] ce = { kelondroRow rowdef = new kelondroRow(
urlHashLength, // the url's hash "String urlhash-" + urlHashLength + ", " + // the url's hash
urlHashLength, // the url's referrer hash "String refhash-" + urlHashLength + ", " + // the url's referrer hash
urlHashLength, // the crawling initiator "String initiator-" + urlHashLength + ", " + // the crawling initiator
urlHashLength, // the crawling executor "String executor-" + urlHashLength + ", " + // the crawling executor
urlStringLength, // the url as string "String urlstring-" + urlStringLength + ", " + // the url as string
urlNameLength, // the name of the url, from anchor tag <a>name</a> "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag <a>name</a>
urlDateLength, // the time when the url was first time appeared "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared
urlDateLength, // the time when the url was last time tried to load "Cardinal loaddate-" + urlDateLength + " {b64e}, " + // the time when the url was last time tried to load
urlRetryLength, // number of load retries "Cardinal retrycount-" + urlRetryLength + " {b64e}, " + // number of load retries
urlErrorLength, // string describing load failure "String failcause-" + urlErrorLength + ", " + // string describing load failure
urlFlagLength // extra space "byte[] flags-" + urlFlagLength); // extra space
};
if (cachePath.exists()) try { if (cachePath.exists()) try {
// open existing cache // open existing cache
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
urlHashCache.assignRowdef(rowdef);
} catch (IOException e) { } catch (IOException e) {
cachePath.delete(); cachePath.delete();
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} else { } else {
// create new cache // create new cache
cachePath.getParentFile().mkdirs(); cachePath.getParentFile().mkdirs();
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} }
} }
@ -252,9 +253,9 @@ public class plasmaCrawlEURL extends indexURL {
this.url = new URL(entry.getColString(4, "UTF-8").trim()); this.url = new URL(entry.getColString(4, "UTF-8").trim());
String n = entry.getColString(5, "UTF-8"); String n = entry.getColString(5, "UTF-8");
this.name = (n == null) ? "" : n.trim(); this.name = (n == null) ? "" : n.trim();
this.initdate = new Date(86400000 * entry.getColLongB64E(6)); this.initdate = new Date(86400000 * entry.getColLong(6));
this.trydate = new Date(86400000 * entry.getColLongB64E(7)); this.trydate = new Date(86400000 * entry.getColLong(7));
this.trycount = (int) entry.getColLongB64E(8); this.trycount = (int) entry.getColLong(8);
this.failreason = entry.getColString(9, "UTF-8"); this.failreason = entry.getColString(9, "UTF-8");
this.flags = new bitfield(entry.getColBytes(10)); this.flags = new bitfield(entry.getColBytes(10));
return; return;

@ -97,35 +97,34 @@ public final class plasmaCrawlLURL extends indexURL {
public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime) { public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime) {
super(); super();
int[] ce = { kelondroRow rowdef = new kelondroRow(
urlHashLength, "String urlhash-" + urlHashLength + ", " + // the url's hash
urlStringLength, "String urlstring-" + urlStringLength + ", " + // the url as string
urlDescrLength, "String urldescr-" + urlDescrLength + ", " + // the description of the url
urlDateLength, "Cardinal moddate-" + urlDateLength + " {b64e}, " + // last-modified from the httpd
urlDateLength, "Cardinal loaddate-" + urlDateLength + " {b64e}, " + // time when the url was loaded
urlHashLength, "String refhash-" + urlHashLength + ", " + // the url's referrer hash
urlCopyCountLength, "Cardinal copycount-" + urlCopyCountLength + " {b64e}, " + //
urlFlagLength, "byte[] flags-" + urlFlagLength + ", " + // flags
urlQualityLength, "Cardinal quality-" + urlQualityLength + " {b64e}, " + //
urlLanguageLength, "String language-" + urlLanguageLength + ", " + //
urlDoctypeLength, "byte[] doctype-" + urlDoctypeLength + ", " + //
urlSizeLength, "Cardinal size-" + urlSizeLength + " {b64e}, " + // size of file in bytes
urlWordCountLength "Cardinal wc-" + urlWordCountLength + " {b64e}"); // word count
};
int segmentsize = 0;
for (int i = 0; i < ce.length; i++) { segmentsize += ce[i]; }
if (cachePath.exists()) { if (cachePath.exists()) {
// open existing cache // open existing cache
try { try {
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
urlHashCache.assignRowdef(rowdef);
} catch (IOException e) { } catch (IOException e) {
cachePath.getParentFile().mkdirs(); cachePath.getParentFile().mkdirs();
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} }
} else { } else {
// create new cache // create new cache
cachePath.getParentFile().mkdirs(); cachePath.getParentFile().mkdirs();
urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} }
// init result stacks // init result stacks
@ -284,7 +283,6 @@ public final class plasmaCrawlLURL extends indexURL {
return false; return false;
} }
public boolean exists(String urlHash) { public boolean exists(String urlHash) {
try { try {
if (urlHashCache.get(urlHash.getBytes()) != null) { if (urlHashCache.get(urlHash.getBytes()) != null) {
@ -297,18 +295,6 @@ public final class plasmaCrawlLURL extends indexURL {
} }
} }
/*
public long existsIndexSize() {
return this.existsIndex.size();
}
public void clearExistsIndex() {
synchronized (existsIndex) {
existsIndex.clear();
}
}
*/
private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US);
private static String daydate(Date date) { private static String daydate(Date date) {
if (date == null) { if (date == null) {
@ -368,13 +354,6 @@ public final class plasmaCrawlLURL extends indexURL {
url = urle.url(); url = urle.url();
urlstr = url.toString(); urlstr = url.toString();
// Kosmetik, die wirklich benutzte URL behaelt die ':80'
// if (txt.endsWith(":80")) txt = txt.substring(0, txt.length() - 3);
// if ((p = txt.indexOf(":80/")) != -1) {
// txt = txt.substring(0, p).concat(txt.substring(p + 3)); // den '/' erstmal nicht abschneiden
// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt);
// }
urltxt = nxTools.cutUrlText(urlstr, 72); // shorten the string text like a URL urltxt = nxTools.cutUrlText(urlstr, 72); // shorten the string text like a URL
cachepath = (url == null) ? "-not-cached-" : cacheManager.getCachePath(url).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1); cachepath = (url == null) ? "-not-cached-" : cacheManager.getCachePath(url).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1);
@ -478,16 +457,16 @@ public final class plasmaCrawlLURL extends indexURL {
this.urlHash = entry.getColString(0, null); this.urlHash = entry.getColString(0, null);
this.url = new URL(entry.getColString(1, "UTF-8").trim()); this.url = new URL(entry.getColString(1, "UTF-8").trim());
this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim(); this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim();
this.moddate = new Date(86400000 * entry.getColLongB64E(3)); this.moddate = new Date(86400000 * entry.getColLong(3));
this.loaddate = new Date(86400000 * entry.getColLongB64E(4)); this.loaddate = new Date(86400000 * entry.getColLong(4));
this.referrerHash = (entry.empty(5)) ? dummyHash : entry.getColString(5, "UTF-8"); this.referrerHash = (entry.empty(5)) ? dummyHash : entry.getColString(5, "UTF-8");
this.copyCount = (int) entry.getColLongB64E(6); this.copyCount = (int) entry.getColLong(6);
this.flags = entry.getColString(7, "UTF-8"); this.flags = entry.getColString(7, "UTF-8");
this.quality = (int) entry.getColLongB64E(8); this.quality = (int) entry.getColLong(8);
this.language = entry.getColString(9, "UTF-8"); this.language = entry.getColString(9, "UTF-8");
this.doctype = (char) entry.getColByte(10); this.doctype = (char) entry.getColByte(10);
this.size = (int) entry.getColLongB64E(11); this.size = (int) entry.getColLong(11);
this.wordCount = (int) entry.getColLongB64E(12); this.wordCount = (int) entry.getColLong(12);
this.snippet = null; this.snippet = null;
this.word = searchedWord; this.word = searchedWord;
this.stored = false; this.stored = false;

@ -76,20 +76,19 @@ public class plasmaCrawlNURL extends indexURL {
/** /**
* column length definition for the {@link plasmaURL#urlHashCache} DB * column length definition for the {@link plasmaURL#urlHashCache} DB
*/ */
public static final int[] ce = { public final static kelondroRow rowdef = new kelondroRow(
urlHashLength, // the url hash "String urlhash-" + urlHashLength + ", " + // the url's hash
urlHashLength, // initiator "String initiator-" + urlHashLength + ", " + // the crawling initiator
urlStringLength, // the url as string "String urlstring-" + urlStringLength + ", " + // the url as string
urlHashLength, // the url's referrer hash "String refhash-" + urlHashLength + ", " + // the url's referrer hash
urlNameLength, // the name of the url, from anchor tag <a>name</a> "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag <a>name</a>
urlDateLength, // the time when the url was first time appeared "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared
urlCrawlProfileHandleLength, // the name of the prefetch profile handle "String profile-" + urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
urlCrawlDepthLength, // the prefetch depth so far, starts at 0 "Cardinal depth-" + urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
urlParentBranchesLength, // number of anchors of the parent "Cardinal parentbr-" + urlParentBranchesLength + " {b64e}, " + // number of anchors of the parent
urlForkFactorLength, // sum of anchors of all ancestors "Cardinal forkfactor-" + urlForkFactorLength + " {b64e}, " + // sum of anchors of all ancestors
urlFlagLength, // extra space "byte[] flags-" + urlFlagLength + ", " + // flags
urlHandleLength // extra handle "String handle-" + urlHandleLength); // extra handle
};
private final plasmaCrawlBalancer coreStack; // links found by crawling to depth-1 private final plasmaCrawlBalancer coreStack; // links found by crawling to depth-1
private final plasmaCrawlBalancer limitStack; // links found by crawling at target depth private final plasmaCrawlBalancer limitStack; // links found by crawling at target depth
@ -127,7 +126,7 @@ public class plasmaCrawlNURL extends indexURL {
limitStack = new plasmaCrawlBalancer(limitStackFile); limitStack = new plasmaCrawlBalancer(limitStackFile);
overhangStack = new plasmaCrawlBalancer(overhangStackFile); overhangStack = new plasmaCrawlBalancer(overhangStackFile);
remoteStack = new plasmaCrawlBalancer(remoteStackFile); remoteStack = new plasmaCrawlBalancer(remoteStackFile);
kelondroRow rowdef = new kelondroRow(new int[] {indexURL.urlHashLength}); kelondroRow rowdef = new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength);
if (imageStackFile.exists()) try { if (imageStackFile.exists()) try {
imageStack = new kelondroStack(imageStackFile); imageStack = new kelondroStack(imageStackFile);
} catch (IOException e) { } catch (IOException e) {
@ -170,13 +169,14 @@ public class plasmaCrawlNURL extends indexURL {
if (cacheFile.exists()) try { if (cacheFile.exists()) try {
// open existing cache // open existing cache
urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
urlHashCache.assignRowdef(rowdef);
} catch (IOException e) { } catch (IOException e) {
cacheFile.delete(); cacheFile.delete();
urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} else { } else {
// create new cache // create new cache
cacheFile.getParentFile().mkdirs(); cacheFile.getParentFile().mkdirs();
urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true);
} }
} }
@ -520,11 +520,11 @@ public class plasmaCrawlNURL extends indexURL {
this.url = new URL(entry.getColString(2, null).trim()); this.url = new URL(entry.getColString(2, null).trim());
this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null); this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null);
this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim(); this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim();
this.loaddate = new Date(86400000 * entry.getColLongB64E(5)); this.loaddate = new Date(86400000 * entry.getColLong(5));
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim();
this.depth = (int) entry.getColLongB64E(7); this.depth = (int) entry.getColLong(7);
this.anchors = (int) entry.getColLongB64E(8); this.anchors = (int) entry.getColLong(8);
this.forkfactor = (int) entry.getColLongB64E(9); this.forkfactor = (int) entry.getColLong(9);
this.flags = new bitfield(entry.getColBytes(10)); this.flags = new bitfield(entry.getColBytes(10));
this.handle = Integer.parseInt(entry.getColString(11, null), 16); this.handle = Integer.parseInt(entry.getColString(11, null), 16);
return; return;

@ -467,11 +467,11 @@ public final class plasmaCrawlStacker {
this.url = entry.getColString(2, "UTF-8").trim(); this.url = entry.getColString(2, "UTF-8").trim();
this.referrerHash = (entry.empty(3)) ? indexURL.dummyHash : entry.getColString(3, "UTF-8"); this.referrerHash = (entry.empty(3)) ? indexURL.dummyHash : entry.getColString(3, "UTF-8");
this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim();
this.loaddate = new Date(86400000 * entry.getColLongB64E(5)); this.loaddate = new Date(86400000 * entry.getColLong(5));
this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, "UTF-8").trim(); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, "UTF-8").trim();
this.depth = (int) entry.getColLongB64E(7); this.depth = (int) entry.getColLong(7);
this.anchors = (int) entry.getColLongB64E(8); this.anchors = (int) entry.getColLong(8);
this.forkfactor = (int) entry.getColLongB64E(9); this.forkfactor = (int) entry.getColLong(9);
this.flags = new bitfield(entry.getColBytes(10)); this.flags = new bitfield(entry.getColBytes(10));
this.handle = Integer.parseInt(new String(entry.getColBytes(11), "UTF-8")); this.handle = Integer.parseInt(new String(entry.getColBytes(11), "UTF-8"));
} catch (Exception e) { } catch (Exception e) {
@ -581,9 +581,10 @@ public final class plasmaCrawlStacker {
// open existing cache // open existing cache
try { try {
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
this.urlEntryCache.assignRowdef(plasmaCrawlNURL.rowdef);
} catch (IOException e) { } catch (IOException e) {
cacheFile.delete(); cacheFile.delete();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true);
} }
try { try {
// loop through the list and fill the messageList with url hashs // loop through the list and fill the messageList with url hashs
@ -605,7 +606,7 @@ public final class plasmaCrawlStacker {
// deleting old db and creating a new db // deleting old db and creating a new db
try {this.urlEntryCache.close();}catch(Exception ex){} try {this.urlEntryCache.close();}catch(Exception ex){}
cacheFile.delete(); cacheFile.delete();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true);
} catch (IOException e) { } catch (IOException e) {
/* if we have an error, we start with a fresh database */ /* if we have an error, we start with a fresh database */
plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, IOException:" + e.getMessage() + ". Reseting DB.\n",e); plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, IOException:" + e.getMessage() + ". Reseting DB.\n",e);
@ -613,12 +614,12 @@ public final class plasmaCrawlStacker {
// deleting old db and creating a new db // deleting old db and creating a new db
try {this.urlEntryCache.close();}catch(Exception ex){} try {this.urlEntryCache.close();}catch(Exception ex){}
cacheFile.delete(); cacheFile.delete();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true);
} }
} else { } else {
// create new cache // create new cache
cacheFile.getParentFile().mkdirs(); cacheFile.getParentFile().mkdirs();
this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true);
} }
} }
@ -664,18 +665,18 @@ public final class plasmaCrawlStacker {
this.writeSync.P(); this.writeSync.P();
String urlHash = null; String urlHash = null;
kelondroRow.Entry entryBytes = null; kelondroRow.Entry entry = null;
stackCrawlMessage newMessage = null; stackCrawlMessage newMessage = null;
try { try {
synchronized(this.urlEntryHashCache) { synchronized(this.urlEntryHashCache) {
urlHash = (String) this.urlEntryHashCache.removeFirst(); urlHash = (String) this.urlEntryHashCache.removeFirst();
entryBytes = this.urlEntryCache.remove(urlHash.getBytes()); entry = this.urlEntryCache.remove(urlHash.getBytes());
} }
} finally { } finally {
this.writeSync.V(); this.writeSync.V();
} }
newMessage = new stackCrawlMessage(urlHash, entryBytes); newMessage = new stackCrawlMessage(urlHash, entry);
return newMessage; return newMessage;
} }
} }

@ -80,16 +80,16 @@ public class plasmaSwitchboardQueue {
} }
private void initQueueStack() { private void initQueueStack() {
kelondroRow rowdef = new kelondroRow(new int[] { kelondroRow rowdef = new kelondroRow(
indexURL.urlStringLength, "String url-" + indexURL.urlStringLength + ", " + // the url
indexURL.urlHashLength, "String refhash-" + indexURL.urlHashLength + ", " + // the url's referrer hash
11, "Cardinal modifiedsince-11" + " {b64e}, " + // from ifModifiedSince
1, "byte[] flags-1" + ", " + // flags
yacySeedDB.commonHashLength, "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator
indexURL.urlCrawlDepthLength, "Cardinal depth-" + indexURL.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0
indexURL.urlCrawlProfileHandleLength, "String profile-" + indexURL.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle
indexURL.urlDescrLength "String urldescr-" + indexURL.urlDescrLength); //
});
if (sbQueueStackPath.exists()) try { if (sbQueueStackPath.exists()) try {
sbQueueStack = new kelondroStack(sbQueueStackPath); sbQueueStack = new kelondroStack(sbQueueStackPath);
} catch (IOException e) { } catch (IOException e) {
@ -217,7 +217,7 @@ public class plasmaSwitchboardQueue {
} }
public Entry(kelondroRow.Entry row) throws IOException { public Entry(kelondroRow.Entry row) throws IOException {
long ims = row.getColLongB64E(2); long ims = row.getColLong(2);
byte flags = row.getColByte(3); byte flags = row.getColByte(3);
try { try {
this.url = new URL(row.getColString(0, "UTF-8")); this.url = new URL(row.getColString(0, "UTF-8"));
@ -228,7 +228,7 @@ public class plasmaSwitchboardQueue {
this.ifModifiedSince = (ims == 0) ? null : new Date(ims); this.ifModifiedSince = (ims == 0) ? null : new Date(ims);
this.flags = ((flags & 1) == 1) ? (byte) 1 : (byte) 0; this.flags = ((flags & 1) == 1) ? (byte) 1 : (byte) 0;
this.initiator = row.getColString(4, "UTF-8"); this.initiator = row.getColString(4, "UTF-8");
this.depth = (int) row.getColLongB64E(5); this.depth = (int) row.getColLong(5);
this.profileHandle = row.getColString(6, "UTF-8"); this.profileHandle = row.getColString(6, "UTF-8");
this.anchorName = row.getColString(7, "UTF-8"); this.anchorName = row.getColString(7, "UTF-8");

@ -62,9 +62,9 @@ public class plasmaWordConnotation {
if (refDBfile.exists()) try { if (refDBfile.exists()) try {
refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, fillChar); refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, fillChar);
} catch (IOException e) { } catch (IOException e) {
refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow(new int[] {wordlength, countlength}), fillChar, true); refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow("byte[] word-" + wordlength + ", Cardinal count-" + countlength), fillChar, true);
} else { } else {
refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow(new int[] {wordlength, countlength}), fillChar, true); refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow("byte[] word-" + wordlength + ", Cardinal count-" + countlength), fillChar, true);
} }
} }
@ -73,8 +73,8 @@ public class plasmaWordConnotation {
//reference = reference.toLowerCase(); //reference = reference.toLowerCase();
kelondroRow.Entry record = refDB.get(word, reference.getBytes()); kelondroRow.Entry record = refDB.get(word, reference.getBytes());
long c; long c;
if (record == null) c = 0; else c = record.getColLongB64E(1); if (record == null) c = 0; else c = record.getColLong(1);
record.setColLongB64E(1, c++); record.setCol(1, c++);
refDB.put(word, record); refDB.put(word, record);
} }

@ -104,7 +104,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI {
// create collections storage path // create collections storage path
if (!(newIndexRoot.exists())) newIndexRoot.mkdirs(); if (!(newIndexRoot.exists())) newIndexRoot.mkdirs();
if (useCollectionIndex) if (useCollectionIndex)
collections = new indexCollectionRI(newIndexRoot, "test_generation0", bufferkb * 1024, preloadTime); collections = new indexCollectionRI(newIndexRoot, "test_generation1", bufferkb * 1024, preloadTime);
else else
collections = null; collections = null;

@ -61,6 +61,7 @@ import de.anomic.index.indexEntry;
import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexEntryAttribute;
import de.anomic.index.indexRowSetContainer; import de.anomic.index.indexRowSetContainer;
import de.anomic.index.indexURLEntry; import de.anomic.index.indexURLEntry;
import de.anomic.kelondro.kelondroColumn;
import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroRow; import de.anomic.kelondro.kelondroRow;
@ -70,13 +71,12 @@ public final class plasmaWordIndexAssortment {
// environment constants // environment constants
private static final String assortmentFileName = "indexAssortment"; private static final String assortmentFileName = "indexAssortment";
public static final int[] bufferStructureBasis = new int[]{ public static final kelondroRow bufferStructureBasis = new kelondroRow(
indexEntryAttribute.wordHashLength, // a wordHash "byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " +
4, // occurrence counter "Cardinal occ-4 {b256}, " +
8, // timestamp of last access "Cardinal time-8 {b256}, " +
indexEntryAttribute.urlHashLength, // corresponding URL hash "byte[] urlhash-" + indexEntryAttribute.urlHashLength + ", " +
indexURLEntry.encodedByteArrayFormLength(false) // URL attributes "byte[] urlattr-" + indexURLEntry.encodedByteArrayFormLength(false));
};
// class variables // class variables
private File assortmentFile; private File assortmentFile;
@ -87,25 +87,25 @@ public final class plasmaWordIndexAssortment {
private long preloadTime; private long preloadTime;
private static String intx(int x) { private static String intx(int x) {
String s = Integer.toString(x); String s = Integer.toString(x);
while (s.length() < 3) s = "0" + s; while (s.length() < 3) s = "0" + s;
return s; return s;
} }
private static int[] bufferStructure(int assortmentCapacity) { private static kelondroRow bufferStructure(int assortmentCapacity) {
int[] structure = new int[3 + 2 * assortmentCapacity]; kelondroColumn[] structure = new kelondroColumn[3 + 2 * assortmentCapacity];
structure[0] = bufferStructureBasis[0]; structure[0] = bufferStructureBasis.column(0);
structure[1] = bufferStructureBasis[1]; structure[1] = bufferStructureBasis.column(1);
structure[2] = bufferStructureBasis[2]; structure[2] = bufferStructureBasis.column(2);
for (int i = 0; i < assortmentCapacity; i++) { for (int i = 0; i < assortmentCapacity; i++) {
structure[3 + 2 * i] = bufferStructureBasis[3]; structure[3 + 2 * i] = bufferStructureBasis.column(3);
structure[4 + 2 * i] = bufferStructureBasis[4]; structure[4 + 2 * i] = bufferStructureBasis.column(4);
} }
return structure; return new kelondroRow(structure);
} }
private static int assortmentCapacity(int rowsize) { private static int assortmentCapacity(int rowsize) {
return (rowsize - bufferStructureBasis[0] - bufferStructureBasis[1] - bufferStructureBasis[2]) / (bufferStructureBasis[3] + bufferStructureBasis[4]); return (rowsize - bufferStructureBasis.width(0) - bufferStructureBasis.width(1) - bufferStructureBasis.width(2)) / (bufferStructureBasis.width(3) + bufferStructureBasis.width(4));
} }
public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) { public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) {
@ -121,6 +121,7 @@ public final class plasmaWordIndexAssortment {
try { try {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent); assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent);
assortments.assignRowdef(bufferStructure(assortmentLength));
long stop = System.currentTimeMillis(); long stop = System.currentTimeMillis();
if (log != null) if (log != null)
log.logConfig("Opened Assortment, " + log.logConfig("Opened Assortment, " +
@ -138,7 +139,7 @@ public final class plasmaWordIndexAssortment {
assortmentFile.delete(); // make space for new one assortmentFile.delete(); // make space for new one
} }
// create new assortment tree file // create new assortment tree file
assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(bufferStructure(assortmentLength)), true); assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength), true);
if (log != null) log.logConfig("Created new Assortment, width " + assortmentLength + ", " + bufferkb + "kb buffer"); if (log != null) log.logConfig("Created new Assortment, width " + assortmentLength + ", " + bufferkb + "kb buffer");
} }
@ -149,8 +150,8 @@ public final class plasmaWordIndexAssortment {
if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size"); if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size");
kelondroRow.Entry row = assortments.row().newEntry(); kelondroRow.Entry row = assortments.row().newEntry();
row.setCol(0, newContainer.getWordHash().getBytes()); row.setCol(0, newContainer.getWordHash().getBytes());
row.setColLongB256(1, 1); row.setCol(1, 1);
row.setColLongB256(2, newContainer.updated()); row.setCol(2, newContainer.updated());
Iterator entries = newContainer.entries(); Iterator entries = newContainer.entries();
indexEntry entry; indexEntry entry;
for (int i = 0; i < assortmentLength; i++) { for (int i = 0; i < assortmentLength; i++) {
@ -228,24 +229,10 @@ public final class plasmaWordIndexAssortment {
return row2container(row); return row2container(row);
} }
/*
public indexContainer row2container(String wordHash, kelondroRow.Entry row) {
if (row == null) return null;
final long updateTime = row.getColLongB256(2);
indexTreeMapContainer container = new indexTreeMapContainer(wordHash);
for (int i = 0; i < assortmentLength; i++) {
container.add(
new indexURLEntry[] { new indexURLEntry(
new String(row.getColBytes(3 + 2 * i)), new String(row.getColBytes(4 + 2 * i))) }, updateTime);
}
return container;
}
*/
public final static indexContainer row2container(kelondroRow.Entry row) { public final static indexContainer row2container(kelondroRow.Entry row) {
if (row == null) return null; if (row == null) return null;
String wordHash = row.getColString(0, null); String wordHash = row.getColString(0, null);
final long updateTime = row.getColLongB256(2); final long updateTime = row.getColLong(2);
indexContainer container = new indexRowSetContainer(wordHash); indexContainer container = new indexRowSetContainer(wordHash);
int al = assortmentCapacity(row.objectsize()); int al = assortmentCapacity(row.objectsize());
for (int i = 0; i < al; i++) { for (int i = 0; i < al; i++) {
@ -274,7 +261,7 @@ public final class plasmaWordIndexAssortment {
if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database"); if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database");
} }
if (assortmentFile.exists()) assortmentFile.delete(); if (assortmentFile.exists()) assortmentFile.delete();
assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(bufferStructure(assortmentLength)), true); assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength), true);
} }
public Iterator containers(String startWordHash, boolean up, boolean rot) throws IOException { public Iterator containers(String startWordHash, boolean up, boolean rot) throws IOException {

@ -96,10 +96,10 @@ public final class plasmaWordIndexFile {
kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent); kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent);
} catch (IOException e) { } catch (IOException e) {
theLocation.delete(); theLocation.delete();
kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedByteArrayFormLength(false), false); kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + indexURLEntry.encodedByteArrayFormLength(false)), false);
} else { } else {
// create new index file // create new index file
kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedByteArrayFormLength(false), false); kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + indexURLEntry.encodedByteArrayFormLength(false)), false);
} }
return kt; // everyone who get this should close it when finished! return kt; // everyone who get this should close it when finished!
} }

@ -70,6 +70,7 @@ public class yacyNewsDB {
if (path.exists()) try { if (path.exists()) try {
news = new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); news = new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent);
news.assignRowdef(yacyNewsRecord.rowdef);
} catch (IOException e) { } catch (IOException e) {
news = createDB(path, bufferkb, preloadTime); news = createDB(path, bufferkb, preloadTime);
} else { } else {
@ -166,7 +167,7 @@ public class yacyNewsDB {
b.getColString(0, null), b.getColString(0, null),
b.getColString(1, null), b.getColString(1, null),
(b.empty(2)) ? null : yacyCore.parseUniversalDate(b.getColString(2, null), serverDate.UTCDiffString()), (b.empty(2)) ? null : yacyCore.parseUniversalDate(b.getColString(2, null), serverDate.UTCDiffString()),
(int) b.getColLongB64E(3), (int) b.getColLong(3),
serverCodings.string2map(b.getColString(4, null)) serverCodings.string2map(b.getColString(4, null))
); );
} }

Loading…
Cancel
Save