diff --git a/source/dbtest.java b/source/dbtest.java index 7781a30d3..7f9162f90 100644 --- a/source/dbtest.java +++ b/source/dbtest.java @@ -170,7 +170,7 @@ public class dbtest { profiler.start(); // create the database access - kelondroRow testRow = new kelondroRow(new int[]{keylength, keylength, valuelength}); + kelondroRow testRow = new kelondroRow("byte[] key-" + keylength + ", byte[] dummy-" + keylength + ", value-" + valuelength); if (dbe.equals("kelondroTree")) { File tablefile = new File(tablename + ".kelondro.db"); if (tablefile.exists()) { diff --git a/source/de/anomic/index/indexRAMCacheRI.java b/source/de/anomic/index/indexRAMCacheRI.java index fb6d2d458..0458cb32e 100644 --- a/source/de/anomic/index/indexRAMCacheRI.java +++ b/source/de/anomic/index/indexRAMCacheRI.java @@ -96,7 +96,7 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { File indexDumpFile = new File(databaseRoot, indexArrayFileName); if (indexDumpFile.exists()) indexDumpFile.delete(); kelondroFixedWidthArray dumpArray = null; - dumpArray = new kelondroFixedWidthArray(indexDumpFile, new kelondroRow(plasmaWordIndexAssortment.bufferStructureBasis), 0, false); + dumpArray = new kelondroFixedWidthArray(indexDumpFile, plasmaWordIndexAssortment.bufferStructureBasis, 0, false); long startTime = System.currentTimeMillis(); long messageTime = System.currentTimeMillis() + 5000; long wordsPerSecond = 0, wordcount = 0, urlcount = 0; @@ -119,8 +119,8 @@ public final class indexRAMCacheRI extends indexAbstractRI implements indexRI { while (ci.hasNext()) { iEntry = (indexEntry) ci.next(); row.setCol(0, container.getWordHash().getBytes()); - row.setCol(1, kelondroNaturalOrder.encodeLong(container.size(), 4)); - row.setCol(2, kelondroNaturalOrder.encodeLong(container.updated(), 8)); + row.setCol(1, container.size()); + row.setCol(2, container.updated()); row.setCol(3, iEntry.urlHash().getBytes()); row.setCol(4, iEntry.toEncodedByteArrayForm(false)); dumpArray.set((int) urlcount++, row); diff --git a/source/de/anomic/index/indexURLEntry.java b/source/de/anomic/index/indexURLEntry.java index f244d33a3..308387138 100644 --- a/source/de/anomic/index/indexURLEntry.java +++ b/source/de/anomic/index/indexURLEntry.java @@ -95,19 +95,19 @@ public class indexURLEntry implements Cloneable, indexEntry { if ((language == null) || (language.length() != indexURL.urlLanguageLength)) language = "uk"; this.entry = urlEntryRow.newEntry(); - this.entry.setColString(col_urlhash, urlHash, null); - this.entry.setColLong(col_quality, quality); - this.entry.setColLong(col_lastModified, lastmodified); - this.entry.setColLong(col_hitcount, hitcount); - this.entry.setColString(col_language, language, null); - this.entry.setColByte(col_doctype, (byte) doctype); - this.entry.setColByte(col_localflag, (byte) ((local) ? indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL)); - this.entry.setColLong(col_posintext, posintext); - this.entry.setColLong(col_posinphrase, posinphrase); - this.entry.setColLong(col_posofphrase, posofphrase); - this.entry.setColLong(col_worddistance, worddistance); - this.entry.setColLong(col_wordcount, wordcount); - this.entry.setColLong(col_phrasecount, phrasecount); + this.entry.setCol(col_urlhash, urlHash, null); + this.entry.setCol(col_quality, quality); + this.entry.setCol(col_lastModified, lastmodified); + this.entry.setCol(col_hitcount, hitcount); + this.entry.setCol(col_language, language, null); + this.entry.setCol(col_doctype, (byte) doctype); + this.entry.setCol(col_localflag, (byte) ((local) ? indexEntryAttribute.LT_LOCAL : indexEntryAttribute.LT_GLOBAL)); + this.entry.setCol(col_posintext, posintext); + this.entry.setCol(col_posinphrase, posinphrase); + this.entry.setCol(col_posofphrase, posofphrase); + this.entry.setCol(col_worddistance, worddistance); + this.entry.setCol(col_wordcount, wordcount); + this.entry.setCol(col_phrasecount, phrasecount); } public indexURLEntry(String urlHash, String code) { @@ -208,11 +208,11 @@ public class indexURLEntry implements Cloneable, indexEntry { public static indexURLEntry combineDistance(indexURLEntry ie1, indexEntry ie2) { // returns a modified entry of the first argument - ie1.entry.setColLong(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext())); - ie1.entry.setColLong(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); - ie1.entry.setColLong(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/); - ie1.entry.setColLong(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); - ie1.entry.setColLong(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2); + ie1.entry.setCol(col_worddistance, ie1.worddistance() + ie2.worddistance() + Math.abs(ie1.posintext() - ie2.posintext())); + ie1.entry.setCol(col_posintext, Math.min(ie1.posintext(), ie2.posintext())); + ie1.entry.setCol(col_posinphrase, (ie1.posofphrase() == ie2.posofphrase()) ? ie1.posofphrase() : 0 /*unknown*/); + ie1.entry.setCol(col_posofphrase, Math.min(ie1.posofphrase(), ie2.posofphrase())); + ie1.entry.setCol(col_wordcount, (ie1.wordcount() + ie2.wordcount()) / 2); return ie1; } @@ -225,27 +225,27 @@ public class indexURLEntry implements Cloneable, indexEntry { } public static final void min(indexURLEntry t, indexEntry other) { - if (t.hitcount() > other.hitcount()) t.entry.setColLong(col_hitcount, other.hitcount()); - if (t.wordcount() > other.wordcount()) t.entry.setColLong(col_wordcount, other.wordcount()); - if (t.phrasecount() > other.phrasecount()) t.entry.setColLong(col_phrasecount, other.phrasecount()); - if (t.posintext() > other.posintext()) t.entry.setColLong(col_posintext, other.posintext()); - if (t.posinphrase() > other.posinphrase()) t.entry.setColLong(col_posinphrase, other.posinphrase()); - if (t.posofphrase() > other.posofphrase()) t.entry.setColLong(col_posofphrase, other.posofphrase()); - if (t.worddistance() > other.worddistance()) t.entry.setColLong(col_worddistance, other.worddistance()); - if (t.lastModified() > other.lastModified()) t.entry.setColLong(col_lastModified, other.lastModified()); - if (t.quality() > other.quality()) t.entry.setColLong(col_quality, other.quality()); + if (t.hitcount() > other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); + if (t.wordcount() > other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount()); + if (t.phrasecount() > other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount()); + if (t.posintext() > other.posintext()) t.entry.setCol(col_posintext, other.posintext()); + if (t.posinphrase() > other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase()); + if (t.posofphrase() > other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); + if (t.worddistance() > other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance()); + if (t.lastModified() > other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified()); + if (t.quality() > other.quality()) t.entry.setCol(col_quality, other.quality()); } public static final void max(indexURLEntry t, indexEntry other) { - if (t.hitcount() < other.hitcount()) t.entry.setColLong(col_hitcount, other.hitcount()); - if (t.wordcount() < other.wordcount()) t.entry.setColLong(col_wordcount, other.wordcount()); - if (t.phrasecount() < other.phrasecount()) t.entry.setColLong(col_phrasecount, other.phrasecount()); - if (t.posintext() < other.posintext()) t.entry.setColLong(col_posintext, other.posintext()); - if (t.posinphrase() < other.posinphrase()) t.entry.setColLong(col_posinphrase, other.posinphrase()); - if (t.posofphrase() < other.posofphrase()) t.entry.setColLong(col_posofphrase, other.posofphrase()); - if (t.worddistance() < other.worddistance()) t.entry.setColLong(col_worddistance, other.worddistance()); - if (t.lastModified() < other.lastModified()) t.entry.setColLong(col_lastModified, other.lastModified()); - if (t.quality() < other.quality()) t.entry.setColLong(col_quality, other.quality()); + if (t.hitcount() < other.hitcount()) t.entry.setCol(col_hitcount, other.hitcount()); + if (t.wordcount() < other.wordcount()) t.entry.setCol(col_wordcount, other.wordcount()); + if (t.phrasecount() < other.phrasecount()) t.entry.setCol(col_phrasecount, other.phrasecount()); + if (t.posintext() < other.posintext()) t.entry.setCol(col_posintext, other.posintext()); + if (t.posinphrase() < other.posinphrase()) t.entry.setCol(col_posinphrase, other.posinphrase()); + if (t.posofphrase() < other.posofphrase()) t.entry.setCol(col_posofphrase, other.posofphrase()); + if (t.worddistance() < other.worddistance()) t.entry.setCol(col_worddistance, other.worddistance()); + if (t.lastModified() < other.lastModified()) t.entry.setCol(col_lastModified, other.lastModified()); + if (t.quality() < other.quality()) t.entry.setCol(col_quality, other.quality()); } @@ -258,15 +258,15 @@ public class indexURLEntry implements Cloneable, indexEntry { } static void normalize(indexURLEntry t, indexEntry min, indexEntry max) { - t.entry.setColLong(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount())); - t.entry.setColLong(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount())); - t.entry.setColLong(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount())); - t.entry.setColLong(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext())); - t.entry.setColLong(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase())); - t.entry.setColLong(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase())); - t.entry.setColLong(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); - t.entry.setColLong(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified())); - t.entry.setColLong(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality())); + t.entry.setCol(col_hitcount , (t.hitcount() == 0) ? 0 : 1 + 255 * (t.hitcount() - min.hitcount() ) / (1 + max.hitcount() - min.hitcount())); + t.entry.setCol(col_wordcount , (t.wordcount() == 0) ? 0 : 1 + 255 * (t.wordcount() - min.wordcount() ) / (1 + max.wordcount() - min.wordcount())); + t.entry.setCol(col_phrasecount , (t.phrasecount() == 0) ? 0 : 1 + 255 * (t.phrasecount() - min.phrasecount() ) / (1 + max.phrasecount() - min.phrasecount())); + t.entry.setCol(col_posintext , (t.posintext() == 0) ? 0 : 1 + 255 * (t.posintext() - min.posintext() ) / (1 + max.posintext() - min.posintext())); + t.entry.setCol(col_posinphrase , (t.posinphrase() == 0) ? 0 : 1 + 255 * (t.posinphrase() - min.posinphrase() ) / (1 + max.posinphrase() - min.posinphrase())); + t.entry.setCol(col_posofphrase , (t.posofphrase() == 0) ? 0 : 1 + 255 * (t.posofphrase() - min.posofphrase() ) / (1 + max.posofphrase() - min.posofphrase())); + t.entry.setCol(col_worddistance , (t.worddistance() == 0) ? 0 : 1 + 255 * (t.worddistance() - min.worddistance()) / (1 + max.worddistance() - min.worddistance())); + t.entry.setCol(col_lastModified , (t.lastModified() == 0) ? 0 : 1 + 255 * (t.lastModified() - min.lastModified()) / (1 + max.lastModified() - min.lastModified())); + t.entry.setCol(col_quality , (t.quality() == 0) ? 0 : 1 + 255 * (t.quality() - min.quality() ) / (1 + max.quality() - min.quality())); } public void normalize(indexEntry min, indexEntry max) { diff --git a/source/de/anomic/kelondro/kelondroBytesIntMap.java b/source/de/anomic/kelondro/kelondroBytesIntMap.java index ea7fa0abb..da134cdde 100644 --- a/source/de/anomic/kelondro/kelondroBytesIntMap.java +++ b/source/de/anomic/kelondro/kelondroBytesIntMap.java @@ -40,23 +40,23 @@ public class kelondroBytesIntMap { public int geti(byte[] key) throws IOException { kelondroRow.Entry indexentry = ki.get(key); if (indexentry == null) return -1; - return (int) indexentry.getColLongB256(1); + return (int) indexentry.getColLong(1); } public int puti(byte[] key, int i) throws IOException { kelondroRow.Entry newentry = ki.row().newEntry(); newentry.setCol(0, key); - newentry.setColLongB256(1, i); + newentry.setCol(1, i); kelondroRow.Entry oldentry = ki.put(newentry); if (oldentry == null) return -1; - return (int) oldentry.getColLongB256(1); + return (int) oldentry.getColLong(1); } public int removei(byte[] key) throws IOException { if (ki.size() == 0) return -1; kelondroRow.Entry indexentry = ki.remove(key); if (indexentry == null) return -1; - return (int) indexentry.getColLongB256(1); + return (int) indexentry.getColLong(1); } public int size() throws IOException { diff --git a/source/de/anomic/kelondro/kelondroCollectionIndex.java b/source/de/anomic/kelondro/kelondroCollectionIndex.java index d44a1570e..71d1a7595 100644 --- a/source/de/anomic/kelondro/kelondroCollectionIndex.java +++ b/source/de/anomic/kelondro/kelondroCollectionIndex.java @@ -32,6 +32,8 @@ import java.util.Iterator; import java.util.Map; import java.util.Set; +import de.anomic.server.serverFileUtils; + public class kelondroCollectionIndex { private kelondroIndex index; @@ -60,19 +62,28 @@ public class kelondroCollectionIndex { ); } + private static String fillZ(String s, int len) { + while (s.length() < len) s = "0" + s; + return s; + } + private static File arrayFile(File path, String filenameStub, int loadfactor, int chunksize, int partitionNumber, int serialNumber) { - - String lf = Integer.toHexString(loadfactor).toUpperCase(); - while (lf.length() < 2) lf = "0" + lf; - String cs = Integer.toHexString(chunksize).toUpperCase(); - while (cs.length() < 4) cs = "0" + cs; - String pn = Integer.toHexString(partitionNumber).toUpperCase(); - while (pn.length() < 2) pn = "0" + pn; - String sn = Integer.toHexString(serialNumber).toUpperCase(); - while (sn.length() < 2) sn = "0" + sn; + String lf = fillZ(Integer.toHexString(loadfactor).toUpperCase(), 2); + String cs = fillZ(Integer.toHexString(chunksize).toUpperCase(), 4); + String pn = fillZ(Integer.toHexString(partitionNumber).toUpperCase(), 2); + String sn = fillZ(Integer.toHexString(serialNumber).toUpperCase(), 2); return new File(path, filenameStub + "." + lf + "." + cs + "." + pn + "." + sn + ".kca"); // kelondro collection array } + private static File propertyFile(File path, String filenameStub, int loadfactor, int chunksize) { + String lf = fillZ(Integer.toHexString(loadfactor).toUpperCase(), 2); + String cs = fillZ(Integer.toHexString(chunksize).toUpperCase(), 4); + return new File(path, filenameStub + "." + lf + "." + cs + ".properties"); // kelondro collection array + } + + /* + + */ public kelondroCollectionIndex(File path, String filenameStub, int keyLength, kelondroOrder indexOrder, long buffersize, long preloadTime, int loadfactor, kelondroRow rowdef) throws IOException { @@ -85,6 +96,21 @@ public class kelondroCollectionIndex { // create index table index = new kelondroFlexTable(path, filenameStub + ".index", indexOrder, buffersize, preloadTime, indexRow(keyLength), true); + // save/check property file for this array + File propfile = propertyFile(path, filenameStub, loadfactor, rowdef.objectsize()); + Map props = new HashMap(); + if (propfile.exists()) { + props = serverFileUtils.loadHashMap(propfile); + String stored_rowdef = (String) props.get("rowdef"); + if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef))))) { + System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + + rowdef + "' for array cluster '" + path + "/" + filenameStub + "'"); + System.exit(-1); + } + } + props.put("rowdef", rowdef.toString()); + serverFileUtils.saveMap(propfile, props, "CollectionIndex properties"); + // open array files this.arrays = new HashMap(); // all entries will be dynamically created with getArray() } @@ -176,9 +202,9 @@ public class kelondroCollectionIndex { } else { // overwrite the old collection // read old information - int oldchunksize = (int) oldindexrow.getColLongB256(idx_col_chunksize); // needed only for migration - int oldchunkcount = (int) oldindexrow.getColLongB256(idx_col_chunkcount); - int oldrownumber = (int) oldindexrow.getColLongB256(idx_col_indexpos); + int oldchunksize = (int) oldindexrow.getColLong(idx_col_chunksize); // needed only for migration + int oldchunkcount = (int) oldindexrow.getColLong(idx_col_chunkcount); + int oldrownumber = (int) oldindexrow.getColLong(idx_col_indexpos); int oldPartitionNumber = arrayIndex(oldchunkcount); int oldSerialNumber = 0; @@ -234,8 +260,8 @@ public class kelondroCollectionIndex { array.set(oldrownumber, arrayEntry); // update the index entry - oldindexrow.setColLongB256(idx_col_chunkcount, collection.size()); - oldindexrow.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); + oldindexrow.setCol(idx_col_chunkcount, collection.size()); + oldindexrow.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); index.put(oldindexrow); } else { // we need a new slot, that means we must first delete the old entry @@ -271,11 +297,11 @@ public class kelondroCollectionIndex { // store the new row number in the index kelondroRow.Entry indexEntry = index.row().newEntry(); indexEntry.setCol(idx_col_key, key); - indexEntry.setColLongB256(idx_col_chunksize, this.rowdef.objectsize()); - indexEntry.setColLongB256(idx_col_chunkcount, collection.size()); - indexEntry.setColLongB256(idx_col_indexpos, (long) newRowNumber); - indexEntry.setColLongB256(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); - indexEntry.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); + indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize()); + indexEntry.setCol(idx_col_chunkcount, collection.size()); + indexEntry.setCol(idx_col_indexpos, (long) newRowNumber); + indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); + indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); index.put(indexEntry); } @@ -301,9 +327,9 @@ public class kelondroCollectionIndex { // call this only within a synchronized(index) environment // read values - int chunksize = (int) indexrow.getColLongB256(idx_col_chunksize); - int chunkcount = (int) indexrow.getColLongB256(idx_col_chunkcount); - int rownumber = (int) indexrow.getColLongB256(idx_col_indexpos); + int chunksize = (int) indexrow.getColLong(idx_col_chunksize); + int chunkcount = (int) indexrow.getColLong(idx_col_chunkcount); + int rownumber = (int) indexrow.getColLong(idx_col_indexpos); int partitionnumber = arrayIndex(chunkcount); int serialnumber = 0; @@ -320,18 +346,18 @@ public class kelondroCollectionIndex { // store the row number in the index; this may be a double-entry, but better than nothing kelondroRow.Entry indexEntry = index.row().newEntry(); indexEntry.setCol(idx_col_key, arrayrow.getColBytes(0)); - indexEntry.setColLongB256(idx_col_chunksize, this.rowdef.objectsize()); - indexEntry.setColLongB256(idx_col_chunkcount, collection.size()); - indexEntry.setColLongB256(idx_col_indexpos, (long) rownumber); - indexEntry.setColLongB256(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); - indexEntry.setColLongB256(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); + indexEntry.setCol(idx_col_chunksize, this.rowdef.objectsize()); + indexEntry.setCol(idx_col_chunkcount, collection.size()); + indexEntry.setCol(idx_col_indexpos, (long) rownumber); + indexEntry.setCol(idx_col_lastread, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); + indexEntry.setCol(idx_col_lastwrote, kelondroRowCollection.daysSince2000(System.currentTimeMillis())); index.put(indexEntry); throw new kelondroException(arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString(), "array contains wrong row '" + new String(arrayrow.getColBytes(0)) + "', expected is '" + new String(indexrow.getColBytes(idx_col_key)) + "', the row has been fixed"); } int chunkcountInArray = collection.size(); if (chunkcountInArray != chunkcount) { // fix the entry in index - indexrow.setColLong(idx_col_chunkcount, chunkcountInArray); + indexrow.setCol(idx_col_chunkcount, chunkcountInArray); index.put(indexrow); array.logFailure("INCONSISTENCY in " + arrayFile(this.path, this.filenameStub, this.loadfactor, chunksize, partitionnumber, serialnumber).toString() + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed"); } diff --git a/source/de/anomic/kelondro/kelondroColumn.java b/source/de/anomic/kelondro/kelondroColumn.java index d235c26c3..c4c211cf1 100644 --- a/source/de/anomic/kelondro/kelondroColumn.java +++ b/source/de/anomic/kelondro/kelondroColumn.java @@ -42,7 +42,7 @@ public class kelondroColumn { private int celltype, cellwidth, encoder; private String nickname, description; - + public kelondroColumn(String nickname, int celltype, int encoder, int cellwidth, String description) { this.celltype = celltype; this.cellwidth = cellwidth; @@ -108,18 +108,18 @@ public class kelondroColumn { if (p < 0) { // if the cell was defined with a type, we dont need to give an explicit with definition if (this.cellwidth < 0) throw new kelondroException("kelondroColumn - no cell width definition given"); - p = celldef.indexOf(' '); - if (p < 0) { + int q = celldef.indexOf(' '); + if (q < 0) { this.nickname = celldef; celldef = ""; } else { this.nickname = celldef.substring(0, p); - celldef = celldef.substring(p + 1); + celldef = celldef.substring(q + 1); } } else { + this.nickname = celldef.substring(0, p); int q = celldef.indexOf(' '); if (q < 0) { - this.nickname = celldef.substring(0, p); try { this.cellwidth = Integer.parseInt(celldef.substring(p + 1)); } catch (NumberFormatException e) { @@ -127,7 +127,6 @@ public class kelondroColumn { } celldef = ""; } else { - this.nickname = celldef.substring(0, q); try { this.cellwidth = Integer.parseInt(celldef.substring(p + 1, q)); } catch (NumberFormatException e) { @@ -157,8 +156,8 @@ public class kelondroColumn { String expf = celldef.substring(1, p); celldef = celldef.substring(p + 1).trim(); if (expf.equals("b64e")) this.encoder = encoder_b64e; - else if (expf.equals("b256")) this.encoder = encoder_b64e; - else if (expf.equals("bytes")) this.encoder = encoder_b64e; + else if (expf.equals("b256")) this.encoder = encoder_b256; + else if (expf.equals("bytes")) this.encoder = encoder_bytes; else { if (this.celltype == celltype_undefined) this.encoder = encoder_bytes; else if (this.celltype == celltype_boolean) this.encoder = encoder_bytes; @@ -181,6 +180,12 @@ public class kelondroColumn { } } + public void setAttributes(String nickname, int celltype, int encoder) { + this.celltype = celltype; + this.encoder = encoder; + this.nickname = nickname; + } + public int celltype() { return this.celltype; } @@ -206,21 +211,28 @@ public class kelondroColumn { switch (celltype) { case celltype_boolean: s.append("boolean "); + s.append(nickname); break; case celltype_binary: s.append("byte[] "); + s.append(nickname); + s.append('-'); + s.append(cellwidth); break; case celltype_string: s.append("String "); + s.append(nickname); + s.append('-'); + s.append(cellwidth); break; case celltype_cardinal: s.append("Cardinal "); + s.append(nickname); + s.append('-'); + s.append(cellwidth); break; } - s.append(nickname); - s.append('-'); - s.append(cellwidth); - s.append(' '); + switch (encoder) { case encoder_b64e: s.append(" {b64e}"); @@ -231,4 +243,13 @@ public class kelondroColumn { } return new String(s); } + + public boolean equals(kelondroColumn otherCol) { + return + (this.celltype == otherCol.celltype) && + (this.cellwidth == otherCol.cellwidth) && + (this.encoder == otherCol.encoder) && + (this.nickname.equals(otherCol.nickname)); + } + } diff --git a/source/de/anomic/kelondro/kelondroDyn.java b/source/de/anomic/kelondro/kelondroDyn.java index 5eaf4ba3a..9de99a341 100644 --- a/source/de/anomic/kelondro/kelondroDyn.java +++ b/source/de/anomic/kelondro/kelondroDyn.java @@ -76,7 +76,7 @@ public class kelondroDyn extends kelondroTree { int nodesize, char fillChar, kelondroOrder objectOrder, boolean exitOnFail) { // creates a new dynamic tree - super(file, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(new int[] { key + counterlen, nodesize }), objectOrder, 1, 8, exitOnFail); + super(file, buffersize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize), objectOrder, 1, 8, exitOnFail); this.keylen = row().width(0) - counterlen; this.reclen = row().width(1); this.fillChar = fillChar; diff --git a/source/de/anomic/kelondro/kelondroDynTree.java b/source/de/anomic/kelondro/kelondroDynTree.java index d84b51a77..0125b57d2 100644 --- a/source/de/anomic/kelondro/kelondroDynTree.java +++ b/source/de/anomic/kelondro/kelondroDynTree.java @@ -102,8 +102,8 @@ public class kelondroDynTree { Iterator i = table.dynKeys(true, false); String onekey = (String) i.next(); kelondroTree onetree = getTree(onekey); - int[] columns = new int[onetree.row().columns()]; - for (int j = 0; j < columns.length; j++) columns[j] = onetree.row().width(j); + kelondroColumn[] columns = new kelondroColumn[onetree.row().columns()]; + for (int j = 0; j < columns.length; j++) columns[j] = onetree.row().column(j); this.rowdef = new kelondroRow(columns); closeTree(onekey); } @@ -327,7 +327,7 @@ public class kelondroDynTree { kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, '_'); System.out.println("opened: table keylength=" + dt.table.row().width(0) + ", sectorsize=" + dt.table.row().width(1) + ", " + dt.table.size() + " entries."); } else { - kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow(new int[] {10,20,30}), '_', true); + kelondroDynTree dt = new kelondroDynTree(file, 0x100000L, 0, 16, 512, new kelondroRow("byte[] a-10, byte[] b-20, byte[] c-30"), '_', true); String name; kelondroTree t; kelondroRow.Entry line; diff --git a/source/de/anomic/kelondro/kelondroFixedWidthArray.java b/source/de/anomic/kelondro/kelondroFixedWidthArray.java index 63b0ee296..0d186f0dc 100644 --- a/source/de/anomic/kelondro/kelondroFixedWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFixedWidthArray.java @@ -134,7 +134,7 @@ public class kelondroFixedWidthArray extends kelondroRecords implements kelondro public static void main(String[] args) { File f = new File("d:\\\\mc\\privat\\fixtest.db"); f.delete(); - kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, new kelondroRow(new int[]{12, 4}), 6, true); + kelondroFixedWidthArray k = new kelondroFixedWidthArray(f, new kelondroRow("byte[] a-12, byte[] b-4"), 6, true); try { k.set(3, k.row().newEntry(new byte[][]{ "test123".getBytes(), "abcd".getBytes()})); diff --git a/source/de/anomic/kelondro/kelondroFlexTable.java b/source/de/anomic/kelondro/kelondroFlexTable.java index 2f1348a81..da3a54633 100644 --- a/source/de/anomic/kelondro/kelondroFlexTable.java +++ b/source/de/anomic/kelondro/kelondroFlexTable.java @@ -79,7 +79,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr } private kelondroIndex initializeRamIndex(kelondroOrder objectOrder) throws IOException { - kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new int[]{super.row().width(0), 4}), 0); + kelondroRowBufferedSet ri = new kelondroRowBufferedSet(new kelondroRow(new kelondroColumn[]{super.row().column(0), new kelondroColumn("int c-4 {b256}")}), 0); ri.setOrdering(objectOrder, 0); Iterator content = super.col[0].contentNodes(-1); kelondroRecords.Node node; @@ -90,7 +90,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr i = node.handle().hashCode(); indexentry = ri.rowdef.newEntry(); indexentry.setCol(0, node.getValueRow()); - indexentry.setColLongB256(1, i); + indexentry.setCol(1, i); ri.add(indexentry); if ((i % 10000) == 0) { System.out.print('.'); @@ -117,7 +117,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr i = node.handle().hashCode(); indexentry = index.row().newEntry(); indexentry.setCol(0, node.getValueRow()); - indexentry.setColLongB256(1, i); + indexentry.setCol(1, i); index.put(indexentry); if ((i % 10000) == 0) { System.out.print('.'); @@ -176,7 +176,7 @@ public class kelondroFlexTable extends kelondroFlexWidthArray implements kelondr public Object next() { kelondroRow.Entry idxEntry = (kelondroRow.Entry) indexIterator.next(); - int idx = (int) idxEntry.getColLongB256(1); + int idx = (int) idxEntry.getColLong(1); try { return get(idx); } catch (IOException e) { diff --git a/source/de/anomic/kelondro/kelondroFlexWidthArray.java b/source/de/anomic/kelondro/kelondroFlexWidthArray.java index 74d4c1189..9098ba6f0 100644 --- a/source/de/anomic/kelondro/kelondroFlexWidthArray.java +++ b/source/de/anomic/kelondro/kelondroFlexWidthArray.java @@ -27,6 +27,10 @@ package de.anomic.kelondro; import java.io.File; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import de.anomic.server.serverFileUtils; public class kelondroFlexWidthArray implements kelondroArray { @@ -44,7 +48,7 @@ public class kelondroFlexWidthArray implements kelondroArray { check += '_'; } - // check if tabel directory exists + // check if table directory exists File tabledir = new File(path, tablename + ".table"); if (tabledir.exists()) { if (!(tabledir.isDirectory())) throw new IOException("path " + tabledir.toString() + " must be a directory"); @@ -53,6 +57,21 @@ public class kelondroFlexWidthArray implements kelondroArray { tabledir.mkdir(); } + // save/check property file for this array + File propfile = new File(tabledir, "properties"); + Map props = new HashMap(); + if (propfile.exists()) { + props = serverFileUtils.loadHashMap(propfile); + String stored_rowdef = (String) props.get("rowdef"); + if ((stored_rowdef == null) || (!(rowdef.subsumes(new kelondroRow(stored_rowdef))))) { + System.out.println("FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" + + rowdef + "' for flex table '" + path + "'"); + System.exit(-1); + } + } + props.put("rowdef", rowdef.toString()); + serverFileUtils.saveMap(propfile, props, "FlexWidthArray properties"); + // open existing files String[] files = tabledir.list(); for (int i = 0; i < files.length; i++) { @@ -78,9 +97,9 @@ public class kelondroFlexWidthArray implements kelondroArray { q--; } // create new array file - int columns[] = new int[q - p + 1]; + kelondroColumn[] columns = new kelondroColumn[q - p + 1]; for (int j = p; j <= q; j++) { - columns[j - p] = rowdef.width(j); + columns[j - p] = rowdef.column(j); check = check.substring(0, j) + "X" + check.substring(j + 1); } col[p] = new kelondroFixedWidthArray(new File(tabledir, colfilename(p, q)), new kelondroRow(columns), 16, true); @@ -197,7 +216,7 @@ public class kelondroFlexWidthArray implements kelondroArray { public static void main(String[] args) { File f = new File("d:\\\\mc\\privat\\"); try { - kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow(new int[]{12, 4}), true); + kelondroFlexWidthArray k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true); k.set(3, k.row().newEntry(new byte[][]{ "test123".getBytes(), "abcd".getBytes()})); @@ -205,7 +224,7 @@ public class kelondroFlexWidthArray implements kelondroArray { "test456".getBytes(), "efgh".getBytes()})); k.close(); - k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow(new int[]{12, 4}), true); + k = new kelondroFlexWidthArray(f, "flextest", new kelondroRow("byte[] a-12, byte[] b-4"), true); System.out.println(k.get(2).toString()); System.out.println(k.get(3).toString()); System.out.println(k.get(4).toString()); diff --git a/source/de/anomic/kelondro/kelondroHashtable.java b/source/de/anomic/kelondro/kelondroHashtable.java index b46484ccf..37403d88e 100644 --- a/source/de/anomic/kelondro/kelondroHashtable.java +++ b/source/de/anomic/kelondro/kelondroHashtable.java @@ -143,7 +143,7 @@ public class kelondroHashtable { private static final byte[] dummyKey = kelondroBase64Order.enhancedCoder.encodeLong(0, 5).getBytes(); public kelondroHashtable(File file, kelondroRow rowdef, int offset, int maxsize, int maxrehash, boolean exitOnFail) { - // this creates a new hashtable + // this creates a new hashtable // the key element is not part of the columns array // this is unlike the kelondroTree, where the key is part of a row // the offset is a number of bits that is omitted in the folded tree hierarchy @@ -180,9 +180,9 @@ public class kelondroHashtable { } private kelondroRow extCol(kelondroRow rowdef) { - int[] newCol = new int[rowdef.columns() + 1]; - newCol[0] = 4; - for (int i = 0; i < rowdef.columns(); i++) newCol[i + 1] = rowdef.width(i); + kelondroColumn[] newCol = new kelondroColumn[rowdef.columns() + 1]; + newCol[0] = new kelondroColumn("Cardinal key-4 {b256}"); + for (int i = 0; i < rowdef.columns(); i++) newCol[i + 1] = rowdef.column(i); return new kelondroRow(newCol); } @@ -219,7 +219,7 @@ public class kelondroHashtable { // write row kelondroRow.Entry newhkrow = hashArray.row().newEntry(); - newhkrow.setColLongB256(0, hash.key()); + newhkrow.setCol(0, hash.key()); newhkrow.setCol(1, rowentry.bytes()); hashArray.set(rowNumber, newhkrow); return hashArray.row().newEntry(oldhkrow.getColBytes(1)); @@ -233,7 +233,7 @@ public class kelondroHashtable { rowNumber = hash.node(); if (rowNumber >= hashArray.size()) return new Object[]{new Integer(rowNumber), null}; hkrow = hashArray.get(rowNumber); - rowKey = (int) hkrow.getColLongB256(0); + rowKey = (int) hkrow.getColLong(0); if (rowKey == 0) return new Object[]{new Integer(rowNumber), null}; hash.rehash(); } while (rowKey != hash.key()); diff --git a/source/de/anomic/kelondro/kelondroIntBytesMap.java b/source/de/anomic/kelondro/kelondroIntBytesMap.java index ace851b8f..0f9e42ab3 100644 --- a/source/de/anomic/kelondro/kelondroIntBytesMap.java +++ b/source/de/anomic/kelondro/kelondroIntBytesMap.java @@ -29,7 +29,7 @@ package de.anomic.kelondro; public class kelondroIntBytesMap extends kelondroRowBufferedSet { public kelondroIntBytesMap(int payloadSize, int initSize) { - super(new kelondroRow(new int[]{4, payloadSize}), initSize); + super(new kelondroRow("Cardinal key-4 {b256}, byte[] payload-" + payloadSize), initSize); // initialize ordering super.setOrdering(kelondroNaturalOrder.naturalOrder, 0); @@ -43,7 +43,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet { public byte[] putb(int ii, byte[] value) { kelondroRow.Entry newentry = rowdef.newEntry(); - newentry.setCol(0, kelondroNaturalOrder.encodeLong((long) ii, 4)); + newentry.setCol(0, (long) ii); newentry.setCol(1, value); kelondroRow.Entry oldentry = super.put(newentry); if (oldentry == null) return null; @@ -52,7 +52,7 @@ public class kelondroIntBytesMap extends kelondroRowBufferedSet { public void addb(int ii, byte[] value) { kelondroRow.Entry newentry = rowdef.newEntry(); - newentry.setCol(0, kelondroNaturalOrder.encodeLong((long) ii, 4)); + newentry.setCol(0, (long) ii); newentry.setCol(1, value); add(newentry); } diff --git a/source/de/anomic/kelondro/kelondroRecords.java b/source/de/anomic/kelondro/kelondroRecords.java index 9bed9b337..6e7220e7e 100644 --- a/source/de/anomic/kelondro/kelondroRecords.java +++ b/source/de/anomic/kelondro/kelondroRecords.java @@ -376,23 +376,23 @@ public class kelondroRecords { this.OHBYTEC = entryFile.readShort(POS_OHBYTEC); this.OHHANDLEC = entryFile.readShort(POS_OHHANDLEC); - int[] COLWIDTHS = new int[entryFile.readShort(POS_COLUMNS)]; + kelondroColumn[] COLDEFS = new kelondroColumn[entryFile.readShort(POS_COLUMNS)]; this.HANDLES = new Handle[entryFile.readInt(POS_INTPROPC)]; this.TXTPROPS = new byte[entryFile.readInt(POS_TXTPROPC)][]; this.TXTPROPW = entryFile.readInt(POS_TXTPROPW); - if (COLWIDTHS.length == 0) throw new kelondroException(filename, "init: zero columns; strong failure"); + if (COLDEFS.length == 0) throw new kelondroException(filename, "init: zero columns; strong failure"); // calculate dynamic run-time seek pointers - POS_HANDLES = POS_COLWIDTHS + COLWIDTHS.length * 4; + POS_HANDLES = POS_COLWIDTHS + COLDEFS.length * 4; POS_TXTPROPS = POS_HANDLES + HANDLES.length * 4; POS_NODES = POS_TXTPROPS + TXTPROPS.length * TXTPROPW; // read configuration arrays - for (int i = 0; i < COLWIDTHS.length; i++) { - COLWIDTHS[i] = entryFile.readInt(POS_COLWIDTHS + 4 * i); + for (int i = 0; i < COLDEFS.length; i++) { + COLDEFS[i] = new kelondroColumn("col-" + i, kelondroColumn.celltype_binary, kelondroColumn.encoder_bytes, entryFile.readInt(POS_COLWIDTHS + 4 * i), ""); } - this.ROW = new kelondroRow(COLWIDTHS); + this.ROW = new kelondroRow(COLDEFS); for (int i = 0; i < HANDLES.length; i++) { HANDLES[i] = new Handle(entryFile.readInt(POS_HANDLES + 4 * i)); } @@ -939,6 +939,21 @@ public class kelondroRecords { public final kelondroRow row() { return this.ROW; } + + public final void assignRowdef(kelondroRow rowdef) { + // overwrites a given rowdef + // the new rowdef must be compatible + if (rowdef.columns() < ROW.columns()) + throw new kelondroException(this.filename, + "new rowdef '" + rowdef.toString() + "' is not compatible with old rowdef '" + ROW.toString() + "', they have a different number of columns"); + + // adopt encoder and cell type + kelondroColumn col; + for (int i = 0; i < ROW.columns(); i++) { + col = rowdef.column(i); + ROW.column(i).setAttributes(col.nickname(), col.celltype(), col.encoder()); + } + } private final long seekpos(Handle handle) { assert (handle.index >= 0): "handle index too low: " + handle.index; diff --git a/source/de/anomic/kelondro/kelondroRow.java b/source/de/anomic/kelondro/kelondroRow.java index c96582aea..a7cdbbeb6 100644 --- a/source/de/anomic/kelondro/kelondroRow.java +++ b/source/de/anomic/kelondro/kelondroRow.java @@ -48,9 +48,9 @@ public class kelondroRow { this.colstart[i] = this.objectsize; this.objectsize += this.row[i].cellwidth(); } - } + /* public kelondroRow(int[] rowi) { this.row = new kelondroColumn[rowi.length]; this.colstart = new int[rowi.length]; @@ -61,7 +61,7 @@ public class kelondroRow { this.objectsize += this.row[i].cellwidth(); } } - + */ public kelondroRow(String structure) { // define row with row syntax // example: @@ -120,6 +120,10 @@ public class kelondroRow { return this.objectsize; } + public kelondroColumn column(int col) { + return row[col]; + } + public int width(int row) { return this.row[row].cellwidth(); } @@ -252,11 +256,11 @@ public class kelondroRow { } } - public void setColByte(int column, byte c) { + public void setCol(int column, byte c) { rowinstance[colstart[column]] = c; } - public void setColString(int column, String cell, String encoding) { + public void setCol(int column, String cell, String encoding) { if (encoding == null) setCol(column, cell.getBytes()); else @@ -267,32 +271,22 @@ public class kelondroRow { } } - public void setColLong(int column, long cell) { + public void setCol(int column, long cell) { // uses the column definition to choose the right encoding switch (row[column].encoder()) { case kelondroColumn.encoder_none: throw new kelondroException("ROW", "setColLong has celltype none, no encoder given"); case kelondroColumn.encoder_b64e: - setColLongB64E(column, cell); + kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); break; case kelondroColumn.encoder_b256: - setColLongB256(column, cell); + kelondroNaturalOrder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); break; case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "setColLong of celltype bytes not applicable"); } } - public void setColLongB256(int column, long cell) { - // temporary method, should be replaced by setColLong if all row declarations are complete - kelondroNaturalOrder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); - } - - public void setColLongB64E(int column, long cell) { - // temporary method, should be replaced by setColLong if all row declarations are complete - kelondroBase64Order.enhancedCoder.encodeLong(cell, rowinstance, colstart[column], row[column].cellwidth()); - } - public String getColString(int column, String encoding) { int length = row[column].cellwidth(); int offset = colstart[column]; @@ -316,25 +310,15 @@ public class kelondroRow { case kelondroColumn.encoder_none: throw new kelondroException("ROW", "getColLong has celltype none, no encoder given"); case kelondroColumn.encoder_b64e: - return getColLongB64E(column); + return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); case kelondroColumn.encoder_b256: - return getColLongB256(column); + return kelondroNaturalOrder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); case kelondroColumn.encoder_bytes: throw new kelondroException("ROW", "getColLong of celltype bytes not applicable"); } throw new kelondroException("ROW", "getColLong did not find appropriate encoding"); } - - public long getColLongB256(int column) { - // temporary method, should be replaced by getColLong if all row declarations are complete - return kelondroNaturalOrder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); - } - - public long getColLongB64E(int column) { - // temporary method, should be replaced by getColLong if all row declarations are complete - return kelondroBase64Order.enhancedCoder.decodeLong(rowinstance, colstart[column], row[column].cellwidth()); - } - + public byte getColByte(int column) { return rowinstance[colstart[column]]; } @@ -344,41 +328,6 @@ public class kelondroRow { System.arraycopy(rowinstance, colstart[column], c, 0, row[column].cellwidth()); return c; } - - /* - public byte[] toEncodedBytesForm() { - byte[] b = new byte[objectsize]; - int encoder, cellwidth; - int p = 0; - for (int i = 0; i < row.length; i++) { - encoder = row[i].encoder(); - cellwidth = row[i].cellwidth(); - switch (row[i].celltype()) { - case kelondroColumn.celltype_undefined: - throw new kelondroException("ROW", "toEncodedForm of celltype undefined not possible"); - case kelondroColumn.celltype_boolean: - throw new kelondroException("ROW", "toEncodedForm of celltype boolean not yet implemented"); - case kelondroColumn.celltype_binary: - System.arraycopy(rowinstance, colstart[i], b, p, cellwidth); - p += cellwidth; - continue; - case kelondroColumn.celltype_string: - System.arraycopy(rowinstance, colstart[i], b, p, cellwidth); - p += cellwidth; - continue; - case kelondroColumn.celltype_cardinal: - if (encoder == kelondroColumn.encoder_b64e) { - long c = bytes2long(rowinstance, colstart[i], cellwidth); - System.arraycopy(kelondroBase64Order.enhancedCoder.encodeLongSmart(c, cellwidth).getBytes(), 0, b, p, cellwidth); - p += cellwidth; - continue; - } - throw new kelondroException("ROW", "toEncodedForm of celltype cardinal has no encoder (" + encoder + ")"); - } - } - return b; - } - */ public String toPropertyForm(boolean includeBraces, boolean decimalCardinal) { StringBuffer sb = new StringBuffer(); @@ -445,4 +394,14 @@ public class kelondroRow { return x; } + public boolean subsumes(kelondroRow otherRow) { + // returns true, if this row has at least all columns as the other row + // and possibly some more + if (this.objectsize < otherRow.objectsize) return false; + for (int i = 0; i < otherRow.row.length; i++) { + if (!(this.row[i].equals(otherRow.row[i]))) return false; + } + return true; + } + } diff --git a/source/de/anomic/kelondro/kelondroRowCollection.java b/source/de/anomic/kelondro/kelondroRowCollection.java index 23a3436f0..cf32c2227 100644 --- a/source/de/anomic/kelondro/kelondroRowCollection.java +++ b/source/de/anomic/kelondro/kelondroRowCollection.java @@ -84,9 +84,9 @@ public class kelondroRowCollection { public kelondroRowCollection(kelondroRow rowdef, byte[] exportedCollectionRowinstance) { this.rowdef = rowdef; kelondroRow.Entry exportedCollection = exportRow(exportedCollectionRowinstance.length - exportOverheadSize).newEntry(exportedCollectionRowinstance); - this.chunkcount = (int) exportedCollection.getColLongB256(exp_chunkcount); - this.lastTimeRead = (exportedCollection.getColLongB256(exp_last_read) + 10957) * day; - this.lastTimeWrote = (exportedCollection.getColLongB256(exp_last_wrote) + 10957) * day; + this.chunkcount = (int) exportedCollection.getColLong(exp_chunkcount); + this.lastTimeRead = (exportedCollection.getColLong(exp_last_read) + 10957) * day; + this.lastTimeWrote = (exportedCollection.getColLong(exp_last_wrote) + 10957) * day; String sortOrderKey = exportedCollection.getColString(exp_order_type, null); if (sortOrderKey.equals("__")) { this.sortOrder = null; @@ -94,8 +94,8 @@ public class kelondroRowCollection { this.sortOrder = kelondroNaturalOrder.bySignature(sortOrderKey); if (this.sortOrder == null) this.sortOrder = kelondroBase64Order.bySignature(sortOrderKey); } - this.sortColumn = (int) exportedCollection.getColLongB256(exp_order_col); - this.sortBound = (int) exportedCollection.getColLongB256(exp_order_bound); + this.sortColumn = (int) exportedCollection.getColLong(exp_order_col); + this.sortBound = (int) exportedCollection.getColLong(exp_order_bound); this.chunkcache = exportedCollection.getColBytes(exp_collection); } @@ -125,12 +125,12 @@ public class kelondroRowCollection { trim(); kelondroRow row = exportRow(chunkcache.length); kelondroRow.Entry entry = row.newEntry(); - entry.setColLongB256(exp_chunkcount, size()); - entry.setColLongB256(exp_last_read, daysSince2000(this.lastTimeRead)); - entry.setColLongB256(exp_last_wrote, daysSince2000(this.lastTimeWrote)); + entry.setCol(exp_chunkcount, size()); + entry.setCol(exp_last_read, daysSince2000(this.lastTimeRead)); + entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote)); entry.setCol(exp_order_type, (this.sortOrder == null) ? "__".getBytes() : this.sortOrder.signature().getBytes()); - entry.setColLongB256(exp_order_col, this.sortColumn); - entry.setColLongB256(exp_order_bound, this.sortBound); + entry.setCol(exp_order_col, this.sortColumn); + entry.setCol(exp_order_bound, this.sortBound); entry.setCol(exp_collection, chunkcache); return entry.bytes(); } diff --git a/source/de/anomic/kelondro/kelondroRowSet.java b/source/de/anomic/kelondro/kelondroRowSet.java index adc10204d..48257e7fd 100644 --- a/source/de/anomic/kelondro/kelondroRowSet.java +++ b/source/de/anomic/kelondro/kelondroRowSet.java @@ -449,7 +449,7 @@ public class kelondroRowSet extends kelondroRowCollection implements kelondroInd */ long start = System.currentTimeMillis(); - kelondroRowSet c = new kelondroRowSet(new kelondroRow(new int[]{12, 12}), 0); + kelondroRowSet c = new kelondroRowSet(new kelondroRow("byte[] a-12, byte[] b-12"), 0); Random random = new Random(0); byte[] key; for (int i = 0; i < 100000; i++) { diff --git a/source/de/anomic/kelondro/kelondroStack.java b/source/de/anomic/kelondro/kelondroStack.java index bf86460b9..c381821ee 100644 --- a/source/de/anomic/kelondro/kelondroStack.java +++ b/source/de/anomic/kelondro/kelondroStack.java @@ -67,10 +67,6 @@ public final class kelondroStack extends kelondroRecords { private static int root = 0; // pointer for FHandles-array: pointer to root node private static int toor = 1; // pointer for FHandles-array: pointer to root node - public kelondroStack(File file, int key, int value, boolean exitOnFail) { - this(file, new kelondroRow(new int[] { key, value }), exitOnFail); - } - public kelondroStack(File file, kelondroRow rowdef, boolean exitOnFail) { // this creates a new stack super(file, 0, 0, thisOHBytes, thisOHHandles, rowdef, thisFHandles, rowdef.columns() /* txtProps */, 80 /* txtPropWidth */, exitOnFail); @@ -413,7 +409,7 @@ public final class kelondroStack extends kelondroRecords { // create File f = new File(args[3]); if (f.exists()) f.delete(); - kelondroRow lens = new kelondroRow(new int[]{Integer.parseInt(args[1]), Integer.parseInt(args[2])}); + kelondroRow lens = new kelondroRow("byte[] key-" + Integer.parseInt(args[1]) + ", byte[] value-" + Integer.parseInt(args[2])); kelondroStack fm = new kelondroStack(f, lens, true); fm.close(); } else if (args[0].equals("-p")) { diff --git a/source/de/anomic/kelondro/kelondroTree.java b/source/de/anomic/kelondro/kelondroTree.java index 7dba027be..9b175dd1b 100644 --- a/source/de/anomic/kelondro/kelondroTree.java +++ b/source/de/anomic/kelondro/kelondroTree.java @@ -90,11 +90,6 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { private int readAheadChunkSize = 100; private long lastIteratorCount = readAheadChunkSize; private kelondroObjectCache objectCache; - - - public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, int key, int value, boolean exitOnFail) { - this(file, buffersize, preloadTime, objectCachePercent, new kelondroRow(new int[] { key, value }), new kelondroNaturalOrder(true), 1, 8, exitOnFail); - } public kelondroTree(File file, long buffersize, long preloadTime, int objectCachePercent, kelondroRow rowdef, boolean exitOnFail) { // this creates a new tree file @@ -1235,7 +1230,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { // test script File testFile = new File("test.db"); while (testFile.exists()) testFile.delete(); - kelondroTree fm = new kelondroTree(testFile, 0x100000, 0, 10, 4, 4, true); + kelondroTree fm = new kelondroTree(testFile, 0x100000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true); byte[] dummy = "".getBytes(); fm.put("abc0".getBytes(), dummy); fm.put("bcd0".getBytes(), dummy); fm.put("def0".getBytes(), dummy); fm.put("bab0".getBytes(), dummy); @@ -1310,7 +1305,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { // create File f = new File(args[3]); if (f.exists()) f.delete(); - kelondroRow lens = new kelondroRow(new int[]{Integer.parseInt(args[1]), Integer.parseInt(args[2])}); + kelondroRow lens = new kelondroRow("byte[] key-" + Integer.parseInt(args[1]) + ", byte[] value-" + Integer.parseInt(args[2])); kelondroTree fm = new kelondroTree(f, 0x100000, 0, 10, lens, true); fm.close(); } else if (args[0].equals("-u")) { @@ -1377,7 +1372,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { int steps = 0; while (true) { if (testFile.exists()) testFile.delete(); - tt = new kelondroTree(testFile, 200, 0, 10, 4 ,4, true); + tt = new kelondroTree(testFile, 200, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true); steps = 10 + ((int) System.currentTimeMillis() % 7) * (((int) System.currentTimeMillis() + 17) % 11); t = s; d = ""; @@ -1443,7 +1438,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { File f = new File("test.db"); if (f.exists()) f.delete(); try { - kelondroTree tt = new kelondroTree(f, 1000, 0, 10, 4, 4, true); + kelondroTree tt = new kelondroTree(f, 1000, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true); byte[] b; b = testWord('B'); tt.put(b, b); //tt.print(); b = testWord('C'); tt.put(b, b); //tt.print(); @@ -1508,7 +1503,7 @@ public class kelondroTree extends kelondroRecords implements kelondroIndex { public static kelondroTree testTree(File f, String testentities) throws IOException { if (f.exists()) f.delete(); - kelondroTree tt = new kelondroTree(f, 0, 0, 10, 4, 4, true); + kelondroTree tt = new kelondroTree(f, 0, 0, 10, new kelondroRow("byte[] a-4, byte[] b-4"), true); byte[] b; for (int i = 0; i < testentities.length(); i++) { b = testWord(testentities.charAt(i)); diff --git a/source/de/anomic/plasma/plasmaCrawlBalancer.java b/source/de/anomic/plasma/plasmaCrawlBalancer.java index cf5522160..66cf346f1 100644 --- a/source/de/anomic/plasma/plasmaCrawlBalancer.java +++ b/source/de/anomic/plasma/plasmaCrawlBalancer.java @@ -63,10 +63,10 @@ public class plasmaCrawlBalancer { try { stack = new kelondroStack(stackFile); } catch (IOException e) { - stack = new kelondroStack(stackFile, new kelondroRow(new int[] {indexURL.urlHashLength}), true); + stack = new kelondroStack(stackFile, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength), true); } } else { - stack = new kelondroStack(stackFile, new kelondroRow(new int[] {indexURL.urlHashLength}), true); + stack = new kelondroStack(stackFile, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength), true); } domainStacks = new HashMap(); } diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java index e393e651c..6a3912d18 100644 --- a/source/de/anomic/plasma/plasmaCrawlEURL.java +++ b/source/de/anomic/plasma/plasmaCrawlEURL.java @@ -125,29 +125,30 @@ public class plasmaCrawlEURL extends indexURL { public plasmaCrawlEURL(File cachePath, int bufferkb, long preloadTime) { super(); - int[] ce = { - urlHashLength, // the url's hash - urlHashLength, // the url's referrer hash - urlHashLength, // the crawling initiator - urlHashLength, // the crawling executor - urlStringLength, // the url as string - urlNameLength, // the name of the url, from anchor tag name - urlDateLength, // the time when the url was first time appeared - urlDateLength, // the time when the url was last time tried to load - urlRetryLength, // number of load retries - urlErrorLength, // string describing load failure - urlFlagLength // extra space - }; + kelondroRow rowdef = new kelondroRow( + "String urlhash-" + urlHashLength + ", " + // the url's hash + "String refhash-" + urlHashLength + ", " + // the url's referrer hash + "String initiator-" + urlHashLength + ", " + // the crawling initiator + "String executor-" + urlHashLength + ", " + // the crawling executor + "String urlstring-" + urlStringLength + ", " + // the url as string + "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag name + "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared + "Cardinal loaddate-" + urlDateLength + " {b64e}, " + // the time when the url was last time tried to load + "Cardinal retrycount-" + urlRetryLength + " {b64e}, " + // number of load retries + "String failcause-" + urlErrorLength + ", " + // string describing load failure + "byte[] flags-" + urlFlagLength); // extra space + if (cachePath.exists()) try { // open existing cache urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); + urlHashCache.assignRowdef(rowdef); } catch (IOException e) { cachePath.delete(); - urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } else { // create new cache cachePath.getParentFile().mkdirs(); - urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } } @@ -252,9 +253,9 @@ public class plasmaCrawlEURL extends indexURL { this.url = new URL(entry.getColString(4, "UTF-8").trim()); String n = entry.getColString(5, "UTF-8"); this.name = (n == null) ? "" : n.trim(); - this.initdate = new Date(86400000 * entry.getColLongB64E(6)); - this.trydate = new Date(86400000 * entry.getColLongB64E(7)); - this.trycount = (int) entry.getColLongB64E(8); + this.initdate = new Date(86400000 * entry.getColLong(6)); + this.trydate = new Date(86400000 * entry.getColLong(7)); + this.trycount = (int) entry.getColLong(8); this.failreason = entry.getColString(9, "UTF-8"); this.flags = new bitfield(entry.getColBytes(10)); return; diff --git a/source/de/anomic/plasma/plasmaCrawlLURL.java b/source/de/anomic/plasma/plasmaCrawlLURL.java index 72e48ee9f..bd9398679 100644 --- a/source/de/anomic/plasma/plasmaCrawlLURL.java +++ b/source/de/anomic/plasma/plasmaCrawlLURL.java @@ -97,35 +97,34 @@ public final class plasmaCrawlLURL extends indexURL { public plasmaCrawlLURL(File cachePath, int bufferkb, long preloadTime) { super(); - int[] ce = { - urlHashLength, - urlStringLength, - urlDescrLength, - urlDateLength, - urlDateLength, - urlHashLength, - urlCopyCountLength, - urlFlagLength, - urlQualityLength, - urlLanguageLength, - urlDoctypeLength, - urlSizeLength, - urlWordCountLength - }; - int segmentsize = 0; - for (int i = 0; i < ce.length; i++) { segmentsize += ce[i]; } + kelondroRow rowdef = new kelondroRow( + "String urlhash-" + urlHashLength + ", " + // the url's hash + "String urlstring-" + urlStringLength + ", " + // the url as string + "String urldescr-" + urlDescrLength + ", " + // the description of the url + "Cardinal moddate-" + urlDateLength + " {b64e}, " + // last-modified from the httpd + "Cardinal loaddate-" + urlDateLength + " {b64e}, " + // time when the url was loaded + "String refhash-" + urlHashLength + ", " + // the url's referrer hash + "Cardinal copycount-" + urlCopyCountLength + " {b64e}, " + // + "byte[] flags-" + urlFlagLength + ", " + // flags + "Cardinal quality-" + urlQualityLength + " {b64e}, " + // + "String language-" + urlLanguageLength + ", " + // + "byte[] doctype-" + urlDoctypeLength + ", " + // + "Cardinal size-" + urlSizeLength + " {b64e}, " + // size of file in bytes + "Cardinal wc-" + urlWordCountLength + " {b64e}"); // word count + if (cachePath.exists()) { // open existing cache try { urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); + urlHashCache.assignRowdef(rowdef); } catch (IOException e) { cachePath.getParentFile().mkdirs(); - urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } } else { // create new cache cachePath.getParentFile().mkdirs(); - urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cachePath, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } // init result stacks @@ -284,7 +283,6 @@ public final class plasmaCrawlLURL extends indexURL { return false; } - public boolean exists(String urlHash) { try { if (urlHashCache.get(urlHash.getBytes()) != null) { @@ -297,18 +295,6 @@ public final class plasmaCrawlLURL extends indexURL { } } - /* - public long existsIndexSize() { - return this.existsIndex.size(); - } - - public void clearExistsIndex() { - synchronized (existsIndex) { - existsIndex.clear(); - } - } - */ - private static SimpleDateFormat dayFormatter = new SimpleDateFormat("yyyy/MM/dd", Locale.US); private static String daydate(Date date) { if (date == null) { @@ -368,13 +354,6 @@ public final class plasmaCrawlLURL extends indexURL { url = urle.url(); urlstr = url.toString(); - - // Kosmetik, die wirklich benutzte URL behaelt die ':80' -// if (txt.endsWith(":80")) txt = txt.substring(0, txt.length() - 3); -// if ((p = txt.indexOf(":80/")) != -1) { -// txt = txt.substring(0, p).concat(txt.substring(p + 3)); // den '/' erstmal nicht abschneiden -// serverLog.logFinest("PLASMA", "plasmaCrawlLURL/genTableProps Remove ':80' URL=" + txt); -// } urltxt = nxTools.cutUrlText(urlstr, 72); // shorten the string text like a URL cachepath = (url == null) ? "-not-cached-" : cacheManager.getCachePath(url).toString().replace('\\', '/').substring(cacheManager.cachePath.toString().length() + 1); @@ -478,16 +457,16 @@ public final class plasmaCrawlLURL extends indexURL { this.urlHash = entry.getColString(0, null); this.url = new URL(entry.getColString(1, "UTF-8").trim()); this.descr = (entry.empty(2)) ? this.url.toString() : entry.getColString(2, "UTF-8").trim(); - this.moddate = new Date(86400000 * entry.getColLongB64E(3)); - this.loaddate = new Date(86400000 * entry.getColLongB64E(4)); + this.moddate = new Date(86400000 * entry.getColLong(3)); + this.loaddate = new Date(86400000 * entry.getColLong(4)); this.referrerHash = (entry.empty(5)) ? dummyHash : entry.getColString(5, "UTF-8"); - this.copyCount = (int) entry.getColLongB64E(6); + this.copyCount = (int) entry.getColLong(6); this.flags = entry.getColString(7, "UTF-8"); - this.quality = (int) entry.getColLongB64E(8); + this.quality = (int) entry.getColLong(8); this.language = entry.getColString(9, "UTF-8"); this.doctype = (char) entry.getColByte(10); - this.size = (int) entry.getColLongB64E(11); - this.wordCount = (int) entry.getColLongB64E(12); + this.size = (int) entry.getColLong(11); + this.wordCount = (int) entry.getColLong(12); this.snippet = null; this.word = searchedWord; this.stored = false; diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java index 3f91d1545..19db11c73 100644 --- a/source/de/anomic/plasma/plasmaCrawlNURL.java +++ b/source/de/anomic/plasma/plasmaCrawlNURL.java @@ -76,21 +76,20 @@ public class plasmaCrawlNURL extends indexURL { /** * column length definition for the {@link plasmaURL#urlHashCache} DB */ - public static final int[] ce = { - urlHashLength, // the url hash - urlHashLength, // initiator - urlStringLength, // the url as string - urlHashLength, // the url's referrer hash - urlNameLength, // the name of the url, from anchor tag name - urlDateLength, // the time when the url was first time appeared - urlCrawlProfileHandleLength, // the name of the prefetch profile handle - urlCrawlDepthLength, // the prefetch depth so far, starts at 0 - urlParentBranchesLength, // number of anchors of the parent - urlForkFactorLength, // sum of anchors of all ancestors - urlFlagLength, // extra space - urlHandleLength // extra handle - }; - + public final static kelondroRow rowdef = new kelondroRow( + "String urlhash-" + urlHashLength + ", " + // the url's hash + "String initiator-" + urlHashLength + ", " + // the crawling initiator + "String urlstring-" + urlStringLength + ", " + // the url as string + "String refhash-" + urlHashLength + ", " + // the url's referrer hash + "String urlname-" + urlNameLength + ", " + // the name of the url, from anchor tag name + "Cardinal appdate-" + urlDateLength + " {b64e}, " + // the time when the url was first time appeared + "String profile-" + urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle + "Cardinal depth-" + urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0 + "Cardinal parentbr-" + urlParentBranchesLength + " {b64e}, " + // number of anchors of the parent + "Cardinal forkfactor-" + urlForkFactorLength + " {b64e}, " + // sum of anchors of all ancestors + "byte[] flags-" + urlFlagLength + ", " + // flags + "String handle-" + urlHandleLength); // extra handle + private final plasmaCrawlBalancer coreStack; // links found by crawling to depth-1 private final plasmaCrawlBalancer limitStack; // links found by crawling at target depth private final plasmaCrawlBalancer overhangStack; // links found by crawling at depth+1 @@ -127,7 +126,7 @@ public class plasmaCrawlNURL extends indexURL { limitStack = new plasmaCrawlBalancer(limitStackFile); overhangStack = new plasmaCrawlBalancer(overhangStackFile); remoteStack = new plasmaCrawlBalancer(remoteStackFile); - kelondroRow rowdef = new kelondroRow(new int[] {indexURL.urlHashLength}); + kelondroRow rowdef = new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength); if (imageStackFile.exists()) try { imageStack = new kelondroStack(imageStackFile); } catch (IOException e) { @@ -170,13 +169,14 @@ public class plasmaCrawlNURL extends indexURL { if (cacheFile.exists()) try { // open existing cache urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); + urlHashCache.assignRowdef(rowdef); } catch (IOException e) { cacheFile.delete(); - urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } else { // create new cache cacheFile.getParentFile().mkdirs(); - urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(ce), true); + urlHashCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, rowdef, true); } } @@ -520,11 +520,11 @@ public class plasmaCrawlNURL extends indexURL { this.url = new URL(entry.getColString(2, null).trim()); this.referrer = (entry.empty(3)) ? dummyHash : entry.getColString(3, null); this.name = (entry.empty(4)) ? "" : entry.getColString(4, null).trim(); - this.loaddate = new Date(86400000 * entry.getColLongB64E(5)); + this.loaddate = new Date(86400000 * entry.getColLong(5)); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, null).trim(); - this.depth = (int) entry.getColLongB64E(7); - this.anchors = (int) entry.getColLongB64E(8); - this.forkfactor = (int) entry.getColLongB64E(9); + this.depth = (int) entry.getColLong(7); + this.anchors = (int) entry.getColLong(8); + this.forkfactor = (int) entry.getColLong(9); this.flags = new bitfield(entry.getColBytes(10)); this.handle = Integer.parseInt(entry.getColString(11, null), 16); return; diff --git a/source/de/anomic/plasma/plasmaCrawlStacker.java b/source/de/anomic/plasma/plasmaCrawlStacker.java index 3ee99b130..9ec66bc86 100644 --- a/source/de/anomic/plasma/plasmaCrawlStacker.java +++ b/source/de/anomic/plasma/plasmaCrawlStacker.java @@ -467,11 +467,11 @@ public final class plasmaCrawlStacker { this.url = entry.getColString(2, "UTF-8").trim(); this.referrerHash = (entry.empty(3)) ? indexURL.dummyHash : entry.getColString(3, "UTF-8"); this.name = (entry.empty(4)) ? "" : entry.getColString(4, "UTF-8").trim(); - this.loaddate = new Date(86400000 * entry.getColLongB64E(5)); + this.loaddate = new Date(86400000 * entry.getColLong(5)); this.profileHandle = (entry.empty(6)) ? null : entry.getColString(6, "UTF-8").trim(); - this.depth = (int) entry.getColLongB64E(7); - this.anchors = (int) entry.getColLongB64E(8); - this.forkfactor = (int) entry.getColLongB64E(9); + this.depth = (int) entry.getColLong(7); + this.anchors = (int) entry.getColLong(8); + this.forkfactor = (int) entry.getColLong(9); this.flags = new bitfield(entry.getColBytes(10)); this.handle = Integer.parseInt(new String(entry.getColBytes(11), "UTF-8")); } catch (Exception e) { @@ -581,9 +581,10 @@ public final class plasmaCrawlStacker { // open existing cache try { this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); + this.urlEntryCache.assignRowdef(plasmaCrawlNURL.rowdef); } catch (IOException e) { cacheFile.delete(); - this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); + this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true); } try { // loop through the list and fill the messageList with url hashs @@ -605,7 +606,7 @@ public final class plasmaCrawlStacker { // deleting old db and creating a new db try {this.urlEntryCache.close();}catch(Exception ex){} cacheFile.delete(); - this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); + this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true); } catch (IOException e) { /* if we have an error, we start with a fresh database */ plasmaCrawlStacker.this.log.logSevere("Unable to initialize crawl stacker queue, IOException:" + e.getMessage() + ". Reseting DB.\n",e); @@ -613,13 +614,13 @@ public final class plasmaCrawlStacker { // deleting old db and creating a new db try {this.urlEntryCache.close();}catch(Exception ex){} cacheFile.delete(); - this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); + this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true); } } else { // create new cache cacheFile.getParentFile().mkdirs(); - this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(plasmaCrawlNURL.ce), true); - } + this.urlEntryCache = new kelondroTree(cacheFile, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent, plasmaCrawlNURL.rowdef, true); + } } public void close() throws IOException { @@ -664,18 +665,18 @@ public final class plasmaCrawlStacker { this.writeSync.P(); String urlHash = null; - kelondroRow.Entry entryBytes = null; + kelondroRow.Entry entry = null; stackCrawlMessage newMessage = null; try { synchronized(this.urlEntryHashCache) { urlHash = (String) this.urlEntryHashCache.removeFirst(); - entryBytes = this.urlEntryCache.remove(urlHash.getBytes()); + entry = this.urlEntryCache.remove(urlHash.getBytes()); } } finally { this.writeSync.V(); } - newMessage = new stackCrawlMessage(urlHash, entryBytes); + newMessage = new stackCrawlMessage(urlHash, entry); return newMessage; } } diff --git a/source/de/anomic/plasma/plasmaSwitchboardQueue.java b/source/de/anomic/plasma/plasmaSwitchboardQueue.java index 410154057..bf9ef13b8 100644 --- a/source/de/anomic/plasma/plasmaSwitchboardQueue.java +++ b/source/de/anomic/plasma/plasmaSwitchboardQueue.java @@ -80,16 +80,16 @@ public class plasmaSwitchboardQueue { } private void initQueueStack() { - kelondroRow rowdef = new kelondroRow(new int[] { - indexURL.urlStringLength, - indexURL.urlHashLength, - 11, - 1, - yacySeedDB.commonHashLength, - indexURL.urlCrawlDepthLength, - indexURL.urlCrawlProfileHandleLength, - indexURL.urlDescrLength - }); + kelondroRow rowdef = new kelondroRow( + "String url-" + indexURL.urlStringLength + ", " + // the url + "String refhash-" + indexURL.urlHashLength + ", " + // the url's referrer hash + "Cardinal modifiedsince-11" + " {b64e}, " + // from ifModifiedSince + "byte[] flags-1" + ", " + // flags + "String initiator-" + yacySeedDB.commonHashLength + ", " + // the crawling initiator + "Cardinal depth-" + indexURL.urlCrawlDepthLength + " {b64e}, " + // the prefetch depth so far, starts at 0 + "String profile-" + indexURL.urlCrawlProfileHandleLength + ", " + // the name of the prefetch profile handle + "String urldescr-" + indexURL.urlDescrLength); // + if (sbQueueStackPath.exists()) try { sbQueueStack = new kelondroStack(sbQueueStackPath); } catch (IOException e) { @@ -217,7 +217,7 @@ public class plasmaSwitchboardQueue { } public Entry(kelondroRow.Entry row) throws IOException { - long ims = row.getColLongB64E(2); + long ims = row.getColLong(2); byte flags = row.getColByte(3); try { this.url = new URL(row.getColString(0, "UTF-8")); @@ -228,7 +228,7 @@ public class plasmaSwitchboardQueue { this.ifModifiedSince = (ims == 0) ? null : new Date(ims); this.flags = ((flags & 1) == 1) ? (byte) 1 : (byte) 0; this.initiator = row.getColString(4, "UTF-8"); - this.depth = (int) row.getColLongB64E(5); + this.depth = (int) row.getColLong(5); this.profileHandle = row.getColString(6, "UTF-8"); this.anchorName = row.getColString(7, "UTF-8"); diff --git a/source/de/anomic/plasma/plasmaWordConnotation.java b/source/de/anomic/plasma/plasmaWordConnotation.java index a1b669874..664b7bea1 100644 --- a/source/de/anomic/plasma/plasmaWordConnotation.java +++ b/source/de/anomic/plasma/plasmaWordConnotation.java @@ -62,9 +62,9 @@ public class plasmaWordConnotation { if (refDBfile.exists()) try { refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, fillChar); } catch (IOException e) { - refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow(new int[] {wordlength, countlength}), fillChar, true); + refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow("byte[] word-" + wordlength + ", Cardinal count-" + countlength), fillChar, true); } else { - refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow(new int[] {wordlength, countlength}), fillChar, true); + refDB = new kelondroDynTree(refDBfile, bufferkb * 0x400, preloadTime, wordlength, nodesize, new kelondroRow("byte[] word-" + wordlength + ", Cardinal count-" + countlength), fillChar, true); } } @@ -73,8 +73,8 @@ public class plasmaWordConnotation { //reference = reference.toLowerCase(); kelondroRow.Entry record = refDB.get(word, reference.getBytes()); long c; - if (record == null) c = 0; else c = record.getColLongB64E(1); - record.setColLongB64E(1, c++); + if (record == null) c = 0; else c = record.getColLong(1); + record.setCol(1, c++); refDB.put(word, record); } diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java index 6cf6a3beb..197ac0670 100644 --- a/source/de/anomic/plasma/plasmaWordIndex.java +++ b/source/de/anomic/plasma/plasmaWordIndex.java @@ -104,7 +104,7 @@ public final class plasmaWordIndex extends indexAbstractRI implements indexRI { // create collections storage path if (!(newIndexRoot.exists())) newIndexRoot.mkdirs(); if (useCollectionIndex) - collections = new indexCollectionRI(newIndexRoot, "test_generation0", bufferkb * 1024, preloadTime); + collections = new indexCollectionRI(newIndexRoot, "test_generation1", bufferkb * 1024, preloadTime); else collections = null; diff --git a/source/de/anomic/plasma/plasmaWordIndexAssortment.java b/source/de/anomic/plasma/plasmaWordIndexAssortment.java index ea0887d55..031f4d60f 100644 --- a/source/de/anomic/plasma/plasmaWordIndexAssortment.java +++ b/source/de/anomic/plasma/plasmaWordIndexAssortment.java @@ -61,6 +61,7 @@ import de.anomic.index.indexEntry; import de.anomic.index.indexEntryAttribute; import de.anomic.index.indexRowSetContainer; import de.anomic.index.indexURLEntry; +import de.anomic.kelondro.kelondroColumn; import de.anomic.kelondro.kelondroException; import de.anomic.kelondro.kelondroTree; import de.anomic.kelondro.kelondroRow; @@ -70,13 +71,12 @@ public final class plasmaWordIndexAssortment { // environment constants private static final String assortmentFileName = "indexAssortment"; - public static final int[] bufferStructureBasis = new int[]{ - indexEntryAttribute.wordHashLength, // a wordHash - 4, // occurrence counter - 8, // timestamp of last access - indexEntryAttribute.urlHashLength, // corresponding URL hash - indexURLEntry.encodedByteArrayFormLength(false) // URL attributes - }; + public static final kelondroRow bufferStructureBasis = new kelondroRow( + "byte[] wordhash-" + indexEntryAttribute.wordHashLength + ", " + + "Cardinal occ-4 {b256}, " + + "Cardinal time-8 {b256}, " + + "byte[] urlhash-" + indexEntryAttribute.urlHashLength + ", " + + "byte[] urlattr-" + indexURLEntry.encodedByteArrayFormLength(false)); // class variables private File assortmentFile; @@ -87,25 +87,25 @@ public final class plasmaWordIndexAssortment { private long preloadTime; private static String intx(int x) { - String s = Integer.toString(x); - while (s.length() < 3) s = "0" + s; - return s; + String s = Integer.toString(x); + while (s.length() < 3) s = "0" + s; + return s; } - private static int[] bufferStructure(int assortmentCapacity) { - int[] structure = new int[3 + 2 * assortmentCapacity]; - structure[0] = bufferStructureBasis[0]; - structure[1] = bufferStructureBasis[1]; - structure[2] = bufferStructureBasis[2]; + private static kelondroRow bufferStructure(int assortmentCapacity) { + kelondroColumn[] structure = new kelondroColumn[3 + 2 * assortmentCapacity]; + structure[0] = bufferStructureBasis.column(0); + structure[1] = bufferStructureBasis.column(1); + structure[2] = bufferStructureBasis.column(2); for (int i = 0; i < assortmentCapacity; i++) { - structure[3 + 2 * i] = bufferStructureBasis[3]; - structure[4 + 2 * i] = bufferStructureBasis[4]; + structure[3 + 2 * i] = bufferStructureBasis.column(3); + structure[4 + 2 * i] = bufferStructureBasis.column(4); } - return structure; + return new kelondroRow(structure); } private static int assortmentCapacity(int rowsize) { - return (rowsize - bufferStructureBasis[0] - bufferStructureBasis[1] - bufferStructureBasis[2]) / (bufferStructureBasis[3] + bufferStructureBasis[4]); + return (rowsize - bufferStructureBasis.width(0) - bufferStructureBasis.width(1) - bufferStructureBasis.width(2)) / (bufferStructureBasis.width(3) + bufferStructureBasis.width(4)); } public plasmaWordIndexAssortment(File storagePath, int assortmentLength, int bufferkb, long preloadTime, serverLog log) { @@ -121,6 +121,7 @@ public final class plasmaWordIndexAssortment { try { long start = System.currentTimeMillis(); assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent); + assortments.assignRowdef(bufferStructure(assortmentLength)); long stop = System.currentTimeMillis(); if (log != null) log.logConfig("Opened Assortment, " + @@ -138,7 +139,7 @@ public final class plasmaWordIndexAssortment { assortmentFile.delete(); // make space for new one } // create new assortment tree file - assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(bufferStructure(assortmentLength)), true); + assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength), true); if (log != null) log.logConfig("Created new Assortment, width " + assortmentLength + ", " + bufferkb + "kb buffer"); } @@ -149,8 +150,8 @@ public final class plasmaWordIndexAssortment { if (newContainer.size() != assortmentLength) throw new RuntimeException("plasmaWordIndexAssortment.store: wrong container size"); kelondroRow.Entry row = assortments.row().newEntry(); row.setCol(0, newContainer.getWordHash().getBytes()); - row.setColLongB256(1, 1); - row.setColLongB256(2, newContainer.updated()); + row.setCol(1, 1); + row.setCol(2, newContainer.updated()); Iterator entries = newContainer.entries(); indexEntry entry; for (int i = 0; i < assortmentLength; i++) { @@ -228,24 +229,10 @@ public final class plasmaWordIndexAssortment { return row2container(row); } - /* - public indexContainer row2container(String wordHash, kelondroRow.Entry row) { - if (row == null) return null; - final long updateTime = row.getColLongB256(2); - indexTreeMapContainer container = new indexTreeMapContainer(wordHash); - for (int i = 0; i < assortmentLength; i++) { - container.add( - new indexURLEntry[] { new indexURLEntry( - new String(row.getColBytes(3 + 2 * i)), new String(row.getColBytes(4 + 2 * i))) }, updateTime); - } - return container; - } - */ - public final static indexContainer row2container(kelondroRow.Entry row) { if (row == null) return null; String wordHash = row.getColString(0, null); - final long updateTime = row.getColLongB256(2); + final long updateTime = row.getColLong(2); indexContainer container = new indexRowSetContainer(wordHash); int al = assortmentCapacity(row.objectsize()); for (int i = 0; i < al; i++) { @@ -274,7 +261,7 @@ public final class plasmaWordIndexAssortment { if (!(assortmentFile.delete())) throw new RuntimeException("cannot delete assortment database"); } if (assortmentFile.exists()) assortmentFile.delete(); - assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, new kelondroRow(bufferStructure(assortmentLength)), true); + assortments = new kelondroTree(assortmentFile, bufferSize, preloadTime, kelondroTree.defaultObjectCachePercent, bufferStructure(assortmentLength), true); } public Iterator containers(String startWordHash, boolean up, boolean rot) throws IOException { diff --git a/source/de/anomic/plasma/plasmaWordIndexFile.java b/source/de/anomic/plasma/plasmaWordIndexFile.java index b0dc284b2..2a706e118 100644 --- a/source/de/anomic/plasma/plasmaWordIndexFile.java +++ b/source/de/anomic/plasma/plasmaWordIndexFile.java @@ -96,10 +96,10 @@ public final class plasmaWordIndexFile { kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent); } catch (IOException e) { theLocation.delete(); - kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedByteArrayFormLength(false), false); + kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + indexURLEntry.encodedByteArrayFormLength(false)), false); } else { // create new index file - kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, indexURL.urlHashLength, indexURLEntry.encodedByteArrayFormLength(false), false); + kt = new kelondroTree(theLocation, cacheSize, 0, kelondroTree.defaultObjectCachePercent, new kelondroRow("byte[] urlhash-" + indexURL.urlHashLength + ", byte[] ba-" + indexURLEntry.encodedByteArrayFormLength(false)), false); } return kt; // everyone who get this should close it when finished! } diff --git a/source/de/anomic/yacy/yacyNewsDB.java b/source/de/anomic/yacy/yacyNewsDB.java index 7853719e2..a380151d8 100644 --- a/source/de/anomic/yacy/yacyNewsDB.java +++ b/source/de/anomic/yacy/yacyNewsDB.java @@ -70,6 +70,7 @@ public class yacyNewsDB { if (path.exists()) try { news = new kelondroTree(path, bufferkb * 0x400, preloadTime, kelondroTree.defaultObjectCachePercent); + news.assignRowdef(yacyNewsRecord.rowdef); } catch (IOException e) { news = createDB(path, bufferkb, preloadTime); } else { @@ -166,7 +167,7 @@ public class yacyNewsDB { b.getColString(0, null), b.getColString(1, null), (b.empty(2)) ? null : yacyCore.parseUniversalDate(b.getColString(2, null), serverDate.UTCDiffString()), - (int) b.getColLongB64E(3), + (int) b.getColLong(3), serverCodings.string2map(b.getColString(4, null)) ); }