package de.anomic.kelondro ;
// a collectionIndex is an index to kelondroRowCollection objects
// such a collection ist defined by the following parameters
// - chunksize
// - chunkcount
// each of such a collection is stored in a byte[] which may or may not have space for more chunks
// than already exists in such an array. To store these arrays, we reserve entries in kelondroArray
// database files. There will be a set of array files for different sizes of the collection arrays.
// the 1st file has space for <loadfactor> chunks, the 2nd file for <loadfactor> * <loadfactor> chunks,
// the 3rd file for <loadfactor>^^3 chunks, and the n-th file for <loadfactor>^^n chunks.
// if the loadfactor is 4, then we have the following capacities:
// file 0: 4
// file 1: 16
// file 2: 64
// file 3: 256
// file 4: 1024
// file 5: 4096
// file 6:16384
// file 7:65536
// the maximum number of such files is called the partitions number.
// we don't want that these files grow too big, an kelondroOutOfLimitsException is throws if they
// are oversized.
// the collection arrays may be migration to another size during run-time, which means that not only the
// partitions as mentioned above are maintained, but also a set of "shadow-partitions", that represent old
// partitions and where data is read only and slowly migrated to the default partitions.
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
import java.io.File ;
import java.io.IOException ;
import java.text.SimpleDateFormat ;
import java.util.ArrayList ;
import java.util.Calendar ;
import java.util.GregorianCalendar ;
import java.util.HashMap ;
import java.util.Iterator ;
import java.util.List ;
import java.util.Map ;
import java.util.Random ;
import java.util.Set ;
import java.util.TimeZone ;
import java.util.TreeMap ;
import de.anomic.index.indexContainer ;
import de.anomic.server.serverCodings ;
import de.anomic.server.serverFileUtils ;
import de.anomic.server.serverMemory ;
import de.anomic.server.logging.serverLog ;
import de.anomic.yacy.yacyURL ;
public class kelondroCollectionIndex {
private static final int serialNumber = 0 ;
private kelondroIndex index ;
private int keylength ;
private File path ;
private String filenameStub ;
private File commonsPath ;
private int loadfactor ;
private Map arrays ; // Map of (partitionNumber"-"chunksize)/kelondroFixedWidthArray - Objects
private kelondroRow payloadrow ; // definition of the payload (chunks inside the collections)
private int maxPartitions ; // this is the maxmimum number of array files; yet not used
private static final int idx_col_key = 0 ; // the index
private static final int idx_col_chunksize = 1 ; // chunksize (number of bytes in a single chunk, needed for migration option)
private static final int idx_col_chunkcount = 2 ; // chunkcount (number of chunks in this collection)
private static final int idx_col_clusteridx = 3 ; // selector for right cluster file, must be >= arrayIndex(chunkcount)
private static final int idx_col_flags = 4 ; // flags (for future use)
private static final int idx_col_indexpos = 5 ; // indexpos (position in array file)
private static final int idx_col_lastread = 6 ; // a time stamp, update time in days since 1.1.2000
private static final int idx_col_lastwrote = 7 ; // a time stamp, update time in days since 1.1.2000
private static kelondroRow indexRow ( int keylength , kelondroOrder payloadOrder ) {
return new kelondroRow (
"byte[] key-" + keylength + "," +
"int chunksize-4 {b256}," +
"int chunkcount-4 {b256}," +
"byte clusteridx-1 {b256}," +
"byte flags-1 {b256}," +
"int indexpos-4 {b256}," +
"short lastread-2 {b256}, " +
"short lastwrote-2 {b256}" ,
payloadOrder , 0
) ;
}
public kelondroRow payloadRow ( ) {
return this . payloadrow ;
}
private static String fillZ ( String s , int len ) {
while ( s . length ( ) < len ) s = "0" + s ;
return s ;
}
private static File arrayFile ( File path , String filenameStub , int loadfactor , int chunksize , int partitionNumber , int serialNumber ) {
String lf = fillZ ( Integer . toHexString ( loadfactor ) . toUpperCase ( ) , 2 ) ;
String cs = fillZ ( Integer . toHexString ( chunksize ) . toUpperCase ( ) , 4 ) ;
String pn = fillZ ( Integer . toHexString ( partitionNumber ) . toUpperCase ( ) , 2 ) ;
String sn = fillZ ( Integer . toHexString ( serialNumber ) . toUpperCase ( ) , 2 ) ;
return new File ( path , filenameStub + "." + lf + "." + cs + "." + pn + "." + sn + ".kca" ) ; // kelondro collection array
}
private static File propertyFile ( File path , String filenameStub , int loadfactor , int chunksize ) {
String lf = fillZ ( Integer . toHexString ( loadfactor ) . toUpperCase ( ) , 2 ) ;
String cs = fillZ ( Integer . toHexString ( chunksize ) . toUpperCase ( ) , 4 ) ;
return new File ( path , filenameStub + "." + lf + "." + cs + ".properties" ) ;
}
public kelondroCollectionIndex ( File path , String filenameStub , int keyLength , kelondroOrder indexOrder ,
long preloadTime , int loadfactor , int maxpartitions , kelondroRow rowdef ) throws IOException {
// the buffersize is number of bytes that are only used if the kelondroFlexTable is backed up with a kelondroTree
this . path = path ;
this . filenameStub = filenameStub ;
this . keylength = keyLength ;
this . payloadrow = rowdef ;
this . loadfactor = loadfactor ;
this . maxPartitions = maxpartitions ;
this . commonsPath = new File ( path , filenameStub + "." + fillZ ( Integer . toHexString ( rowdef . objectsize ) . toUpperCase ( ) , 4 ) + ".commons" ) ;
this . commonsPath . mkdirs ( ) ;
boolean ramIndexGeneration = false ;
boolean fileIndexGeneration = ! ( new File ( path , filenameStub + ".index" ) . exists ( ) ) ;
if ( ramIndexGeneration ) index = new kelondroRowSet ( indexRow ( keyLength , indexOrder ) , 0 ) ;
if ( fileIndexGeneration ) index = new kelondroFlexTable ( path , filenameStub + ".index" , preloadTime , indexRow ( keyLength , indexOrder ) , true ) ;
// open array files
this . arrays = new HashMap ( ) ; // all entries will be dynamically created with getArray()
if ( ( ( fileIndexGeneration ) | | ( ramIndexGeneration ) ) ) {
serverLog . logFine ( "STARTUP" , "STARTED INITIALIZATION OF NEW COLLECTION INDEX. THIS WILL TAKE SOME TIME" ) ;
openAllArrayFiles ( ( ( fileIndexGeneration ) | | ( ramIndexGeneration ) ) , indexOrder ) ;
}
// open/create index table
if ( index = = null ) index = openIndexFile ( path , filenameStub , indexOrder , preloadTime , loadfactor , rowdef ) ;
}
private void openAllArrayFiles ( boolean indexGeneration , kelondroOrder indexOrder ) throws IOException {
String [ ] list = this . path . list ( ) ;
kelondroFixedWidthArray array ;
kelondroRow irow = indexRow ( keylength , indexOrder ) ;
int t = kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ;
for ( int i = 0 ; i < list . length ; i + + ) if ( list [ i ] . endsWith ( ".kca" ) ) {
// open array
int pos = list [ i ] . indexOf ( '.' ) ;
if ( pos < 0 ) continue ;
int chunksize = Integer . parseInt ( list [ i ] . substring ( pos + 4 , pos + 8 ) , 16 ) ;
int partitionNumber = Integer . parseInt ( list [ i ] . substring ( pos + 9 , pos + 11 ) , 16 ) ;
int serialNumber = Integer . parseInt ( list [ i ] . substring ( pos + 12 , pos + 14 ) , 16 ) ;
try {
array = openArrayFile ( partitionNumber , serialNumber , true ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
continue ;
}
// remember that we opened the array
arrays . put ( partitionNumber + "-" + chunksize , array ) ;
if ( ( index ! = null ) & & ( indexGeneration ) ) {
// loop over all elements in array and create index entry for each row
kelondroRow . EntryIndex aentry ;
kelondroRow . Entry ientry ;
Iterator ei = array . contentRows ( - 1 ) ;
byte [ ] key ;
long start = System . currentTimeMillis ( ) ;
long lastlog = start ;
int count = 0 ;
while ( ei . hasNext ( ) ) {
aentry = ( kelondroRow . EntryIndex ) ei . next ( ) ;
key = aentry . getColBytes ( 0 ) ;
assert ( key ! = null ) ;
if ( key = = null ) continue ; // skip deleted entries
ientry = irow . newEntry ( ) ;
ientry . setCol ( idx_col_key , key ) ;
ientry . setCol ( idx_col_chunksize , chunksize ) ;
ientry . setCol ( idx_col_chunkcount , kelondroRowCollection . sizeOfExportedCollectionRows ( aentry , 1 ) ) ;
ientry . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
ientry . setCol ( idx_col_flags , ( byte ) 0 ) ;
ientry . setCol ( idx_col_indexpos , aentry . index ( ) ) ;
ientry . setCol ( idx_col_lastread , t ) ;
ientry . setCol ( idx_col_lastwrote , t ) ;
index . addUnique ( ientry ) ; // FIXME: this should avoid doubles
count + + ;
// write a log
if ( System . currentTimeMillis ( ) - lastlog > 30000 ) {
serverLog . logFine ( "STARTUP" , "created " + count + " RWI index entries. " + ( ( ( System . currentTimeMillis ( ) - start ) * ( array . size ( ) + array . free ( ) - count ) / count ) / 60000 ) + " minutes remaining for this array" ) ;
lastlog = System . currentTimeMillis ( ) ;
}
}
}
}
}
private kelondroIndex openIndexFile ( File path , String filenameStub , kelondroOrder indexOrder ,
long preloadTime , int loadfactor , kelondroRow rowdef ) throws IOException {
// open/create index table
kelondroIndex theindex = new kelondroCache ( new kelondroFlexTable ( path , filenameStub + ".index" , preloadTime , indexRow ( keylength , indexOrder ) , true ) , true , false ) ;
//kelondroIndex theindex = new kelondroFlexTable(path, filenameStub + ".index", preloadTime, indexRow(keylength, indexOrder), true);
// save/check property file for this array
File propfile = propertyFile ( path , filenameStub , loadfactor , rowdef . objectsize ( ) ) ;
Map props = new HashMap ( ) ;
if ( propfile . exists ( ) ) {
props = serverFileUtils . loadHashMap ( propfile ) ;
String stored_rowdef = ( String ) props . get ( "rowdef" ) ;
if ( ( stored_rowdef = = null ) | | ( ! ( rowdef . subsumes ( new kelondroRow ( stored_rowdef , rowdef . objectOrder , 0 ) ) ) ) ) {
System . out . println ( "FATAL ERROR: stored rowdef '" + stored_rowdef + "' does not match with new rowdef '" +
rowdef + "' for array cluster '" + path + "/" + filenameStub + "'" ) ;
System . exit ( - 1 ) ;
}
}
props . put ( "rowdef" , rowdef . toString ( ) ) ;
serverFileUtils . saveMap ( propfile , props , "CollectionIndex properties" ) ;
return theindex ;
}
private kelondroFixedWidthArray openArrayFile ( int partitionNumber , int serialNumber , boolean create ) throws IOException {
File f = arrayFile ( path , filenameStub , loadfactor , payloadrow . objectsize ( ) , partitionNumber , serialNumber ) ;
int load = arrayCapacity ( partitionNumber ) ;
kelondroRow rowdef = new kelondroRow (
"byte[] key-" + keylength + "," +
"byte[] collection-" + ( kelondroRowCollection . exportOverheadSize + load * this . payloadrow . objectsize ( ) ) ,
index . row ( ) . objectOrder ,
0
) ;
if ( ( ! ( f . exists ( ) ) ) & & ( ! create ) ) return null ;
kelondroFixedWidthArray a = new kelondroFixedWidthArray ( f , rowdef , 0 ) ;
serverLog . logFine ( "STARTUP" , "opened array file " + f + " with " + a . size ( ) + " RWIs" ) ;
return a ;
}
private kelondroFixedWidthArray getArray ( int partitionNumber , int serialNumber , int chunksize ) {
String accessKey = partitionNumber + "-" + chunksize ;
kelondroFixedWidthArray array = ( kelondroFixedWidthArray ) arrays . get ( accessKey ) ;
if ( array ! = null ) return array ;
try {
array = openArrayFile ( partitionNumber , serialNumber , true ) ;
} catch ( IOException e ) {
return null ;
}
arrays . put ( accessKey , array ) ;
return array ;
}
private int arrayCapacity ( int arrayCounter ) {
if ( arrayCounter < 0 ) return 0 ;
int load = this . loadfactor ;
for ( int i = 0 ; i < arrayCounter ; i + + ) load = load * this . loadfactor ;
return load ;
}
private int arrayIndex ( int requestedCapacity ) throws kelondroOutOfLimitsException {
// the requestedCapacity is the number of wanted chunks
int load = 1 , i = 0 ;
while ( true ) {
load = load * this . loadfactor ;
if ( load > = requestedCapacity ) return i ;
i + + ;
}
}
public int size ( ) {
return index . size ( ) ;
}
public int minMem ( ) {
// calculate a minimum amount of memory that is necessary to use the collection
// during runtime (after the index was initialized)
// caclculate an upper limit (not the correct size) of the maximum number of indexes for a wordHash
// this is computed by the size of the biggest used collection
// this must be multiplied with the payload size
// and doubled for necessary memory transformation during sort operation
return ( int ) ( arrayCapacity ( arrays . size ( ) - 1 ) * this . payloadrow . objectsize * kelondroRowSet . growfactor ) ;
}
private void array_remove (
int oldPartitionNumber , int serialNumber , int chunkSize ,
int oldRownumber ) throws IOException {
// we need a new slot, that means we must first delete the old entry
// find array file
kelondroFixedWidthArray array = getArray ( oldPartitionNumber , serialNumber , chunkSize ) ;
// delete old entry
array . remove ( oldRownumber ) ;
}
private kelondroRow . Entry array_new (
byte [ ] key , kelondroRowCollection collection ) throws IOException {
// the collection is new
int partitionNumber = arrayIndex ( collection . size ( ) ) ;
kelondroRow . Entry indexrow = index . row ( ) . newEntry ( ) ;
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , this . payloadrow . objectsize ( ) ) ;
// define row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// write a new entry in this array
int newRowNumber = array . add ( arrayEntry ) ;
// store the new row number in the index
indexrow . setCol ( idx_col_key , key ) ;
indexrow . setCol ( idx_col_chunksize , this . payloadrow . objectsize ( ) ) ;
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
indexrow . setCol ( idx_col_flags , ( byte ) 0 ) ;
indexrow . setCol ( idx_col_indexpos , ( long ) newRowNumber ) ;
indexrow . setCol ( idx_col_lastread , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
// after calling this method there must be an index.addUnique(indexrow);
return indexrow ;
}
private void array_add (
byte [ ] key , kelondroRowCollection collection , kelondroRow . Entry indexrow ,
int partitionNumber , int serialNumber , int chunkSize ) throws IOException {
// write a new entry in the other array
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
// define new row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// write a new entry in this array
int rowNumber = array . add ( arrayEntry ) ;
// store the new row number in the index
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
indexrow . setCol ( idx_col_indexpos , ( long ) rowNumber ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
// after calling this method there must be a index.put(indexrow);
}
private ArrayList array_add_multiple ( TreeMap array_add_map , int serialNumber , int chunkSize ) throws IOException {
// returns a List of kelondroRow.Entry entries for indexrow storage
Map . Entry entry ;
Iterator i = array_add_map . entrySet ( ) . iterator ( ) ;
Iterator j ;
ArrayList actionList ;
int partitionNumber ;
kelondroFixedWidthArray array ;
Object [ ] objs ;
byte [ ] key ;
kelondroRowCollection collection ;
kelondroRow . Entry indexrow ;
ArrayList indexrows = new ArrayList ( ) ;
while ( i . hasNext ( ) ) {
entry = ( Map . Entry ) i . next ( ) ;
actionList = ( ArrayList ) entry . getValue ( ) ;
partitionNumber = ( ( Integer ) entry . getKey ( ) ) . intValue ( ) ;
array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
j = actionList . iterator ( ) ;
while ( j . hasNext ( ) ) {
objs = ( Object [ ] ) j . next ( ) ;
key = ( byte [ ] ) objs [ 0 ] ;
collection = ( kelondroRowCollection ) objs [ 1 ] ;
indexrow = ( kelondroRow . Entry ) objs [ 2 ] ;
// define new row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// write a new entry in this array
int rowNumber = array . add ( arrayEntry ) ;
// store the new row number in the index
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
indexrow . setCol ( idx_col_indexpos , ( long ) rowNumber ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
indexrows . add ( indexrow ) ;
}
}
// after calling this method there must be a index.put(indexrow);
return indexrows ;
}
private void array_replace (
byte [ ] key , kelondroRowCollection collection , kelondroRow . Entry indexrow ,
int partitionNumber , int serialNumber , int chunkSize ,
int rowNumber ) throws IOException {
// we don't need a new slot, just write collection into the old one
// find array file
kelondroFixedWidthArray array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
// define new row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// overwrite entry in this array
array . set ( rowNumber , arrayEntry ) ;
// update the index entry
final int collectionsize = collection . size ( ) ; // extra variable for easier debugging
indexrow . setCol ( idx_col_chunkcount , collectionsize ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
// after calling this method there must be a index.put(indexrow);
}
private ArrayList array_replace_multiple ( TreeMap array_replace_map , int serialNumber , int chunkSize ) throws IOException {
Map . Entry entry , e ;
Iterator i = array_replace_map . entrySet ( ) . iterator ( ) ;
Iterator j ;
TreeMap actionMap ;
int partitionNumber ;
kelondroFixedWidthArray array ;
ArrayList indexrows = new ArrayList ( ) ;
Object [ ] objs ;
int rowNumber ;
byte [ ] key ;
kelondroRowCollection collection ;
kelondroRow . Entry indexrow ;
while ( i . hasNext ( ) ) {
entry = ( Map . Entry ) i . next ( ) ;
actionMap = ( TreeMap ) entry . getValue ( ) ;
partitionNumber = ( ( Integer ) entry . getKey ( ) ) . intValue ( ) ;
array = getArray ( partitionNumber , serialNumber , chunkSize ) ;
j = actionMap . entrySet ( ) . iterator ( ) ;
while ( j . hasNext ( ) ) {
e = ( Map . Entry ) j . next ( ) ;
rowNumber = ( ( Integer ) e . getKey ( ) ) . intValue ( ) ;
objs = ( Object [ ] ) e . getValue ( ) ;
key = ( byte [ ] ) objs [ 0 ] ;
collection = ( kelondroRowCollection ) objs [ 1 ] ;
indexrow = ( kelondroRow . Entry ) objs [ 2 ] ;
// define new row
kelondroRow . Entry arrayEntry = array . row ( ) . newEntry ( ) ;
arrayEntry . setCol ( 0 , key ) ;
arrayEntry . setCol ( 1 , collection . exportCollection ( ) ) ;
// overwrite entry in this array
array . set ( rowNumber , arrayEntry ) ;
// update the index entry
indexrow . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexrow . setCol ( idx_col_clusteridx , ( byte ) partitionNumber ) ;
indexrow . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
indexrows . add ( indexrow ) ;
}
}
// after calling this method there mus be a index.put(indexrow);
return indexrows ;
}
public synchronized void put ( byte [ ] key , kelondroRowCollection collection ) throws IOException , kelondroOutOfLimitsException {
assert ( key ! = null ) ;
assert ( collection ! = null ) ;
assert ( collection . size ( ) ! = 0 ) ;
// first find an old entry, if one exists
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) {
// create new row and index entry
if ( ( collection ! = null ) & & ( collection . size ( ) > 0 ) ) {
indexrow = array_new ( key , collection ) ; // modifies indexrow
index . addUnique ( indexrow ) ;
}
return ;
}
// overwrite the old collection
// read old information
//int oldchunksize = (int) indexrow.getColLong(idx_col_chunksize); // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) ;
int newPartitionNumber = arrayIndex ( collection . size ( ) ) ;
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
array_replace (
key , collection , indexrow ,
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
} else {
array_remove (
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
array_add (
key , collection , indexrow ,
newPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
}
if ( ( int ) indexrow . getColLong ( idx_col_chunkcount ) ! = collection . size ( ) )
serverLog . logSevere ( "kelondroCollectionIndex" , "UPDATE (put) ERROR: array has different chunkcount than index after merge: index = " + ( int ) indexrow . getColLong ( idx_col_chunkcount ) + ", collection.size() = " + collection . size ( ) ) ;
index . put ( indexrow ) ; // write modified indexrow
}
public synchronized void mergeMultiple ( List /* of indexContainer */ containerList ) throws IOException , kelondroOutOfLimitsException {
// merge a bulk of index containers
// this method should be used to optimize the R/W head path length
// separate the list in two halves:
// - containers that do not exist yet in the collection
// - containers that do exist in the collection and must be merged
Iterator i = containerList . iterator ( ) ;
indexContainer container ;
byte [ ] key ;
ArrayList newContainer = new ArrayList ( ) ;
TreeMap existingContainer = new TreeMap ( ) ; // a mapping from Integer (partition) to a TreeMap (mapping from index to object triple)
TreeMap containerMap ; // temporary map; mapping from index position to object triple with {key, container, indexrow}
kelondroRow . Entry indexrow ;
int oldrownumber1 ; // index of the entry in array
int oldPartitionNumber1 ; // points to array file
while ( i . hasNext ( ) ) {
container = ( indexContainer ) i . next ( ) ;
if ( ( container = = null ) | | ( container . size ( ) = = 0 ) ) continue ;
key = container . getWordHash ( ) . getBytes ( ) ;
// first find an old entry, if one exists
indexrow = index . get ( key ) ;
if ( indexrow = = null ) {
newContainer . add ( new Object [ ] { key , container } ) ;
} else {
oldrownumber1 = ( int ) indexrow . getColLong ( idx_col_indexpos ) ;
oldPartitionNumber1 = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ;
containerMap = ( TreeMap ) existingContainer . get ( new Integer ( oldPartitionNumber1 ) ) ;
if ( containerMap = = null ) containerMap = new TreeMap ( ) ;
containerMap . put ( new Integer ( oldrownumber1 ) , new Object [ ] { key , container , indexrow } ) ;
existingContainer . put ( new Integer ( oldPartitionNumber1 ) , containerMap ) ;
}
}
// now iterate through the container lists and execute merges
// this is done in such a way, that there is a optimized path for the R/W head
// merge existing containers
Map . Entry tripleEntry ;
Object [ ] record ;
ArrayList indexrows_existing = new ArrayList ( ) ;
kelondroRowCollection collection ;
TreeMap array_replace_map = new TreeMap ( ) ;
TreeMap array_add_map = new TreeMap ( ) ;
ArrayList actionList ;
TreeMap actionMap ;
//boolean madegc = false;
//System.out.println("DEBUG existingContainer: " + existingContainer.toString());
while ( existingContainer . size ( ) > 0 ) {
oldPartitionNumber1 = ( ( Integer ) existingContainer . lastKey ( ) ) . intValue ( ) ;
containerMap = ( TreeMap ) existingContainer . remove ( new Integer ( oldPartitionNumber1 ) ) ;
Iterator j = containerMap . entrySet ( ) . iterator ( ) ;
while ( j . hasNext ( ) ) {
tripleEntry = ( Map . Entry ) j . next ( ) ;
oldrownumber1 = ( ( Integer ) tripleEntry . getKey ( ) ) . intValue ( ) ;
record = ( Object [ ] ) tripleEntry . getValue ( ) ; // {byte[], indexContainer, kelondroRow.Entry}
// merge with the old collection
key = ( byte [ ] ) record [ 0 ] ;
collection = ( kelondroRowCollection ) record [ 1 ] ;
indexrow = ( kelondroRow . Entry ) record [ 2 ] ;
// read old information
int oldchunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ; // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
assert oldPartitionNumber1 = = oldPartitionNumber : "oldPartitionNumber1 = " + oldPartitionNumber1 + ", oldPartitionNumber = " + oldPartitionNumber + ", containerMap = " + containerMap + ", existingContainer: " + existingContainer . toString ( ) ;
assert oldrownumber1 = = oldrownumber : "oldrownumber1 = " + oldrownumber1 + ", oldrownumber = " + oldrownumber + ", containerMap = " + containerMap + ", existingContainer: " + existingContainer . toString ( ) ;
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) ;
int oldSerialNumber = 0 ;
// load the old collection and join it
collection . addAllUnique ( getwithparams ( indexrow , oldchunksize , oldchunkcount , oldPartitionNumber , oldrownumber , oldSerialNumber , false ) ) ;
collection . sort ( ) ;
collection . uniq ( - 1 ) ; // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
collection . trim ( false ) ;
// check for size of collection:
// if necessary shrink the collection and dump a part of that collection
// to avoid that this grows too big
if ( arrayIndex ( collection . size ( ) ) > maxPartitions ) {
shrinkCollection ( key , collection , arrayCapacity ( maxPartitions ) ) ;
}
// determine new partition position
int newPartitionNumber = arrayIndex ( collection . size ( ) ) ;
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
actionMap = ( TreeMap ) array_replace_map . get ( new Integer ( oldPartitionNumber ) ) ;
if ( actionMap = = null ) actionMap = new TreeMap ( ) ;
actionMap . put ( new Integer ( oldrownumber ) , new Object [ ] { key , collection , indexrow } ) ;
array_replace_map . put ( new Integer ( oldPartitionNumber ) , actionMap ) ;
/ *
array_replace (
key , collection , indexrow ,
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
indexrows_existing . add ( indexrow ) ; // indexrows are collected and written later as block
* /
} else {
array_remove (
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
actionList = ( ArrayList ) array_add_map . get ( new Integer ( newPartitionNumber ) ) ;
if ( actionList = = null ) actionList = new ArrayList ( ) ;
actionList . add ( new Object [ ] { key , collection , indexrow } ) ;
array_add_map . put ( new Integer ( newPartitionNumber ) , actionList ) ;
/ *
array_add (
key , collection , indexrow ,
newPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
indexrows_existing . add ( indexrow ) ; // indexrows are collected and written later as block
* /
}
// memory protection: flush collected collections
if ( serverMemory . available ( ) < minMem ( ) ) {
// emergency flush
indexrows_existing . addAll ( array_replace_multiple ( array_replace_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_replace_map = new TreeMap ( ) ; // delete references
indexrows_existing . addAll ( array_add_multiple ( array_add_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_add_map = new TreeMap ( ) ; // delete references
//if (!madegc) {
// prevent that this flush is made again even when there is enough memory
serverMemory . gc ( 10000 , "kelendroCollectionIndex.mergeMultiple(...)" ) ; // thq
// prevent that this gc happens more than one time
// madegc = true;
//}
}
}
}
// finallly flush the collected collections
indexrows_existing . addAll ( array_replace_multiple ( array_replace_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_replace_map = new TreeMap ( ) ; // delete references
indexrows_existing . addAll ( array_add_multiple ( array_add_map , 0 , this . payloadrow . objectsize ( ) ) ) ;
array_add_map = new TreeMap ( ) ; // delete references
// write new containers
i = newContainer . iterator ( ) ;
ArrayList indexrows_new = new ArrayList ( ) ;
while ( i . hasNext ( ) ) {
record = ( Object [ ] ) i . next ( ) ; // {byte[], indexContainer}
key = ( byte [ ] ) record [ 0 ] ;
collection = ( indexContainer ) record [ 1 ] ;
indexrow = array_new ( key , collection ) ; // modifies indexrow
indexrows_new . add ( indexrow ) ; // collect new index rows
}
// write index entries
index . putMultiple ( indexrows_existing ) ; // write modified indexrows in optimized manner
index . addUniqueMultiple ( indexrows_new ) ; // write new indexrows in optimized manner
}
public synchronized void merge ( indexContainer container ) throws IOException , kelondroOutOfLimitsException {
if ( ( container = = null ) | | ( container . size ( ) = = 0 ) ) return ;
byte [ ] key = container . getWordHash ( ) . getBytes ( ) ;
// first find an old entry, if one exists
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) {
indexrow = array_new ( key , container ) ; // modifies indexrow
index . addUnique ( indexrow ) ; // write modified indexrow
} else {
// merge with the old collection
// attention! this modifies the indexrow entry which must be written with index.put(indexrow) afterwards!
kelondroRowCollection collection = ( kelondroRowCollection ) container ;
// read old information
int oldchunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ; // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) : "oldPartitionNumber = " + oldPartitionNumber + ", arrayIndex(oldchunkcount) = " + arrayIndex ( oldchunkcount ) ;
int oldSerialNumber = 0 ;
// load the old collection and join it
collection . addAllUnique ( getwithparams ( indexrow , oldchunksize , oldchunkcount , oldPartitionNumber , oldrownumber , oldSerialNumber , false ) ) ;
collection . sort ( ) ;
collection . uniq ( - 1 ) ; // FIXME: not clear if it would be better to insert the collection with put to avoid double-entries
collection . trim ( false ) ;
// check for size of collection:
// if necessary shrink the collection and dump a part of that collection
// to avoid that this grows too big
if ( arrayIndex ( collection . size ( ) ) > maxPartitions ) {
shrinkCollection ( key , collection , arrayCapacity ( maxPartitions ) ) ;
}
// determine new partition location
int newPartitionNumber = arrayIndex ( collection . size ( ) ) ;
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
array_replace (
key , collection , indexrow ,
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
} else {
array_remove (
oldPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
array_add (
key , collection , indexrow ,
newPartitionNumber , oldSerialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
}
final int collectionsize = collection . size ( ) ; // extra variable for easier debugging
final int indexrowcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ;
if ( indexrowcount ! = collectionsize )
serverLog . logSevere ( "kelondroCollectionIndex" , "UPDATE (merge) ERROR: array has different chunkcount than index after merge: index = " + indexrowcount + ", collection.size() = " + collectionsize ) ;
index . put ( indexrow ) ; // write modified indexrow
}
}
private void shrinkCollection ( byte [ ] key , kelondroRowCollection collection , int targetSize ) {
//TODO Remove timing before release
// removes entries from collection
// the removed entries are stored in a 'commons' dump file
if ( key . length ! = 12 ) return ;
// check if the collection is already small enough
int oldsize = collection . size ( ) ;
if ( oldsize < = targetSize ) return ;
kelondroRowSet newcommon = new kelondroRowSet ( collection . rowdef , 0 ) ;
long sadd1 = 0 , srem1 = 0 , sadd2 = 0 , srem2 = 0 , tot1 = 0 , tot2 = 0 ;
long t1 = 0 , t2 = 0 ;
// delete some entries, which are bad rated
Iterator i = collection . rows ( ) ;
kelondroRow . Entry entry ;
byte [ ] ref ;
t1 = System . currentTimeMillis ( ) ;
while ( i . hasNext ( ) ) {
entry = ( kelondroRow . Entry ) i . next ( ) ;
ref = entry . getColBytes ( 0 ) ;
if ( ( ref . length ! = 12 ) | | ( ! yacyURL . probablyRootURL ( new String ( ref ) ) ) ) {
t2 = System . currentTimeMillis ( ) ;
newcommon . addUnique ( entry ) ;
sadd1 + = System . currentTimeMillis ( ) - t2 ;
t2 = System . currentTimeMillis ( ) ;
i . remove ( ) ;
srem1 + = System . currentTimeMillis ( ) - t2 ;
}
}
int firstnewcommon = newcommon . size ( ) ;
tot1 = System . currentTimeMillis ( ) - t1 ;
// check if we shrinked enough
Random rand = new Random ( System . currentTimeMillis ( ) ) ;
t1 = System . currentTimeMillis ( ) ;
while ( collection . size ( ) > targetSize ) {
// now delete randomly more entries from the survival collection
i = collection . rows ( ) ;
while ( i . hasNext ( ) ) {
entry = ( kelondroRow . Entry ) i . next ( ) ;
ref = entry . getColBytes ( 0 ) ;
if ( rand . nextInt ( ) % 4 ! = 0 ) {
t2 = System . currentTimeMillis ( ) ;
newcommon . addUnique ( entry ) ;
sadd2 + = System . currentTimeMillis ( ) - t2 ;
t2 = System . currentTimeMillis ( ) ;
i . remove ( ) ;
srem2 + = System . currentTimeMillis ( ) - t2 ;
}
}
}
tot2 = System . currentTimeMillis ( ) - t1 ;
collection . trim ( false ) ;
serverLog . logFine ( "kelondroCollectionIndex" , "tot= " + tot1 + '/' + tot2 + " # add/rem(1)= " + sadd1 + '/' + srem1 + " # add/rem(2)= " + sadd2 + '/' + srem2 ) ;
serverLog . logInfo ( "kelondroCollectionIndex" , "shrinked common word " + new String ( key ) + "; old size = " + oldsize + ", new size = " + collection . size ( ) + ", maximum size = " + targetSize + ", newcommon size = " + newcommon . size ( ) + ", first newcommon = " + firstnewcommon ) ;
// finally dump the removed entries to a file
newcommon . sort ( ) ;
TimeZone GMTTimeZone = TimeZone . getTimeZone ( "GMT" ) ;
Calendar gregorian = new GregorianCalendar ( GMTTimeZone ) ;
SimpleDateFormat formatter = new SimpleDateFormat ( "yyyyMMddHHmmss" ) ;
String filename = serverCodings . encodeHex ( kelondroBase64Order . enhancedCoder . decode ( new String ( key ) ) ) + "_" + formatter . format ( gregorian . getTime ( ) ) + ".collection" ;
File storagePath = new File ( commonsPath , filename . substring ( 0 , 2 ) ) ; // make a subpath
storagePath . mkdirs ( ) ;
File file = new File ( storagePath , filename ) ;
try {
newcommon . saveCollection ( file ) ;
serverLog . logInfo ( "kelondroCollectionIndex" , "dumped common word " + new String ( key ) + " to " + file . toString ( ) + "; size = " + newcommon . size ( ) ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
serverLog . logWarning ( "kelondroCollectionIndex" , "failed to dump common word " + new String ( key ) + " to " + file . toString ( ) + "; size = " + newcommon . size ( ) ) ;
}
}
public synchronized int remove ( byte [ ] key , Set removekeys ) throws IOException , kelondroOutOfLimitsException {
if ( ( removekeys = = null ) | | ( removekeys . size ( ) = = 0 ) ) return 0 ;
// first find an old entry, if one exists
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) return 0 ;
// overwrite the old collection
// read old information
int oldchunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ; // needed only for migration
int oldchunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ; // the number if rows in the collection
int oldrownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ; // index of the entry in array
int oldPartitionNumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ; // points to array file
assert ( oldPartitionNumber > = arrayIndex ( oldchunkcount ) ) ;
int removed = 0 ;
assert ( removekeys ! = null ) ;
// load the old collection and remove keys
kelondroRowSet oldcollection = getwithparams ( indexrow , oldchunksize , oldchunkcount , oldPartitionNumber , oldrownumber , serialNumber , false ) ;
// remove the keys from the set
Iterator i = removekeys . iterator ( ) ;
Object k ;
while ( i . hasNext ( ) ) {
k = i . next ( ) ;
if ( ( k instanceof byte [ ] ) & & ( oldcollection . remove ( ( byte [ ] ) k , false ) ! = null ) ) removed + + ;
if ( ( k instanceof String ) & & ( oldcollection . remove ( ( ( String ) k ) . getBytes ( ) , false ) ! = null ) ) removed + + ;
}
oldcollection . sort ( ) ;
oldcollection . trim ( false ) ;
/ * in case that the new array size is zero we dont delete the array , just allocate a minimal chunk
*
if ( oldcollection . size ( ) = = 0 ) {
// delete the index entry and the array
kelondroFixedWidthArray array = getArray ( oldPartitionNumber , serialNumber , oldchunksize ) ;
array . remove ( oldrownumber , false ) ;
index . remove ( key ) ;
return removed ;
}
* /
int newPartitionNumber = arrayIndex ( oldcollection . size ( ) ) ;
// see if we need new space or if we can overwrite the old space
if ( oldPartitionNumber = = newPartitionNumber ) {
array_replace (
key , oldcollection , indexrow ,
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ; // modifies indexrow
} else {
array_remove (
oldPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ,
oldrownumber ) ;
array_add (
key , oldcollection , indexrow ,
newPartitionNumber , serialNumber , this . payloadrow . objectsize ( ) ) ; // modifies indexrow
}
index . put ( indexrow ) ; // write modified indexrow
return removed ;
}
public synchronized int indexSize ( byte [ ] key ) throws IOException {
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) return 0 ;
return ( int ) indexrow . getColLong ( idx_col_chunkcount ) ;
}
public synchronized boolean has ( byte [ ] key ) throws IOException {
return index . has ( key ) ;
}
public synchronized kelondroRowSet get ( byte [ ] key ) throws IOException {
// find an entry, if one exists
kelondroRow . Entry indexrow = index . get ( key ) ;
if ( indexrow = = null ) return null ;
kelondroRowSet col = getdelete ( indexrow , false ) ;
assert ( col ! = null ) ;
return col ;
}
public synchronized kelondroRowSet delete ( byte [ ] key ) throws IOException {
// find an entry, if one exists
kelondroRow . Entry indexrow = index . remove ( key , false ) ;
if ( indexrow = = null ) return null ;
kelondroRowSet removedCollection = getdelete ( indexrow , true ) ;
assert ( removedCollection ! = null ) ;
return removedCollection ;
}
protected kelondroRowSet getdelete ( kelondroRow . Entry indexrow , boolean remove ) throws IOException {
// call this only within a synchronized(index) environment
// read values
int chunksize = ( int ) indexrow . getColLong ( idx_col_chunksize ) ;
int chunkcount = ( int ) indexrow . getColLong ( idx_col_chunkcount ) ;
int rownumber = ( int ) indexrow . getColLong ( idx_col_indexpos ) ;
int partitionnumber = ( int ) indexrow . getColByte ( idx_col_clusteridx ) ;
assert ( partitionnumber > = arrayIndex ( chunkcount ) ) : "partitionnumber = " + partitionnumber + ", arrayIndex(chunkcount) = " + arrayIndex ( chunkcount ) ;
int serialnumber = 0 ;
return getwithparams ( indexrow , chunksize , chunkcount , partitionnumber , rownumber , serialnumber , remove ) ;
}
private synchronized kelondroRowSet getwithparams ( kelondroRow . Entry indexrow , int chunksize , int chunkcount , int clusteridx , int rownumber , int serialnumber , boolean remove ) throws IOException {
// open array entry
kelondroFixedWidthArray array = getArray ( clusteridx , serialnumber , chunksize ) ;
kelondroRow . Entry arrayrow = array . get ( rownumber ) ;
if ( arrayrow = = null ) throw new kelondroException ( arrayFile ( this . path , this . filenameStub , this . loadfactor , chunksize , clusteridx , serialnumber ) . toString ( ) , "array does not contain expected row" ) ;
// read the row and define a collection
byte [ ] indexkey = indexrow . getColBytes ( idx_col_key ) ;
byte [ ] arraykey = arrayrow . getColBytes ( 0 ) ;
if ( ! ( index . row ( ) . objectOrder . wellformed ( arraykey ) ) ) {
// cleanup for a bad bug that corrupted the database
index . remove ( indexkey , false ) ; // the RowCollection must be considered lost
array . remove ( rownumber ) ; // loose the RowCollection (we don't know how much is lost)
serverLog . logSevere ( "kelondroCollectionIndex." + array . filename , "lost a RowCollection because of a bad arraykey" ) ;
return new kelondroRowSet ( this . payloadrow , 0 ) ;
}
kelondroRowSet collection = new kelondroRowSet ( this . payloadrow , arrayrow , 1 ) ; // FIXME: this does not yet work with different rowdef in case of several rowdef.objectsize()
if ( ( ! ( index . row ( ) . objectOrder . wellformed ( indexkey ) ) ) | | ( index . row ( ) . objectOrder . compare ( arraykey , indexkey ) ! = 0 ) ) {
// check if we got the right row; this row is wrong. Fix it:
index . remove ( indexkey , true ) ; // the wrong row cannot be fixed
// store the row number in the index; this may be a double-entry, but better than nothing
kelondroRow . Entry indexEntry = index . row ( ) . newEntry ( ) ;
indexEntry . setCol ( idx_col_key , arrayrow . getColBytes ( 0 ) ) ;
indexEntry . setCol ( idx_col_chunksize , this . payloadrow . objectsize ( ) ) ;
indexEntry . setCol ( idx_col_chunkcount , collection . size ( ) ) ;
indexEntry . setCol ( idx_col_clusteridx , ( byte ) clusteridx ) ;
indexEntry . setCol ( idx_col_flags , ( byte ) 0 ) ;
indexEntry . setCol ( idx_col_indexpos , ( long ) rownumber ) ;
indexEntry . setCol ( idx_col_lastread , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
indexEntry . setCol ( idx_col_lastwrote , kelondroRowCollection . daysSince2000 ( System . currentTimeMillis ( ) ) ) ;
index . put ( indexEntry ) ;
serverLog . logSevere ( "kelondroCollectionIndex." + array . filename , "array contains wrong row '" + new String ( arrayrow . getColBytes ( 0 ) ) + "', expected is '" + new String ( indexrow . getColBytes ( idx_col_key ) ) + "', the row has been fixed" ) ;
}
int chunkcountInArray = collection . size ( ) ;
if ( chunkcountInArray ! = chunkcount ) {
// fix the entry in index
indexrow . setCol ( idx_col_chunkcount , chunkcountInArray ) ;
index . put ( indexrow ) ;
array . logFailure ( "INCONSISTENCY (get) in " + arrayFile ( this . path , this . filenameStub , this . loadfactor , chunksize , clusteridx , serialnumber ) . toString ( ) + ": array has different chunkcount than index: index = " + chunkcount + ", array = " + chunkcountInArray + "; the index has been auto-fixed" ) ;
}
if ( remove ) array . remove ( rownumber ) ; // index is removed in calling method
return collection ;
}
public synchronized Iterator keycollections ( byte [ ] startKey , byte [ ] secondKey , boolean rot ) {
// returns an iteration of {byte[], kelondroRowSet} Objects
try {
return new keycollectionIterator ( startKey , secondKey , rot ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
return null ;
}
}
public class keycollectionIterator implements Iterator {
Iterator indexRowIterator ;
public keycollectionIterator ( byte [ ] startKey , byte [ ] secondKey , boolean rot ) throws IOException {
// iterator of {byte[], kelondroRowSet} Objects
kelondroCloneableIterator i = index . rows ( true , startKey ) ;
indexRowIterator = ( rot ) ? new kelondroRotateIterator ( i , secondKey ) : i ;
}
public boolean hasNext ( ) {
return indexRowIterator . hasNext ( ) ;
}
public Object next ( ) {
kelondroRow . Entry indexrow = ( kelondroRow . Entry ) indexRowIterator . next ( ) ;
assert ( indexrow ! = null ) ;
if ( indexrow = = null ) return null ;
try {
return new Object [ ] { indexrow . getColBytes ( 0 ) , getdelete ( indexrow , false ) } ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
return null ;
}
}
public void remove ( ) {
indexRowIterator . remove ( ) ;
}
}
public synchronized void close ( ) {
this . index . close ( ) ;
Iterator i = arrays . values ( ) . iterator ( ) ;
while ( i . hasNext ( ) ) {
( ( kelondroFixedWidthArray ) i . next ( ) ) . close ( ) ;
}
}
public static void main ( String [ ] args ) {
// define payload structure
kelondroRow rowdef = new kelondroRow ( "byte[] a-10, byte[] b-80" , kelondroNaturalOrder . naturalOrder , 0 ) ;
File path = new File ( args [ 0 ] ) ;
String filenameStub = args [ 1 ] ;
long preloadTime = 10000 ;
try {
// initialize collection index
kelondroCollectionIndex collectionIndex = new kelondroCollectionIndex (
path , filenameStub , 9 /*keyLength*/ ,
kelondroNaturalOrder . naturalOrder , preloadTime ,
4 /*loadfactor*/ , 7 , rowdef ) ;
// fill index with values
kelondroRowSet collection = new kelondroRowSet ( rowdef , 0 ) ;
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { "abc" . getBytes ( ) , "efg" . getBytes ( ) } ) ) ;
collectionIndex . put ( "erstes" . getBytes ( ) , collection ) ;
for ( int i = 1 ; i < = 170 ; i + + ) {
collection = new kelondroRowSet ( rowdef , 0 ) ;
for ( int j = 0 ; j < i ; j + + ) {
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { ( "abc" + j ) . getBytes ( ) , "xxx" . getBytes ( ) } ) ) ;
}
System . out . println ( "put key-" + i + ": " + collection . toString ( ) ) ;
collectionIndex . put ( ( "key-" + i ) . getBytes ( ) , collection ) ;
}
// extend collections with more values
for ( int i = 0 ; i < = 170 ; i + + ) {
collection = new kelondroRowSet ( rowdef , 0 ) ;
for ( int j = 0 ; j < i ; j + + ) {
collection . addUnique ( rowdef . newEntry ( new byte [ ] [ ] { ( "def" + j ) . getBytes ( ) , "xxx" . getBytes ( ) } ) ) ;
}
collectionIndex . merge ( new indexContainer ( "key-" + i , collection ) ) ;
}
// printout of index
collectionIndex . close ( ) ;
kelondroFlexTable index = new kelondroFlexTable ( path , filenameStub + ".index" , preloadTime , kelondroCollectionIndex . indexRow ( 9 , kelondroNaturalOrder . naturalOrder ) , true ) ;
index . print ( ) ;
index . close ( ) ;
} catch ( IOException e ) {
e . printStackTrace ( ) ;
}
}
}