// HeapWriter.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.12.2008 on http://yacy.net
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob ;
import java.io.BufferedOutputStream ;
import java.io.DataOutputStream ;
import java.io.File ;
import java.io.FileOutputStream ;
import java.io.IOException ;
import net.yacy.cora.document.encoding.UTF8 ;
import net.yacy.cora.order.ByteOrder ;
import net.yacy.cora.storage.HandleMap ;
import net.yacy.cora.util.ConcurrentLog ;
import net.yacy.cora.util.SpaceExceededException ;
import net.yacy.kelondro.index.RowHandleMap ;
import net.yacy.kelondro.util.FileUtils ;
public final class HeapWriter {
/** logger used by this class, registered under the name "HeapWriter" */
private static final ConcurrentLog log = new ConcurrentLog("HeapWriter");

/** a one-element byte array holding a single zero byte */
public static final byte[] ZERO = new byte[] {0};

/** the length of the primary key */
private final int keylength;

/** key/seek relation for used records */
private HandleMap index;

/** the temporary file of the heap during writing */
private final File heapFileTMP;

/** the final file of the heap when the file is closed */
private final File heapFileREADY;

/** the output stream where the BLOB is written */
private DataOutputStream os;

/** the current write position */
private long seek;

//private HashSet<String> doublecheck;// only for testing
/*
 * This class implements BLOB management based on a sequence of records.
 * The data structure is:
 * file   :== record*
 * record :== reclen key blob
 * reclen :== <4 byte integer == length of key and blob>
 * key    :== <bytes as defined with keylen, if first byte is zero then record is empty>
 * blob   :== <bytes of length reclen - keylen>
 * That means that each record has the size reclen + 4.
 *
 * Because the blob sizes are stored as integers, one entry may not exceed 2 GB.
 *
 * With this class a BLOB file can only be written.
 * To read them, use a kelondroBLOBHeapReader.
 * A BLOBHeap can also be read and written in random access mode with kelondroBLOBHeap.
 */
/ * *
* create a heap file : a arbitrary number of BLOBs , indexed by an access key
* The heap file will be indexed upon initialization .
* @param temporaryHeapFile
* @param readyHeapFile
* @param keylength
* @param ordering
* @throws IOException
* /
public HeapWriter ( final File temporaryHeapFile , final File readyHeapFile , final int keylength , final ByteOrder ordering , int outBuffer ) throws IOException {
this . heapFileTMP = temporaryHeapFile ;
this . heapFileREADY = readyHeapFile ;
this . keylength = keylength ;
this . index = new RowHandleMap ( keylength , ordering , 8 , 100000 , readyHeapFile . getAbsolutePath ( ) ) ;
try {
this . os = new DataOutputStream ( new BufferedOutputStream ( new FileOutputStream ( temporaryHeapFile ) , outBuffer ) ) ;
} catch ( final OutOfMemoryError e ) {
// try this again without buffer
this . os = new DataOutputStream ( new FileOutputStream ( temporaryHeapFile ) ) ;
}
this . seek = 0 ;
}
/ * *
* add a BLOB to the heap : this adds the blob always to the end of the file
* newly added heap entries must have keys that have not been added before
* @param key
* @param blob
* @throws IOException
* @throws SpaceExceededException
* @throws SpaceExceededException
* /
public synchronized void add ( byte [ ] key , final byte [ ] blob ) throws IOException , SpaceExceededException {
assert blob . length > 0 ;
key = HeapReader . normalizeKey ( key , this . keylength ) ;
assert key . length = = this . keylength : "key.length == " + key . length + ", this.keylength = " + this . keylength ; // after normalizing they should be equal in length
assert this . index . get ( key ) < 0 : "index.get(key) = " + this . index . get ( key ) + ", index.size() = " + this . index . size ( ) + ", file.length() = " + this . heapFileTMP . length ( ) + ", key = " + UTF8 . String ( key ) ; // must not occur before
if ( ( blob = = null ) | | ( blob . length = = 0 ) ) return ;
this . index . putUnique ( key , this . seek ) ;
int chunkl = this . keylength + blob . length ;
this . os . writeInt ( chunkl ) ;
this . os . write ( key ) ;
this . os . write ( blob ) ;
this . seek + = chunkl + 4 ;
//os.flush(); // necessary? may cause bad IO performance :-(
}
/ * *
* close the BLOB table
* @throws
* /
public synchronized void close ( boolean writeIDX ) throws IOException {
// close the file
this . os . flush ( ) ;
this . os . close ( ) ;
this . os = null ;
// rename the file into final name
if ( this . heapFileREADY . exists ( ) ) FileUtils . deletedelete ( this . heapFileREADY ) ;
boolean renameok = this . heapFileTMP . renameTo ( this . heapFileREADY ) ;
if ( ! renameok ) throw new IOException ( "cannot rename " + this . heapFileTMP + " to " + this . heapFileREADY ) ;
if ( ! this . heapFileREADY . exists ( ) ) throw new IOException ( "renaming of " + this . heapFileREADY . toString ( ) + " failed: files still exists" ) ;
if ( this . heapFileTMP . exists ( ) ) throw new IOException ( "renaming to " + this . heapFileTMP . toString ( ) + " failed: file does not exist" ) ;
// generate index and gap files
if ( writeIDX & & this . index . size ( ) > 3 ) {
// now we can create a dump of the index and the gap information
// to speed up the next start
long start = System . currentTimeMillis ( ) ;
String fingerprint = HeapReader . fingerprintFileHash ( this . heapFileREADY ) ;
if ( fingerprint = = null ) {
log . severe ( "cannot write a dump for " + this . heapFileREADY . getName ( ) + ": fingerprint is null" ) ;
} else {
new Gap ( ) . dump ( fingerprintGapFile ( this . heapFileREADY , fingerprint ) ) ;
this . index . dump ( fingerprintIndexFile ( this . heapFileREADY , fingerprint ) ) ;
log . info ( "wrote a dump for the " + this . index . size ( ) + " index entries of " + this . heapFileREADY . getName ( ) + " in " + ( System . currentTimeMillis ( ) - start ) + " milliseconds." ) ;
}
this . index . close ( ) ;
this . index = null ;
} else {
// this is small.. just free resources, do not write index
this . index . close ( ) ;
this . index = null ;
}
}
public static void delete ( File f ) {
File p = f . getParentFile ( ) ;
String n = f . getName ( ) + "." ;
String [ ] l = p . list ( ) ;
FileUtils . deletedelete ( f ) ;
for ( String s : l ) {
if ( s . startsWith ( n ) & &
( s . endsWith ( ".idx" ) | | s . endsWith ( ".gap" ) ) )
FileUtils . deletedelete ( new File ( p , s ) ) ;
}
}
protected static File fingerprintIndexFile ( File f , String fingerprint ) {
assert f ! = null ;
return new File ( f . getParentFile ( ) , f . getName ( ) + "." + fingerprint + ".idx" ) ;
}
protected static File fingerprintGapFile ( File f , String fingerprint ) {
assert f ! = null ;
return new File ( f . getParentFile ( ) , f . getName ( ) + "." + fingerprint + ".gap" ) ;
}
}