You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yacy_search_server/source/net/yacy/kelondro/blob/HeapWriter.java

198 lines
8.3 KiB

// HeapWriter.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.12.2008 on http://yacy.net
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.storage.HandleMap;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.kelondro.index.RowHandleMap;
import net.yacy.kelondro.util.FileUtils;
public final class HeapWriter {
private final static ConcurrentLog log = new ConcurrentLog("HeapWriter");
public final static byte[] ZERO = new byte[]{0};
private final int keylength; // the length of the primary key
private HandleMap index; // key/seek relation for used records
private final File heapFileTMP; // the temporary file of the heap during writing
private final File heapFileREADY; // the final file of the heap when the file is closed
private DataOutputStream os; // the output stream where the BLOB is written
private long seek; // the current write position
//private HashSet<String> doublecheck;// only for testing
/*
* This class implements a BLOB management based on a sequence of records
* The data structure is:
* file :== record*
* record :== reclen key blob
* reclen :== <4 byte integer == length of key and blob>
* key :== <bytes as defined with keylen, if first byte is zero then record is empty>
* blob :== <bytes of length reclen - keylen>
* that means that each record has the size reclen+4
*
* Because the blob sizes are stored with integers, one entry may not exceed 2GB
*
* With this class a BLOB file can only be written.
* To read them, use a kelondroBLOBHeapReader.
* A BLOBHeap can be also read and write in random access mode with kelondroBLOBHeap.
*/
/**
* create a heap file: a arbitrary number of BLOBs, indexed by an access key
* The heap file will be indexed upon initialization.
* @param temporaryHeapFile
* @param readyHeapFile
* @param keylength
* @param ordering
* @throws IOException
*/
public HeapWriter(final File temporaryHeapFile, final File readyHeapFile, final int keylength, final ByteOrder ordering, int outBuffer) throws IOException {
this.heapFileTMP = temporaryHeapFile;
this.heapFileREADY = readyHeapFile;
this.keylength = keylength;
this.index = new RowHandleMap(keylength, ordering, 8, 100000, readyHeapFile.getAbsolutePath());
final FileOutputStream fileStream = new FileOutputStream(temporaryHeapFile);
try {
try {
this.os = new DataOutputStream(new BufferedOutputStream(fileStream, outBuffer));
} catch (final OutOfMemoryError e) {
// try this again without buffer
this.os = new DataOutputStream(fileStream);
}
} catch(Exception e) {
try {
fileStream.close();
} catch(IOException ignored) {
log.warn("Could not close output stream on file " + temporaryHeapFile);
}
throw e;
}
this.seek = 0;
}
/**
* add a BLOB to the heap: this adds the blob always to the end of the file
* newly added heap entries must have keys that have not been added before
* @param key
* @param blob
* @throws IOException
* @throws SpaceExceededException
* @throws SpaceExceededException
*/
public synchronized void add(byte[] key, final byte[] blob) throws IOException, SpaceExceededException {
assert blob.length > 0;
key = HeapReader.normalizeKey(key, this.keylength);
assert key.length == this.keylength : "key.length == " + key.length + ", this.keylength = " + this.keylength; // after normalizing they should be equal in length
assert this.index.get(key) < 0 : "index.get(key) = " + this.index.get(key) + ", index.size() = " + this.index.size() + ", file.length() = " + this.heapFileTMP.length() + ", key = " + UTF8.String(key); // must not occur before
if ((blob == null) || (blob.length == 0)) return;
this.index.putUnique(key, this.seek);
int chunkl = this.keylength + blob.length;
this.os.writeInt(chunkl);
this.os.write(key);
this.os.write(blob);
this.seek += chunkl + 4;
//os.flush(); // necessary? may cause bad IO performance :-(
}
/**
* close the BLOB table
* @throws
*/
public synchronized void close(boolean writeIDX) throws IOException {
// close the file
try {
this.os.flush();
} catch(Exception e) {
// on flush error, free ram index resources before transmitting the exception
this.index.close();
this.index = null;
throw e;
} finally {
this.os.close();
}
this.os = null;
// rename the file into final name
if (this.heapFileREADY.exists()) FileUtils.deletedelete(this.heapFileREADY);
boolean renameok = this.heapFileTMP.renameTo(this.heapFileREADY);
if (!renameok) throw new IOException("cannot rename " + this.heapFileTMP + " to " + this.heapFileREADY);
if (!this.heapFileREADY.exists()) throw new IOException("renaming of " + this.heapFileREADY.toString() + " failed: files still exists");
if (this.heapFileTMP.exists()) throw new IOException("renaming to " + this.heapFileTMP.toString() + " failed: file does not exist");
// generate index and gap files
if (writeIDX && this.index.size() > 3) {
// now we can create a dump of the index and the gap information
// to speed up the next start
long start = System.currentTimeMillis();
String fingerprint = HeapReader.fingerprintFileHash(this.heapFileREADY);
if (fingerprint == null) {
log.severe("cannot write a dump for " + this.heapFileREADY.getName()+ ": fingerprint is null");
} else {
new Gap().dump(fingerprintGapFile(this.heapFileREADY, fingerprint));
this.index.dump(fingerprintIndexFile(this.heapFileREADY, fingerprint));
log.info("wrote a dump for the " + this.index.size() + " index entries of " + this.heapFileREADY.getName()+ " in " + (System.currentTimeMillis() - start) + " milliseconds.");
}
this.index.close();
this.index = null;
} else {
// this is small.. just free resources, do not write index
this.index.close();
this.index = null;
}
}
public static void delete(File f) {
File p = f.getParentFile();
String n = f.getName() + ".";
String[] l = p.list();
FileUtils.deletedelete(f);
for (String s: l) {
if (s.startsWith(n) &&
(s.endsWith(".idx") || s.endsWith(".gap")))
FileUtils.deletedelete(new File(p, s));
}
}
protected static File fingerprintIndexFile(File f, String fingerprint) {
assert f != null;
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".idx");
}
protected static File fingerprintGapFile(File f, String fingerprint) {
assert f != null;
return new File(f.getParentFile(), f.getName() + "." + fingerprint + ".gap");
}
}