Migration of all databases that use the deprecated BLOBTree format into the BLOBHeap format. Old databases are migrated automatically.

This removes the last very IO-intensive data structures, which were still used for Wiki, Blog and Bookmarks. Old database files remain in the DATA subdirectory and can be deleted manually if no major bugs appear during migration. No user action is required; the migration runs automatically.
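For orientation, here is a minimal sketch of the migration pattern this commit applies to each board: the old BLOBTree file is converted into a BLOBHeap via BLOBTree.toHeap() and then wrapped in a MapView. The constructor signatures are taken from the diff below; the helper name, file names, key length (12) and record size (256) are illustrative values borrowed from the bookmarks table, not part of this commit.

import java.io.File;
import java.io.IOException;

import de.anomic.kelondro.blob.BLOBHeap;
import de.anomic.kelondro.blob.BLOBTree;
import de.anomic.kelondro.blob.MapView;
import de.anomic.kelondro.order.NaturalOrder;

public class MigrationSketch {
    // hypothetical helper: open newHeapFile, migrating oldTreeFile into it if needed
    public static MapView openMigrated(final File oldTreeFile, final File newHeapFile) throws IOException {
        newHeapFile.getParentFile().mkdirs();
        // toHeap() copies every key/value pair of the old BLOBTree into a new BLOBHeap;
        // if the heap file already exists (or the tree file is gone), it simply opens the heap
        final BLOBHeap heap = BLOBTree.toHeap(
                oldTreeFile, true, true, 12, 256, '_',
                NaturalOrder.naturalOrder, true, false, false, newHeapFile);
        // MapView now also takes the fill character used to pad short keys
        return new MapView(heap, 500, '_');
    }
}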

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5986 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 16 years ago
parent 4b4bddca00
commit 3d4b826ca5

@ -31,7 +31,6 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBHeap;
import de.anomic.kelondro.blob.MapView;
import de.anomic.kelondro.order.Base64Order;
@ -67,8 +66,8 @@ public class CrawlProfile {
public CrawlProfile(final File file) throws IOException {
this.profileTableFile = file;
profileTableFile.getParentFile().mkdirs();
final BLOB dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64);
profileTable = new MapView(dyn, 500);
final BLOBHeap dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64);
profileTable = new MapView(dyn, 500, '_');
}
public void clear() {
@ -76,13 +75,13 @@ public class CrawlProfile {
if (profileTable != null) profileTable.close();
FileUtils.deletedelete(profileTableFile);
profileTableFile.getParentFile().mkdirs();
BLOB dyn = null;
BLOBHeap dyn = null;
try {
dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64);
} catch (IOException e) {
e.printStackTrace();
}
profileTable = new MapView(dyn, 500);
profileTable = new MapView(dyn, 500, '_');
}
public void close() {

@ -43,7 +43,6 @@ import java.util.concurrent.ConcurrentHashMap;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBHeap;
import de.anomic.kelondro.blob.MapView;
import de.anomic.kelondro.order.NaturalOrder;
@ -71,13 +70,13 @@ public class RobotsTxt {
public RobotsTxt(final File robotsTableFile) {
this.robotsTableFile = robotsTableFile;
robotsTableFile.getParentFile().mkdirs();
BLOB blob = null;
BLOBHeap blob = null;
try {
blob = new BLOBHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024);
} catch (final IOException e) {
e.printStackTrace();
}
robotsTable = new MapView(blob, 100);
robotsTable = new MapView(blob, 100, '_');
syncObjects = new ConcurrentHashMap<String, DomSync>();
}
@ -86,13 +85,13 @@ public class RobotsTxt {
if (robotsTable != null) robotsTable.close();
FileUtils.deletedelete(robotsTableFile);
robotsTableFile.getParentFile().mkdirs();
BLOB blob = null;
BLOBHeap blob = null;
try {
blob = new BLOBHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024);
} catch (final IOException e) {
e.printStackTrace();
}
robotsTable = new MapView(blob, 100);
robotsTable = new MapView(blob, 100, '_');
syncObjects.clear();
}

@ -63,10 +63,11 @@ public class blogBoard {
MapView database = null;
public blogBoard(final File actpath) {
new File(actpath.getParent()).mkdir();
public blogBoard(final File actpath, final File newFile) throws IOException {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500);
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, newFile), 500, '_');
}
}

@ -60,28 +60,34 @@ public class blogBoardComments {
private static final int recordSize = 512;
static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat);
static {
SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
}
private MapView database = null;
public blogBoardComments(final File actpath) {
new File(actpath.getParent()).mkdir();
public blogBoardComments(final File actpath, final File newFile) throws IOException {
new File(actpath.getParent()).mkdir();
new File(newFile.getParent()).mkdir();
if (database == null) {
database = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false), 500);
database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false, newFile), 500, '_');
}
}
public int size() {
return database.size();
}
public void close() {
database.close();
}
static String dateString(final Date date) {
synchronized (SimpleFormatter) {
return SimpleFormatter.format(date);
}
}
private static String normalize(final String key) {
if (key == null) return "null";
return key.trim().toLowerCase();

@ -108,22 +108,25 @@ public class bookmarksDB {
// bookmarksDB's class constructor
// ------------------------------------
public bookmarksDB(final File bookmarksFile, final File tagsFile, final File datesFile) {
public bookmarksDB(
final File bookmarksFile, final File bookmarksFileNew,
final File tagsFile, final File tagsFileNew,
final File datesFile, final File datesFileNew) throws IOException {
// bookmarks
tagCache=new TreeMap<String, Tag>();
bookmarksFile.getParentFile().mkdirs();
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
this.bookmarksTable = new MapView(new BLOBTree(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false), 1000);
this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, bookmarksFileNew), 1000, '_');
// tags
tagsFile.getParentFile().mkdirs();
final boolean tagsFileExisted = tagsFile.exists();
this.tagsTable = new MapView(new BLOBTree(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false), 500);
this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, tagsFileNew), 500, '_');
if (!tagsFileExisted) rebuildTags();
// dates
final boolean datesExisted = datesFile.exists();
this.datesTable = new MapView(new BLOBTree(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false), 500);
this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false, datesFileNew), 500, '_');
if (!datesExisted) rebuildDates();
// autoReCrawl

@ -51,10 +51,11 @@ public class messageBoard {
MapView database = null;
private int sn = 0;
public messageBoard(final File path) {
public messageBoard(final File path, final File pathNew) throws IOException {
new File(path.getParent()).mkdir();
new File(pathNew.getParent()).mkdir();
if (database == null) {
database = new MapView(new BLOBTree(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500);
database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, pathNew), 500, '_');
}
sn = 0;
}

@ -36,6 +36,7 @@ import java.util.Map;
import java.util.Random;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.blob.BLOBHeap;
import de.anomic.kelondro.blob.BLOBTree;
import de.anomic.kelondro.blob.MapView;
import de.anomic.kelondro.order.Base64Order;
@ -55,10 +56,11 @@ public final class userDB {
HashMap<String, String> ipUsers = new HashMap<String, String>();
HashMap<String, Object> cookieUsers = new HashMap<String, Object>();
public userDB(final File userTableFile) {
this.userTableFile = userTableFile;
public userDB(final File userTableFile, final File userTableFileNew) throws IOException {
this.userTableFile = userTableFileNew;
userTableFile.getParentFile().mkdirs();
this.userTable = new MapView(new BLOBTree(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false), 10);
userTableFileNew.getParentFile().mkdirs();
this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false, userTableFile), 10, '_');
}
void resetDatabase() {
@ -66,7 +68,11 @@ public final class userDB {
if (userTable != null) userTable.close();
FileUtils.deletedelete(userTableFile);
userTableFile.getParentFile().mkdirs();
userTable = new MapView(new BLOBTree(userTableFile, true, true, 256, 512, '_', NaturalOrder.naturalOrder, true, false, false), 10);
try {
userTable = new MapView(new BLOBHeap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64), 10, '_');
} catch (IOException e) {
e.printStackTrace();
}
}
public void close() {

@ -52,14 +52,16 @@ public class wikiBoard {
MapView bkpbase = null;
static HashMap<String, String> authors = new HashMap<String, String>();
public wikiBoard(final File actpath, final File bkppath) {
public wikiBoard(
final File actpath, final File actpathNew,
final File bkppath, final File bkppathNew) throws IOException {
new File(actpath.getParent()).mkdirs();
if (datbase == null) {
datbase = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500);
datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, actpathNew), 500, '_');
}
new File(bkppath.getParent()).mkdirs();
if (bkpbase == null) {
bkpbase = new MapView(new BLOBTree(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500);
bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false, bkppathNew), 500, '_');
}
}

@ -150,6 +150,8 @@ public class BLOBArray implements BLOB {
} catch (ParseException e) {continue;}
}
}
// open all blob files
for (int i = 0; i < files.length; i++) {
if (files[i].length() >= 22 && files[i].startsWith(prefix) && files[i].endsWith(".blob")) {
try {

@ -459,7 +459,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB {
final File f = new File("/Users/admin/blobtest.heap");
try {
//f.delete();
final MapView heap = new MapView(new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500);
final MapView heap = new MapView(new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500, '_');
heap.put("aaaaaaaaaaaa", map("aaaaaaaaaaaa", "eins zwei drei"));
heap.put("aaaaaaaaaaab", map("aaaaaaaaaaab", "vier fuenf sechs"));
heap.put("aaaaaaaaaaac", map("aaaaaaaaaaac", "sieben acht neun"));

@ -39,7 +39,10 @@ import de.anomic.kelondro.util.Log;
public class BLOBHeapModifier extends HeapReader implements BLOB {
/*
* This class adds a remove operation to a BLOBHeapReader
* This class adds a remove operation to a BLOBHeapReader. That means that a BLOBModifier can
* - read elements from a BLOB
* - remove elements from a BLOB
* but cannot write new entries to the BLOB
*/
/**

@ -48,16 +48,14 @@ import de.anomic.kelondro.io.AbstractRandomAccess;
import de.anomic.kelondro.io.RandomAccessInterface;
import de.anomic.kelondro.order.ByteOrder;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.order.RotateIterator;
import de.anomic.kelondro.table.EcoTable;
import de.anomic.kelondro.table.FlexTable;
import de.anomic.kelondro.table.FlexWidthArray;
import de.anomic.kelondro.table.Tree;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.kelondro.util.kelondroException;
public class BLOBTree implements BLOB {
public class BLOBTree {
private static final int counterlen = 8;
private static final int EcoFSBufferSize = 20;
@ -69,17 +67,14 @@ public class BLOBTree implements BLOB {
private final ObjectIndex index;
private ObjectBuffer buffer;
private final Row rowdef;
private File file;
/**
* Deprecated Class. Please use kelondroBLOBHeap instead
*/
@Deprecated
public BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail) {
// creates or opens a dynamic tree
rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder);
this.file = file;
ObjectIndex fbi;
if (usetree) {
try {
@ -117,57 +112,38 @@ public class BLOBTree implements BLOB {
//this.segmentCount = 0;
//if (!(tree.fileExisted)) writeSegmentCount();
buffer = new ObjectBuffer(file.toString());
/*
// debug
try {
kelondroCloneableIterator<byte[]> i = keys(true, false);
HashSet<String> t = new HashSet<String>();
while (i.hasNext()) {
byte[] b = i.next();
String s = new String(b);
t.add(s);
System.out.println("*** DEBUG BLOBTree " + file.getName() + " KEY=" + s);
}
Iterator<String> j = t.iterator();
while (j.hasNext()) {
String s = j.next();
byte[] r = this.get(s.getBytes());
if (r == null) System.out.println("*** DEBUG BLOBTree " + file.getName() + " KEY=" + s + " cannot be retrieved");
}
} catch (IOException e) {
e.printStackTrace();
}
*/
}
public String name() {
return this.file.getName();
}
public static final void delete(final File file) {
if (file.isFile()) {
FileUtils.deletedelete(file);
} else {
FlexWidthArray.delete(file.getParentFile(), file.getName());
public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key,
final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException {
if (blob.exists() || !file.exists()) {
// open the blob file and ignore the tree
return new BLOBHeap(blob, key, objectOrder, 1024 * 64);
}
}
public synchronized void clear() throws IOException {
final String name = this.index.filename();
this.index.clear();
this.buffer = new ObjectBuffer(name);
}
public int keylength() {
return this.keylen;
}
public ByteOrder ordering() {
return this.rowdef.objectOrder;
}
public synchronized int size() {
return index.size();
// open a Tree and migrate everything to a Heap
BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, usetree, writebuffer, resetOnFail);
BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64);
Iterator<byte[]> i = tree.keys(true, false);
byte[] k, kk = new byte[key], v;
String s;
while (i.hasNext()) {
k = i.next();
//assert k.length == key : "k.length = " + k.length + ", key = " + key;
if (k == null) continue;
v = tree.get(k);
if (v == null) continue;
s = new String(v, "UTF-8").trim();
// enlarge entry key to fit into the given key length
if (k.length == key) {
heap.put(k, s.getBytes("UTF-8"));
} else {
System.arraycopy(k, 0, kk, 0, k.length);
for (int j = k.length; j < key; j++) kk[j] = (byte) fillChar;
heap.put(kk, s.getBytes("UTF-8"));
}
}
tree.close(false);
return heap;
}
private static String counter(final int c) {
@ -187,7 +163,7 @@ public class BLOBTree implements BLOB {
}
}
String origKey(final byte[] rawKey) {
private String origKey(final byte[] rawKey) {
int n = keylen - 1;
if (n >= rawKey.length) n = rawKey.length - 1;
while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--;
@ -198,12 +174,12 @@ public class BLOBTree implements BLOB {
}
}
public class keyIterator implements CloneableIterator<byte[]> {
private class keyIterator implements CloneableIterator<byte[]> {
// the iterator iterates all keys
CloneableIterator<Row.Entry> ri;
String nextKey;
public keyIterator(final CloneableIterator<Row.Entry> iter) {
private keyIterator(final CloneableIterator<Row.Entry> iter) {
ri = iter;
nextKey = n();
}
@ -212,11 +188,11 @@ public class BLOBTree implements BLOB {
return new keyIterator(ri.clone(modifier));
}
public boolean hasNext() {
public boolean hasNext() {
return nextKey != null;
}
public byte[] next() {
public byte[] next() {
final String result = nextKey;
nextKey = n();
try {
@ -226,7 +202,7 @@ public class BLOBTree implements BLOB {
}
}
public void remove() {
public void remove() {
throw new UnsupportedOperationException("no remove in RawKeyIterator");
}
@ -263,17 +239,13 @@ public class BLOBTree implements BLOB {
}
public synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
private synchronized CloneableIterator<byte[]> keys(final boolean up, final boolean rotating) throws IOException {
// iterates only the keys of the Nodes
// enumerated objects are of type String
final keyIterator i = new keyIterator(index.rows(up, null));
if (rotating) return new RotateIterator<byte[]>(i, null, index.size());
return i;
}
public synchronized CloneableIterator<byte[]> keys(final boolean up, final byte[] firstKey) throws IOException {
return new keyIterator(index.rows(up, firstKey));
}
private byte[] getValueCached(final byte[] key) throws IOException {
@ -297,7 +269,7 @@ public class BLOBTree implements BLOB {
}
}
synchronized int get(final String key, final int pos) throws IOException {
private synchronized int get(final String key, final int pos) throws IOException {
final int reccnt = pos / reclen;
// read within a single record
final byte[] buf = getValueCached(elementKey(key, reccnt));
@ -307,13 +279,13 @@ public class BLOBTree implements BLOB {
return buf[recpos] & 0xFF;
}
public synchronized byte[] get(final byte[] key) throws IOException {
private synchronized byte[] get(final byte[] key) throws IOException {
final RandomAccessInterface ra = getRA(new String(key, "UTF-8"));
if (ra == null) return null;
return ra.readFully();
}
synchronized byte[] get(final String key, final int pos, final int len) throws IOException {
private synchronized byte[] get(final String key, final int pos, final int len) throws IOException {
final int recpos = pos % reclen;
final int reccnt = pos / reclen;
byte[] segment1;
@ -356,12 +328,8 @@ public class BLOBTree implements BLOB {
System.arraycopy(segment2, 0, result, segment1.length, segment2.length);
return result;
}
public synchronized void put(final byte[] key, final byte[] b) throws IOException {
put(new String(key), 0, b, 0, b.length);
}
synchronized void put(final String key, final int pos, final byte[] b, final int off, final int len) throws IOException {
private synchronized void put(final String key, final int pos, final byte[] b, final int off, final int len) throws IOException {
final int recpos = pos % reclen;
final int reccnt = pos / reclen;
byte[] buf;
@ -397,7 +365,7 @@ public class BLOBTree implements BLOB {
}
}
synchronized void put(final String key, final int pos, final int b) throws IOException {
private synchronized void put(final String key, final int pos, final int b) throws IOException {
final int recpos = pos % reclen;
final int reccnt = pos / reclen;
byte[] buf;
@ -413,42 +381,21 @@ public class BLOBTree implements BLOB {
buf[recpos] = (byte) b;
setValueCached(elementKey(key, reccnt), buf);
}
public synchronized void remove(final byte[] key) throws IOException {
// remove value in cache and tree
if (key == null) return;
int recpos = 0;
byte[] k;
while (index.get(k = elementKey(new String(key, "UTF-8"), recpos)) != null) {
index.remove(k);
buffer.remove(k);
recpos++;
}
//segmentCount--; writeSegmentCount();
}
public synchronized boolean has(final byte[] key) {
try {
return (key != null) && (getValueCached(elementKey(new String(key), 0)) != null);
} catch (IOException e) {
return false;
}
}
public synchronized RandomAccessInterface getRA(final String filekey) {
private synchronized RandomAccessInterface getRA(final String filekey) {
// this always returns a RARecord, even if none existed before
//return new kelondroBufferedRA(new RARecord(filekey), 512, 0);
return new RARecord(filekey);
}
public class RARecord extends AbstractRandomAccess implements RandomAccessInterface {
private class RARecord extends AbstractRandomAccess implements RandomAccessInterface {
int seekpos = 0;
int compLength = -1;
String filekey;
public RARecord(final String filekey) {
private RARecord(final String filekey) {
this.filekey = filekey;
}
@ -500,62 +447,7 @@ public class BLOBTree implements BLOB {
}
public synchronized void close(boolean writeIDX) {
private synchronized void close(boolean writeIDX) {
index.close();
}
public static void main(final String[] args) {
// test app for DB functions
// reads/writes files to a database table
// arguments:
// {-f2db/-db2f} <db-name> <key> <filename>
if (args.length == 1) {
// open a db and list keys
try {
final BLOB kd = new BLOBTree(new File(args[0]), true, true, 4 ,100, '_', NaturalOrder.naturalOrder, true, false, false);
System.out.println(kd.size() + " elements in DB");
final Iterator<byte[]> i = kd.keys(true, false);
while (i.hasNext())
System.out.println(new String(i.next()));
kd.close(true);
} catch (final IOException e) {
e.printStackTrace();
}
}
}
public static int countElements(final BLOBTree t) {
int count = 0;
try {
final Iterator<byte[]> iter = t.keys(true, false);
while (iter.hasNext()) {count++; if (iter.next() == null) System.out.println("ERROR! null element found");}
return count;
} catch (final IOException e) {
return -1;
}
}
public long length(byte[] key) throws IOException {
byte[] b = get(key);
if (b == null) return -1;
return b.length;
}
public long length() throws IOException {
return this.file.length();
}
public int replace(byte[] key, Rewriter rewriter) throws IOException {
byte[] b = get(key);
if (b == null) {
remove(key);
return 0;
}
byte[] c = rewriter.rewrite(b);
int reduced = b.length - c.length;
assert reduced >= 0;
put(key, c);
return reduced;
}
}

@ -29,6 +29,7 @@ import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
@ -101,7 +102,9 @@ public class HeapReader {
// if this is successful, return true; otherwise false
String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile);
File fif = HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint);
if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz");
File fgf = HeapWriter.fingerprintGapFile(this.heapFile, fingerprint);
if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz");
if (!fif.exists() || !fgf.exists()) {
HeapWriter.deleteAllFingerprints(this.heapFile);
return false;
@ -326,6 +329,17 @@ public class HeapReader {
public long length() throws IOException {
return this.heapFile.length();
}
public String excave(final byte[] rawKey, char fillChar) {
int n = this.keylength - 1;
if (n >= rawKey.length) n = rawKey.length - 1;
while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--;
try {
return new String(rawKey, 0, n + 1, "UTF-8");
} catch (UnsupportedEncodingException e) {
return new String(rawKey, 0, n + 1);
}
}
/**
* static iterator of entries in BLOBHeap files:

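The new HeapReader.excave() is the read-side counterpart of MapView's key padding: MapView.normalizeKey() pads short keys with the fill character up to the BLOB's key length, and excave() strips that padding again when keys are handed back. A small self-contained illustration of the two operations (plain Java, independent of the kelondro classes; class and method names are illustrative):

import java.io.UnsupportedEncodingException;

public class KeyPaddingDemo {

    // pad a short key with the fill character, cf. MapView.normalizeKey()
    static String pad(String key, final int keyLength, final char fill) {
        if (key.length() > keyLength) key = key.substring(0, keyLength);
        final StringBuilder sb = new StringBuilder(key);
        while (sb.length() < keyLength) sb.append(fill);
        return sb.toString();
    }

    // strip trailing fill characters again, cf. HeapReader.excave()
    static String excave(final byte[] rawKey, final int keyLength, final char fill) throws UnsupportedEncodingException {
        int n = Math.min(keyLength, rawKey.length) - 1;
        while (n > 0 && rawKey[n] == (byte) fill) n--;
        return new String(rawKey, 0, n + 1, "UTF-8");
    }

    public static void main(final String[] args) throws Exception {
        final String padded = pad("abc", 12, '_');                        // "abc_________"
        final String original = excave(padded.getBytes("UTF-8"), 12, '_');
        System.out.println(padded + " -> " + original);                   // abc_________ -> abc
    }
}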
@ -128,7 +128,7 @@ public final class HeapWriter {
String n = f.getName();
String[] l = d.list();
for (int i = 0; i < l.length; i++) {
if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap"))) FileUtils.deletedelete(new File(d, l[i]));
if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap") || l[i].endsWith(".idx.gz") || l[i].endsWith(".gap.gz"))) FileUtils.deletedelete(new File(d, l[i]));
}
}

@ -44,8 +44,8 @@ public class MapDataMining extends MapView {
private HashMap<String, Object> accMap; // to store accumulations of specific fields
@SuppressWarnings("unchecked")
public MapDataMining(final BLOB dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) {
super(dyn, cachesize);
public MapDataMining(final BLOBHeap dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) {
super(dyn, cachesize, '_');
// create fast ordering clusters and acc fields
this.sortfields = sortfields;

@ -52,14 +52,16 @@ public class MapView {
private HashMap<String, Map<String, String>> cache;
private final long startup;
private final int cachesize;
private final char fillchar;
public MapView(final BLOB blob, final int cachesize) {
public MapView(final BLOBHeap blob, final int cachesize, char fillchar) {
this.blob = blob;
this.cache = new HashMap<String, Map<String, String>>();
this.cacheScore = new ScoreCluster<String>();
this.startup = System.currentTimeMillis();
this.cachesize = cachesize;
this.fillchar = fillchar;
/*
// debug
try {
@ -103,15 +105,15 @@ public class MapView {
}
private static String map2string(final Map<String, String> map, final String comment) {
final Iterator<Map.Entry<String, String>> iter = map.entrySet().iterator();
Map.Entry<String, String> entry;
final StringBuilder bb = new StringBuilder(map.size() * 40);
bb.append("# ").append(comment).append("\r\n");
while (iter.hasNext()) {
entry = iter.next();
bb.append(entry.getKey()).append('=');
if (entry.getValue() != null) { bb.append(entry.getValue()); }
bb.append("\r\n");
for (Map.Entry<String, String> entry: map.entrySet()) {
if (entry.getValue() != null) {
bb.append(entry.getKey());
bb.append('=');
bb.append(entry.getValue());
bb.append("\r\n");
}
}
bb.append("# EOF\r\n");
return bb.toString();
@ -147,7 +149,7 @@ public class MapView {
key = normalizeKey(key);
// write entry
blob.put(key.getBytes(), map2string(newMap, "W" + DateFormatter.formatShortSecond() + " ").getBytes());
blob.put(key.getBytes("UTF-8"), map2string(newMap, "W" + DateFormatter.formatShortSecond() + " ").getBytes("UTF-8"));
// check for space in cache
checkCacheSpace();
@ -202,7 +204,7 @@ public class MapView {
private String normalizeKey(String key) {
if (key.length() > blob.keylength()) key = key.substring(0, blob.keylength());
while (key.length() < blob.keylength()) key += "_";
while (key.length() < blob.keylength()) key += fillchar;
return key;
}
@ -215,13 +217,13 @@ public class MapView {
Map<String, String> map = cache.get(key);
if (map != null) return map;
// load map from kra
// load map
if (!(blob.has(key.getBytes()))) return null;
// read object
final byte[] b = blob.get(key.getBytes());
if (b == null) return null;
map = string2map(new String(b));
map = string2map(new String(b, "UTF-8"));
if (storeCache) {
// cache it also
@ -330,7 +332,7 @@ public class MapView {
return null;
}
try {
final Map<String, String> obj = get(new String(nextKey));
final Map<String, String> obj = get(new String(nextKey, "UTF-8"));
if (obj == null) throw new kelondroException("no more elements available");
return obj;
} catch (final IOException e) {
@ -350,9 +352,9 @@ public class MapView {
if (f.exists()) FileUtils.deletedelete(f);
try {
// make a blob
BLOB blob = new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024);
BLOBHeap blob = new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024);
// make map
MapView map = new MapView(blob, 1024);
MapView map = new MapView(blob, 1024, '_');
// put some values into the map
Map<String, String> m = new HashMap<String, String>();
m.put("k", "000"); map.put("123", m);
@ -361,7 +363,7 @@ public class MapView {
// iterate over keys
Iterator<byte[]> i = map.keys(true, false);
while (i.hasNext()) {
System.out.println("key: " + new String(i.next()));
System.out.println("key: " + new String(i.next(), "UTF-8"));
}
// clean up
map.close();

@ -41,6 +41,8 @@ import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.ByteOrder;
@ -75,6 +77,7 @@ public class LongHandleIndex {
this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace);
// read the index dump and fill the index
InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
byte[] a = new byte[keylength + 8];
int c;
while (true) {
@ -101,6 +104,7 @@ public class LongHandleIndex {
File tmp = new File(file.getParentFile(), file.getName() + ".tmp");
Iterator<Row.Entry> i = this.index.rows(true, null);
OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024);
if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
int c = 0;
while (i.hasNext()) {
os.write(i.next().bytes());

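LongHandleIndex now writes and reads its index dump through a GZIP stream whenever the dump file name ends in .gz; the fingerprint index and gap files in HeapReader/HeapWriter above get the same .gz-aware handling. A standalone sketch of the pattern (class, method and file names are hypothetical):

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class GzDumpSketch {

    // open a dump stream; compress transparently when the file name asks for it
    static OutputStream openDump(final File file) throws IOException {
        OutputStream os = new BufferedOutputStream(new FileOutputStream(file), 4 * 1024 * 1024);
        if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os);
        return os;
    }

    // open a restore stream; decompress transparently when the file name asks for it
    static InputStream openRestore(final File file) throws IOException {
        InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024);
        if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is);
        return is;
    }

    public static void main(final String[] args) throws IOException {
        final File dump = new File("index.dump.gz");   // hypothetical file name
        final byte[] record = new byte[12 + 8];        // key length + 8 bytes for the long value
        System.arraycopy("keyAAAAAAAAA".getBytes("UTF-8"), 0, record, 0, 12);
        final OutputStream os = openDump(dump);
        os.write(record);
        os.close();
        final InputStream is = openRestore(dump);
        final byte[] back = new byte[record.length];
        System.out.println("restored " + is.read(back) + " bytes");
        is.close();
    }
}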
@ -42,7 +42,6 @@ import java.util.HashMap;
import java.util.Map;
import de.anomic.http.httpResponseHeader;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBArray;
import de.anomic.kelondro.blob.BLOBCompressor;
import de.anomic.kelondro.blob.BLOBHeap;
@ -128,13 +127,13 @@ public final class plasmaHTCache {
private static void openDB() {
// open the response header database
final File dbfile = new File(cachePath, RESPONSE_HEADER_DB_NAME);
BLOB blob = null;
BLOBHeap blob = null;
try {
blob = new BLOBHeap(dbfile, yacySeedDB.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024);
} catch (final IOException e) {
e.printStackTrace();
}
responseHeaderDB = new MapView(blob, 500);
responseHeaderDB = new MapView(blob, 500, '_');
try {
fileDBunbuffered = new BLOBArray(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2);
fileDBunbuffered.setMaxSize(maxCacheSize);

@ -269,7 +269,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
private static plasmaSwitchboard sb = null;
public plasmaSwitchboard(final File rootPath, final String initPath, final String configPath, final boolean applyPro) {
public plasmaSwitchboard(final File rootPath, final String initPath, final String configPath, final boolean applyPro) throws IOException {
super(rootPath, initPath, configPath, applyPro);
serverProfiling.startSystemProfiling();
sb=this;
@ -438,7 +438,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// loading the robots.txt db
this.log.logConfig("Initializing robots.txt DB");
final File robotsDBFile = new File(this.plasmaPath, plasmaSwitchboardConstants.DBFILE_CRAWL_ROBOTS);
final File robotsDBFile = new File(this.plasmaPath, "crawlRobotsTxt.heap");
robots = new RobotsTxt(robotsDBFile);
this.log.logConfig("Loaded robots.txt DB from file " + robotsDBFile.getName() +
", " + robots.size() + " entries" +
@ -477,8 +477,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// Init User DB
this.log.logConfig("Loading User DB");
final File userDbFile = new File(getRootPath(), plasmaSwitchboardConstants.DBFILE_USER);
this.userDB = new userDB(userDbFile);
final File userDbFileOld = new File(getRootPath(), "DATA/SETTINGS/user.db");
final File userDbFile = new File(getRootPath(), "DATA/SETTINGS/user.heap");
this.userDB = new userDB(userDbFileOld, userDbFile);
this.log.logConfig("Loaded User DB from file " + userDbFile.getName() +
", " + this.userDB.size() + " entries" +
", " + ppRamString(userDbFile.length()/1024));
@ -849,45 +850,55 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
setConfig(plasmaSwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER_IDLESLEEP, Math.max(30000, 3600000 / ppm));
}
public void initMessages() {
public void initMessages() throws IOException {
this.log.logConfig("Starting Message Board");
final File messageDbFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_MESSAGE);
this.messageDB = new messageBoard(messageDbFile);
final File messageDbFileOld = new File(workPath, "message.db");
final File messageDbFile = new File(workPath, "message.heap");
this.messageDB = new messageBoard(messageDbFileOld, messageDbFile);
this.log.logConfig("Loaded Message Board DB from file " + messageDbFile.getName() +
", " + this.messageDB.size() + " entries" +
", " + ppRamString(messageDbFile.length()/1024));
}
public void initWiki() {
public void initWiki() throws IOException {
this.log.logConfig("Starting Wiki Board");
final File wikiDbFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_WIKI);
this.wikiDB = new wikiBoard(wikiDbFile, new File(workPath, plasmaSwitchboardConstants.DBFILE_WIKI_BKP));
final File wikiDbFileOld = new File(workPath, "wiki.db");
final File wikiDbFile = new File(workPath, "wiki.heap");
this.wikiDB = new wikiBoard(wikiDbFileOld, wikiDbFile, new File(workPath, "wiki-bkp.db"), new File(workPath, "wiki-bkp.heap"));
this.log.logConfig("Loaded Wiki Board DB from file " + wikiDbFile.getName() +
", " + this.wikiDB.size() + " entries" +
", " + ppRamString(wikiDbFile.length()/1024));
}
public void initBlog() {
public void initBlog() throws IOException {
this.log.logConfig("Starting Blog");
final File blogDbFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_BLOG);
this.blogDB = new blogBoard(blogDbFile);
final File blogDbFileOld = new File(workPath, "blog.db");
final File blogDbFile = new File(workPath, "blog.heap");
this.blogDB = new blogBoard(blogDbFileOld, blogDbFile);
this.log.logConfig("Loaded Blog DB from file " + blogDbFile.getName() +
", " + this.blogDB.size() + " entries" +
", " + ppRamString(blogDbFile.length()/1024));
final File blogCommentDbFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_BLOGCOMMENTS);
this.blogCommentDB = new blogBoardComments(blogCommentDbFile);
final File blogCommentDbFileOld = new File(workPath, "blogComment.db");
final File blogCommentDbFile = new File(workPath, "blogComment.heap");
this.blogCommentDB = new blogBoardComments(blogCommentDbFileOld, blogCommentDbFile);
this.log.logConfig("Loaded Blog-Comment DB from file " + blogCommentDbFile.getName() +
", " + this.blogCommentDB.size() + " entries" +
", " + ppRamString(blogCommentDbFile.length()/1024));
}
public void initBookmarks(){
public void initBookmarks() throws IOException{
this.log.logConfig("Loading Bookmarks DB");
final File bookmarksFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_BOOKMARKS);
final File tagsFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_BOOKMARKS_TAGS);
final File datesFile = new File(workPath, plasmaSwitchboardConstants.DBFILE_BOOKMARKS_DATES);
this.bookmarksDB = new bookmarksDB(bookmarksFile, tagsFile, datesFile);
final File bookmarksFileOld = new File(workPath, "bookmarks.db");
final File tagsFileOld = new File(workPath, "bookmarkTags.db");
final File datesFileOld = new File(workPath, "bookmarkDates.db");
final File bookmarksFile = new File(workPath, "bookmarks.heap");
final File tagsFile = new File(workPath, "bookmarkTags.heap");
final File datesFile = new File(workPath, "bookmarkDates.heap");
this.bookmarksDB = new bookmarksDB(
bookmarksFileOld, bookmarksFile,
tagsFileOld, tagsFile,
datesFileOld, datesFile);
this.log.logConfig("Loaded Bookmarks DB from files "+ bookmarksFile.getName()+ ", "+tagsFile.getName());
this.log.logConfig(this.bookmarksDB.tagsSize()+" Tag, "+this.bookmarksDB.bookmarksSize()+" Bookmarks");
}

@ -400,80 +400,7 @@ public final class plasmaSwitchboardConstants {
*/
public static final String WORK_PATH = "workPath";
public static final String WORK_PATH_DEFAULT = "DATA/WORK";
/**
* <p><code>public static final String <strong>DBFILE_MESSAGE</strong> = "message.db"</code></p>
* <p>Name of the file containing the database holding the user's peer-messages</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder, this file lies in
*/
public static final String DBFILE_MESSAGE = "message.db";
/**
* <p><code>public static final String <strong>DBFILE_WIKI</strong> = "wiki.db"</code></p>
* <p>Name of the file containing the database holding the whole wiki of this peer</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder, this file lies in
* @see plasmaSwitchboard#DBFILE_WIKI_BKP for the file previous versions of wiki-pages lie in
*/
public static final String DBFILE_WIKI = "wiki.db";
/**
* <p><code>public static final String <strong>DBFILE_WIKI_BKP</strong> = "wiki-bkp.db"</code></p>
* <p>Name of the file containing the database holding all versions but the latest of the wiki-pages of this peer</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
* @see plasmaSwitchboard#DBFILE_WIKI for the file the latest version of wiki-pages lie in
*/
public static final String DBFILE_WIKI_BKP = "wiki-bkp.db";
/**
* <p><code>public static final String <strong>DBFILE_BLOG</strong> = "blog.db"</code></p>
* <p>Name of the file containing the database holding all blog-entries available on this peer</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
*/
public static final String DBFILE_BLOG = "blog.db";
/**
* <p><code>public static final String <strong>DBFILE_BLOGCOMMENTS</strong> = "blogComment.db"</code></p>
* <p>Name of the file containing the database holding all blogComment-entries available on this peer</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
*/
public static final String DBFILE_BLOGCOMMENTS = "blogComment.db";
/**
* <p><code>public static final String <strong>DBFILE_BOOKMARKS</strong> = "bookmarks.db"</code></p>
* <p>Name of the file containing the database holding all bookmarks available on this peer</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
* @see bookmarksDB for more detailed overview about the bookmarks structure
*/
public static final String DBFILE_BOOKMARKS = "bookmarks.db";
/**
* <p><code>public static final String <strong>DBFILE_BOOKMARKS_TAGS</strong> = "bookmarkTags.db"</code></p>
* <p>Name of the file containing the database holding all tag-&gt;bookmark relations</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
* @see bookmarksDB for more detailed overview about the bookmarks structure
*/
public static final String DBFILE_BOOKMARKS_TAGS = "bookmarkTags.db";
/**
* <p><code>public static final String <strong>DBFILE_BOOKMARKS_DATES</strong> = "bookmarkDates.db"</code></p>
* <p>Name of the file containing the database holding all date-&gt;bookmark relations</p>
*
* @see plasmaSwitchboard#WORK_PATH for the folder this file lies in
* @see bookmarksDB for more detailed overview about the bookmarks structure
*/
public static final String DBFILE_BOOKMARKS_DATES = "bookmarkDates.db";
/**
* <p><code>public static final String <strong>DBFILE_CRAWL_ROBOTS</strong> = "crawlRobotsTxt.db"</code></p>
* <p>Name of the file containing the database holding all <code>robots.txt</code>-entries of the lately crawled domains</p>
*
* @see plasmaSwitchboard#PLASMA_PATH for the folder this file lies in
*/
public static final String DBFILE_CRAWL_ROBOTS = "crawlRobotsTxt.heap";
/**
* <p><code>public static final String <strong>DBFILE_USER</strong> = "DATA/SETTINGS/user.db"</code></p>
* <p>Path to the user-DB, beginning from the YaCy-installation's top-folder. It holds all rights the created
* users have as well as all other needed data about them</p>
*/
public static final String DBFILE_USER = "DATA/SETTINGS/user.db";
// we must distinguish the following cases: resource-load was initiated by
// 1) global crawling: the index is extern, not here (not possible here)
// 2) result of search queries, some indexes are here (not possible here)

@ -133,7 +133,11 @@ public class migration {
delete(tagsDBFile);
Log.logInfo("MIGRATION", "Migrating bookmarkTags.db to use wordhashs as keys.");
}
sb.initBookmarks();
try {
sb.initBookmarks();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
@ -165,7 +169,11 @@ public class migration {
file.delete();
} catch (final IOException e) {}
}
sb.initWiki();
try {
sb.initWiki();
} catch (IOException e) {
e.printStackTrace();
}
}
@ -178,7 +186,11 @@ public class migration {
FileUtils.copy(file, file2);
file.delete();
} catch (final IOException e) {}
sb.initMessages();
try {
sb.initMessages();
} catch (IOException e) {
e.printStackTrace();
}
}
}
