From 3d4b826ca5af4da28de4eb6b4d215c83452099cd Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 27 May 2009 15:04:04 +0000 Subject: [PATCH] migration of all databases that use the deprecated BLOBTree format into the BLOBHeap format. Old databases are migrated automatically. This removes the last very IO-intensive data structures which were still used for Wiki, Blog and Bookmarks. Old database files will still remain in the DATA subdirectory but can be deleted manually if no major bugs appear during migration. There is no need for any user action, all migration is done automatically. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5986 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/CrawlProfile.java | 9 +- source/de/anomic/crawler/RobotsTxt.java | 9 +- source/de/anomic/data/blogBoard.java | 7 +- source/de/anomic/data/blogBoardComments.java | 14 +- source/de/anomic/data/bookmarksDB.java | 11 +- source/de/anomic/data/messageBoard.java | 5 +- source/de/anomic/data/userDB.java | 14 +- source/de/anomic/data/wiki/wikiBoard.java | 8 +- source/de/anomic/kelondro/blob/BLOBArray.java | 2 + source/de/anomic/kelondro/blob/BLOBHeap.java | 2 +- .../kelondro/blob/BLOBHeapModifier.java | 5 +- source/de/anomic/kelondro/blob/BLOBTree.java | 204 +++++------------- .../de/anomic/kelondro/blob/HeapReader.java | 14 ++ .../de/anomic/kelondro/blob/HeapWriter.java | 2 +- .../anomic/kelondro/blob/MapDataMining.java | 4 +- source/de/anomic/kelondro/blob/MapView.java | 36 ++-- .../kelondro/index/LongHandleIndex.java | 4 + source/de/anomic/plasma/plasmaHTCache.java | 5 +- .../de/anomic/plasma/plasmaSwitchboard.java | 51 +++-- .../plasma/plasmaSwitchboardConstants.java | 75 +------ source/migration.java | 18 +- 21 files changed, 191 insertions(+), 308 deletions(-) diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index 8fd5d9a60..f6c579fc6 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ 
b/source/de/anomic/crawler/CrawlProfile.java @@ -31,7 +31,6 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; -import de.anomic.kelondro.blob.BLOB; import de.anomic.kelondro.blob.BLOBHeap; import de.anomic.kelondro.blob.MapView; import de.anomic.kelondro.order.Base64Order; @@ -67,8 +66,8 @@ public class CrawlProfile { public CrawlProfile(final File file) throws IOException { this.profileTableFile = file; profileTableFile.getParentFile().mkdirs(); - final BLOB dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64); - profileTable = new MapView(dyn, 500); + final BLOBHeap dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64); + profileTable = new MapView(dyn, 500, '_'); } public void clear() { @@ -76,13 +75,13 @@ public class CrawlProfile { if (profileTable != null) profileTable.close(); FileUtils.deletedelete(profileTableFile); profileTableFile.getParentFile().mkdirs(); - BLOB dyn = null; + BLOBHeap dyn = null; try { dyn = new BLOBHeap(profileTableFile, yacySeedDB.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64); } catch (IOException e) { e.printStackTrace(); } - profileTable = new MapView(dyn, 500); + profileTable = new MapView(dyn, 500, '_'); } public void close() { diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index a8ad146da..adbd4b908 100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -43,7 +43,6 @@ import java.util.concurrent.ConcurrentHashMap; import de.anomic.http.httpClient; import de.anomic.http.httpResponse; import de.anomic.http.httpRequestHeader; -import de.anomic.kelondro.blob.BLOB; import de.anomic.kelondro.blob.BLOBHeap; import de.anomic.kelondro.blob.MapView; import de.anomic.kelondro.order.NaturalOrder; @@ -71,13 +70,13 @@ public class RobotsTxt { public RobotsTxt(final File robotsTableFile) { this.robotsTableFile = 
robotsTableFile; robotsTableFile.getParentFile().mkdirs(); - BLOB blob = null; + BLOBHeap blob = null; try { blob = new BLOBHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024); } catch (final IOException e) { e.printStackTrace(); } - robotsTable = new MapView(blob, 100); + robotsTable = new MapView(blob, 100, '_'); syncObjects = new ConcurrentHashMap(); } @@ -86,13 +85,13 @@ public class RobotsTxt { if (robotsTable != null) robotsTable.close(); FileUtils.deletedelete(robotsTableFile); robotsTableFile.getParentFile().mkdirs(); - BLOB blob = null; + BLOBHeap blob = null; try { blob = new BLOBHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024); } catch (final IOException e) { e.printStackTrace(); } - robotsTable = new MapView(blob, 100); + robotsTable = new MapView(blob, 100, '_'); syncObjects.clear(); } diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index 0cf31ce95..bf079340a 100644 --- a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -63,10 +63,11 @@ public class blogBoard { MapView database = null; - public blogBoard(final File actpath) { - new File(actpath.getParent()).mkdir(); + public blogBoard(final File actpath, final File newFile) throws IOException { + new File(actpath.getParent()).mkdir(); + new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, newFile), 500, '_'); } } diff --git a/source/de/anomic/data/blogBoardComments.java b/source/de/anomic/data/blogBoardComments.java index 9c2951723..83f373b27 100644 --- a/source/de/anomic/data/blogBoardComments.java +++ b/source/de/anomic/data/blogBoardComments.java @@ -60,28 +60,34 @@ public class blogBoardComments { private static 
final int recordSize = 512; static SimpleDateFormat SimpleFormatter = new SimpleDateFormat(dateFormat); - static { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); } + private MapView database = null; - public blogBoardComments(final File actpath) { - new File(actpath.getParent()).mkdir(); + + public blogBoardComments(final File actpath, final File newFile) throws IOException { + new File(actpath.getParent()).mkdir(); + new File(newFile.getParent()).mkdir(); if (database == null) { - database = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false), 500); + database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, false, false, false, newFile), 500, '_'); } } + public int size() { return database.size(); } + public void close() { database.close(); } + static String dateString(final Date date) { synchronized (SimpleFormatter) { return SimpleFormatter.format(date); } } + private static String normalize(final String key) { if (key == null) return "null"; return key.trim().toLowerCase(); diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index 2aaedfe6d..1d07eac2f 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -108,22 +108,25 @@ public class bookmarksDB { // bookmarksDB's class constructor // ------------------------------------ - public bookmarksDB(final File bookmarksFile, final File tagsFile, final File datesFile) { + public bookmarksDB( + final File bookmarksFile, final File bookmarksFileNew, + final File tagsFile, final File tagsFileNew, + final File datesFile, final File datesFileNew) throws IOException { // bookmarks tagCache=new TreeMap(); bookmarksFile.getParentFile().mkdirs(); //this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false)); - this.bookmarksTable = new 
MapView(new BLOBTree(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false), 1000); + this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, bookmarksFileNew), 1000, '_'); // tags tagsFile.getParentFile().mkdirs(); final boolean tagsFileExisted = tagsFile.exists(); - this.tagsTable = new MapView(new BLOBTree(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false), 500); + this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, true, false, false, tagsFileNew), 500, '_'); if (!tagsFileExisted) rebuildTags(); // dates final boolean datesExisted = datesFile.exists(); - this.datesTable = new MapView(new BLOBTree(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false), 500); + this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, true, false, false, datesFileNew), 500, '_'); if (!datesExisted) rebuildDates(); // autoReCrawl diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java index 8440fa684..dbf9d13cd 100644 --- a/source/de/anomic/data/messageBoard.java +++ b/source/de/anomic/data/messageBoard.java @@ -51,10 +51,11 @@ public class messageBoard { MapView database = null; private int sn = 0; - public messageBoard(final File path) { + public messageBoard(final File path, final File pathNew) throws IOException { new File(path.getParent()).mkdir(); + new File(pathNew.getParent()).mkdir(); if (database == null) { - database = new MapView(new BLOBTree(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500); + database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, pathNew), 500, 
'_'); } sn = 0; } diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index 5cbf00469..d3e764070 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Random; import de.anomic.http.httpRequestHeader; +import de.anomic.kelondro.blob.BLOBHeap; import de.anomic.kelondro.blob.BLOBTree; import de.anomic.kelondro.blob.MapView; import de.anomic.kelondro.order.Base64Order; @@ -55,10 +56,11 @@ public final class userDB { HashMap ipUsers = new HashMap(); HashMap cookieUsers = new HashMap(); - public userDB(final File userTableFile) { - this.userTableFile = userTableFile; + public userDB(final File userTableFile, final File userTableFileNew) throws IOException { + this.userTableFile = userTableFileNew; userTableFile.getParentFile().mkdirs(); - this.userTable = new MapView(new BLOBTree(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false), 10); + userTableFileNew.getParentFile().mkdirs(); + this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, true, false, false, userTableFileNew), 10, '_'); } void resetDatabase() { @@ -66,7 +68,11 @@ public final class userDB { if (userTable != null) userTable.close(); FileUtils.deletedelete(userTableFile); userTableFile.getParentFile().mkdirs(); - userTable = new MapView(new BLOBTree(userTableFile, true, true, 256, 512, '_', NaturalOrder.naturalOrder, true, false, false), 10); + try { + userTable = new MapView(new BLOBHeap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64), 10, '_'); + } catch (IOException e) { + e.printStackTrace(); + } } public void close() { diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java index 85cd23dc2..12f5802fc 100644 --- a/source/de/anomic/data/wiki/wikiBoard.java +++ b/source/de/anomic/data/wiki/wikiBoard.java @@ -52,14 +52,16 @@ public class 
wikiBoard { MapView bkpbase = null; static HashMap authors = new HashMap(); - public wikiBoard(final File actpath, final File bkppath) { + public wikiBoard( + final File actpath, final File actpathNew, + final File bkppath, final File bkppathNew) throws IOException { new File(actpath.getParent()).mkdirs(); if (datbase == null) { - datbase = new MapView(new BLOBTree(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500); + datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, true, false, false, actpathNew), 500, '_'); } new File(bkppath.getParent()).mkdirs(); if (bkpbase == null) { - bkpbase = new MapView(new BLOBTree(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false), 500); + bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, true, false, false, bkppathNew), 500, '_'); } } diff --git a/source/de/anomic/kelondro/blob/BLOBArray.java b/source/de/anomic/kelondro/blob/BLOBArray.java index 702c32bf6..6f5fe0c39 100755 --- a/source/de/anomic/kelondro/blob/BLOBArray.java +++ b/source/de/anomic/kelondro/blob/BLOBArray.java @@ -150,6 +150,8 @@ public class BLOBArray implements BLOB { } catch (ParseException e) {continue;} } } + + // open all blob files for (int i = 0; i < files.length; i++) { if (files[i].length() >= 22 && files[i].startsWith(prefix) && files[i].endsWith(".blob")) { try { diff --git a/source/de/anomic/kelondro/blob/BLOBHeap.java b/source/de/anomic/kelondro/blob/BLOBHeap.java index 168dab216..bb8fc0ead 100755 --- a/source/de/anomic/kelondro/blob/BLOBHeap.java +++ b/source/de/anomic/kelondro/blob/BLOBHeap.java @@ -459,7 +459,7 @@ public final class BLOBHeap extends BLOBHeapModifier implements BLOB { final File f = new File("/Users/admin/blobtest.heap"); try { //f.delete(); - final MapView heap = new 
MapView(new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500); + final MapView heap = new MapView(new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500, '_'); heap.put("aaaaaaaaaaaa", map("aaaaaaaaaaaa", "eins zwei drei")); heap.put("aaaaaaaaaaab", map("aaaaaaaaaaab", "vier fuenf sechs")); heap.put("aaaaaaaaaaac", map("aaaaaaaaaaac", "sieben acht neun")); diff --git a/source/de/anomic/kelondro/blob/BLOBHeapModifier.java b/source/de/anomic/kelondro/blob/BLOBHeapModifier.java index b9dcb2c3f..4b8823f8d 100644 --- a/source/de/anomic/kelondro/blob/BLOBHeapModifier.java +++ b/source/de/anomic/kelondro/blob/BLOBHeapModifier.java @@ -39,7 +39,10 @@ import de.anomic.kelondro.util.Log; public class BLOBHeapModifier extends HeapReader implements BLOB { /* - * This class adds a remove operation to a BLOBHeapReader + * This class adds a remove operation to a BLOBHeapReader. That means that a BLOBModifier can + * - read elements from a BLOB + * - remove elements from a BLOB + * but cannot write new entries to the BLOB */ /** diff --git a/source/de/anomic/kelondro/blob/BLOBTree.java b/source/de/anomic/kelondro/blob/BLOBTree.java index 8ddea1ada..2aeb2f325 100644 --- a/source/de/anomic/kelondro/blob/BLOBTree.java +++ b/source/de/anomic/kelondro/blob/BLOBTree.java @@ -48,16 +48,14 @@ import de.anomic.kelondro.io.AbstractRandomAccess; import de.anomic.kelondro.io.RandomAccessInterface; import de.anomic.kelondro.order.ByteOrder; import de.anomic.kelondro.order.CloneableIterator; -import de.anomic.kelondro.order.NaturalOrder; import de.anomic.kelondro.order.RotateIterator; import de.anomic.kelondro.table.EcoTable; import de.anomic.kelondro.table.FlexTable; -import de.anomic.kelondro.table.FlexWidthArray; import de.anomic.kelondro.table.Tree; import de.anomic.kelondro.util.FileUtils; import de.anomic.kelondro.util.kelondroException; -public class BLOBTree implements BLOB { +public class BLOBTree { private static final int counterlen = 8; private static final int 
EcoFSBufferSize = 20; @@ -69,17 +67,14 @@ public class BLOBTree implements BLOB { private final ObjectIndex index; private ObjectBuffer buffer; private final Row rowdef; - private File file; /** * Deprecated Class. Please use kelondroBLOBHeap instead */ - @Deprecated - public BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, + private BLOBTree(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, final int nodesize, final char fillChar, final ByteOrder objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail) { // creates or opens a dynamic tree rowdef = new Row("byte[] key-" + (key + counterlen) + ", byte[] node-" + nodesize, objectOrder); - this.file = file; ObjectIndex fbi; if (usetree) { try { @@ -117,57 +112,38 @@ public class BLOBTree implements BLOB { //this.segmentCount = 0; //if (!(tree.fileExisted)) writeSegmentCount(); buffer = new ObjectBuffer(file.toString()); - /* - // debug - try { - kelondroCloneableIterator i = keys(true, false); - HashSet t = new HashSet(); - while (i.hasNext()) { - byte[] b = i.next(); - String s = new String(b); - t.add(s); - System.out.println("*** DEBUG BLOBTree " + file.getName() + " KEY=" + s); - } - Iterator j = t.iterator(); - while (j.hasNext()) { - String s = j.next(); - byte[] r = this.get(s.getBytes()); - if (r == null) System.out.println("*** DEBUG BLOBTree " + file.getName() + " KEY=" + s + " cannot be retrieved"); - } - } catch (IOException e) { - e.printStackTrace(); - } - */ - } - - public String name() { - return this.file.getName(); } - public static final void delete(final File file) { - if (file.isFile()) { - FileUtils.deletedelete(file); - } else { - FlexWidthArray.delete(file.getParentFile(), file.getName()); + public static BLOBHeap toHeap(final File file, final boolean useNodeCache, final boolean useObjectCache, final int key, + final int nodesize, final char fillChar, final ByteOrder 
objectOrder, final boolean usetree, final boolean writebuffer, final boolean resetOnFail, final File blob) throws IOException { + if (blob.exists() || !file.exists()) { + // open the blob file and ignore the tree + return new BLOBHeap(blob, key, objectOrder, 1024 * 64); } - } - - public synchronized void clear() throws IOException { - final String name = this.index.filename(); - this.index.clear(); - this.buffer = new ObjectBuffer(name); - } - - public int keylength() { - return this.keylen; - } - - public ByteOrder ordering() { - return this.rowdef.objectOrder; - } - - public synchronized int size() { - return index.size(); + // open a Tree and migrate everything to a Heap + BLOBTree tree = new BLOBTree(file, useNodeCache, useObjectCache, key, nodesize, fillChar, objectOrder, usetree, writebuffer, resetOnFail); + BLOBHeap heap = new BLOBHeap(blob, key, objectOrder, 1024 * 64); + Iterator i = tree.keys(true, false); + byte[] k, kk = new byte[key], v; + String s; + while (i.hasNext()) { + k = i.next(); + //assert k.length == key : "k.length = " + k.length + ", key = " + key; + if (k == null) continue; + v = tree.get(k); + if (v == null) continue; + s = new String(v, "UTF-8").trim(); + // enlarge entry key to fit into the given key length + if (k.length == key) { + heap.put(k, s.getBytes("UTF-8")); + } else { + System.arraycopy(k, 0, kk, 0, k.length); + for (int j = k.length; j < key; j++) kk[j] = (byte) fillChar; + heap.put(kk, s.getBytes("UTF-8")); + } + } + tree.close(false); + return heap; } private static String counter(final int c) { @@ -187,7 +163,7 @@ public class BLOBTree implements BLOB { } } - String origKey(final byte[] rawKey) { + private String origKey(final byte[] rawKey) { int n = keylen - 1; if (n >= rawKey.length) n = rawKey.length - 1; while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--; @@ -198,12 +174,12 @@ public class BLOBTree implements BLOB { } } - public class keyIterator implements CloneableIterator { + private class keyIterator 
implements CloneableIterator { // the iterator iterates all keys CloneableIterator ri; String nextKey; - public keyIterator(final CloneableIterator iter) { + private keyIterator(final CloneableIterator iter) { ri = iter; nextKey = n(); } @@ -212,11 +188,11 @@ public class BLOBTree implements BLOB { return new keyIterator(ri.clone(modifier)); } - public boolean hasNext() { + public boolean hasNext() { return nextKey != null; } - public byte[] next() { + public byte[] next() { final String result = nextKey; nextKey = n(); try { @@ -226,7 +202,7 @@ public class BLOBTree implements BLOB { } } - public void remove() { + public void remove() { throw new UnsupportedOperationException("no remove in RawKeyIterator"); } @@ -263,17 +239,13 @@ public class BLOBTree implements BLOB { } - public synchronized CloneableIterator keys(final boolean up, final boolean rotating) throws IOException { + private synchronized CloneableIterator keys(final boolean up, final boolean rotating) throws IOException { // iterates only the keys of the Nodes // enumerated objects are of type String final keyIterator i = new keyIterator(index.rows(up, null)); if (rotating) return new RotateIterator(i, null, index.size()); return i; } - - public synchronized CloneableIterator keys(final boolean up, final byte[] firstKey) throws IOException { - return new keyIterator(index.rows(up, firstKey)); - } private byte[] getValueCached(final byte[] key) throws IOException { @@ -297,7 +269,7 @@ public class BLOBTree implements BLOB { } } - synchronized int get(final String key, final int pos) throws IOException { + private synchronized int get(final String key, final int pos) throws IOException { final int reccnt = pos / reclen; // read within a single record final byte[] buf = getValueCached(elementKey(key, reccnt)); @@ -307,13 +279,13 @@ public class BLOBTree implements BLOB { return buf[recpos] & 0xFF; } - public synchronized byte[] get(final byte[] key) throws IOException { + private synchronized byte[] 
get(final byte[] key) throws IOException { final RandomAccessInterface ra = getRA(new String(key, "UTF-8")); if (ra == null) return null; return ra.readFully(); } - synchronized byte[] get(final String key, final int pos, final int len) throws IOException { + private synchronized byte[] get(final String key, final int pos, final int len) throws IOException { final int recpos = pos % reclen; final int reccnt = pos / reclen; byte[] segment1; @@ -356,12 +328,8 @@ public class BLOBTree implements BLOB { System.arraycopy(segment2, 0, result, segment1.length, segment2.length); return result; } - - public synchronized void put(final byte[] key, final byte[] b) throws IOException { - put(new String(key), 0, b, 0, b.length); - } - synchronized void put(final String key, final int pos, final byte[] b, final int off, final int len) throws IOException { + private synchronized void put(final String key, final int pos, final byte[] b, final int off, final int len) throws IOException { final int recpos = pos % reclen; final int reccnt = pos / reclen; byte[] buf; @@ -397,7 +365,7 @@ public class BLOBTree implements BLOB { } } - synchronized void put(final String key, final int pos, final int b) throws IOException { + private synchronized void put(final String key, final int pos, final int b) throws IOException { final int recpos = pos % reclen; final int reccnt = pos / reclen; byte[] buf; @@ -413,42 +381,21 @@ public class BLOBTree implements BLOB { buf[recpos] = (byte) b; setValueCached(elementKey(key, reccnt), buf); } - - public synchronized void remove(final byte[] key) throws IOException { - // remove value in cache and tree - if (key == null) return; - int recpos = 0; - byte[] k; - while (index.get(k = elementKey(new String(key, "UTF-8"), recpos)) != null) { - index.remove(k); - buffer.remove(k); - recpos++; - } - //segmentCount--; writeSegmentCount(); - } - - public synchronized boolean has(final byte[] key) { - try { - return (key != null) && (getValueCached(elementKey(new 
String(key), 0)) != null); - } catch (IOException e) { - return false; - } - } - - public synchronized RandomAccessInterface getRA(final String filekey) { + + private synchronized RandomAccessInterface getRA(final String filekey) { // this returns always a RARecord, even if no existed bevore //return new kelondroBufferedRA(new RARecord(filekey), 512, 0); return new RARecord(filekey); } - public class RARecord extends AbstractRandomAccess implements RandomAccessInterface { + private class RARecord extends AbstractRandomAccess implements RandomAccessInterface { int seekpos = 0; int compLength = -1; String filekey; - public RARecord(final String filekey) { + private RARecord(final String filekey) { this.filekey = filekey; } @@ -500,62 +447,7 @@ public class BLOBTree implements BLOB { } - public synchronized void close(boolean writeIDX) { + private synchronized void close(boolean writeIDX) { index.close(); } - - public static void main(final String[] args) { - // test app for DB functions - // reads/writes files to a database table - // arguments: - // {-f2db/-db2f} - - if (args.length == 1) { - // open a db and list keys - try { - final BLOB kd = new BLOBTree(new File(args[0]), true, true, 4 ,100, '_', NaturalOrder.naturalOrder, true, false, false); - System.out.println(kd.size() + " elements in DB"); - final Iterator i = kd.keys(true, false); - while (i.hasNext()) - System.out.println(new String(i.next())); - kd.close(true); - } catch (final IOException e) { - e.printStackTrace(); - } - } - } - - public static int countElements(final BLOBTree t) { - int count = 0; - try { - final Iterator iter = t.keys(true, false); - while (iter.hasNext()) {count++; if (iter.next() == null) System.out.println("ERROR! 
null element found");} - return count; - } catch (final IOException e) { - return -1; - } - } - - public long length(byte[] key) throws IOException { - byte[] b = get(key); - if (b == null) return -1; - return b.length; - } - - public long length() throws IOException { - return this.file.length(); - } - - public int replace(byte[] key, Rewriter rewriter) throws IOException { - byte[] b = get(key); - if (b == null) { - remove(key); - return 0; - } - byte[] c = rewriter.rewrite(b); - int reduced = b.length - c.length; - assert reduced >= 0; - put(key, c); - return reduced; - } } \ No newline at end of file diff --git a/source/de/anomic/kelondro/blob/HeapReader.java b/source/de/anomic/kelondro/blob/HeapReader.java index 0c6f7bc66..dc973c0c6 100644 --- a/source/de/anomic/kelondro/blob/HeapReader.java +++ b/source/de/anomic/kelondro/blob/HeapReader.java @@ -29,6 +29,7 @@ import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; @@ -101,7 +102,9 @@ public class HeapReader { // if this is successfull, return true; otherwise false String fingerprint = HeapWriter.fingerprintFileHash(this.heapFile); File fif = HeapWriter.fingerprintIndexFile(this.heapFile, fingerprint); + if (!fif.exists()) fif = new File(fif.getAbsolutePath() + ".gz"); File fgf = HeapWriter.fingerprintGapFile(this.heapFile, fingerprint); + if (!fgf.exists()) fgf = new File(fgf.getAbsolutePath() + ".gz"); if (!fif.exists() || !fgf.exists()) { HeapWriter.deleteAllFingerprints(this.heapFile); return false; @@ -326,6 +329,17 @@ public class HeapReader { public long length() throws IOException { return this.heapFile.length(); } + + public String excave(final byte[] rawKey, char fillChar) { + int n = this.keylength - 1; + if (n >= rawKey.length) n = rawKey.length - 1; + while ((n > 0) && (rawKey[n] == (byte) fillChar)) n--; + try { + 
return new String(rawKey, 0, n + 1, "UTF-8"); + } catch (UnsupportedEncodingException e) { + return new String(rawKey, 0, n + 1); + } + } /** * static iterator of entries in BLOBHeap files: diff --git a/source/de/anomic/kelondro/blob/HeapWriter.java b/source/de/anomic/kelondro/blob/HeapWriter.java index 9318e6f00..55c476743 100644 --- a/source/de/anomic/kelondro/blob/HeapWriter.java +++ b/source/de/anomic/kelondro/blob/HeapWriter.java @@ -128,7 +128,7 @@ public final class HeapWriter { String n = f.getName(); String[] l = d.list(); for (int i = 0; i < l.length; i++) { - if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap"))) FileUtils.deletedelete(new File(d, l[i])); + if (l[i].startsWith(n) && (l[i].endsWith(".idx") || l[i].endsWith(".gap") || l[i].endsWith(".idx.gz") || l[i].endsWith(".gap.gz"))) FileUtils.deletedelete(new File(d, l[i])); } } diff --git a/source/de/anomic/kelondro/blob/MapDataMining.java b/source/de/anomic/kelondro/blob/MapDataMining.java index 86952fb4c..45e17c18d 100644 --- a/source/de/anomic/kelondro/blob/MapDataMining.java +++ b/source/de/anomic/kelondro/blob/MapDataMining.java @@ -44,8 +44,8 @@ public class MapDataMining extends MapView { private HashMap accMap; // to store accumulations of specific fields @SuppressWarnings("unchecked") - public MapDataMining(final BLOB dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) { - super(dyn, cachesize); + public MapDataMining(final BLOBHeap dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) { + super(dyn, cachesize, '_'); // create fast ordering clusters and acc fields this.sortfields = sortfields; diff --git a/source/de/anomic/kelondro/blob/MapView.java b/source/de/anomic/kelondro/blob/MapView.java index 8d0e181bf..095825a0e 
100644 --- a/source/de/anomic/kelondro/blob/MapView.java +++ b/source/de/anomic/kelondro/blob/MapView.java @@ -52,14 +52,16 @@ public class MapView { private HashMap> cache; private final long startup; private final int cachesize; + private final char fillchar; - public MapView(final BLOB blob, final int cachesize) { + + public MapView(final BLOBHeap blob, final int cachesize, char fillchar) { this.blob = blob; this.cache = new HashMap>(); this.cacheScore = new ScoreCluster(); this.startup = System.currentTimeMillis(); this.cachesize = cachesize; - + this.fillchar = fillchar; /* // debug try { @@ -103,15 +105,15 @@ public class MapView { } private static String map2string(final Map map, final String comment) { - final Iterator> iter = map.entrySet().iterator(); - Map.Entry entry; final StringBuilder bb = new StringBuilder(map.size() * 40); bb.append("# ").append(comment).append("\r\n"); - while (iter.hasNext()) { - entry = iter.next(); - bb.append(entry.getKey()).append('='); - if (entry.getValue() != null) { bb.append(entry.getValue()); } - bb.append("\r\n"); + for (Map.Entry entry: map.entrySet()) { + if (entry.getValue() != null) { + bb.append(entry.getKey()); + bb.append('='); + bb.append(entry.getValue()); + bb.append("\r\n"); + } } bb.append("# EOF\r\n"); return bb.toString(); @@ -147,7 +149,7 @@ public class MapView { key = normalizeKey(key); // write entry - blob.put(key.getBytes(), map2string(newMap, "W" + DateFormatter.formatShortSecond() + " ").getBytes()); + blob.put(key.getBytes("UTF-8"), map2string(newMap, "W" + DateFormatter.formatShortSecond() + " ").getBytes("UTF-8")); // check for space in cache checkCacheSpace(); @@ -202,7 +204,7 @@ public class MapView { private String normalizeKey(String key) { if (key.length() > blob.keylength()) key = key.substring(0, blob.keylength()); - while (key.length() < blob.keylength()) key += "_"; + while (key.length() < blob.keylength()) key += fillchar; return key; } @@ -215,13 +217,13 @@ public class MapView { Map 
map = cache.get(key); if (map != null) return map; - // load map from kra + // load map if (!(blob.has(key.getBytes()))) return null; // read object final byte[] b = blob.get(key.getBytes()); if (b == null) return null; - map = string2map(new String(b)); + map = string2map(new String(b, "UTF-8")); if (storeCache) { // cache it also @@ -330,7 +332,7 @@ public class MapView { return null; } try { - final Map obj = get(new String(nextKey)); + final Map obj = get(new String(nextKey, "UTF-8")); if (obj == null) throw new kelondroException("no more elements available"); return obj; } catch (final IOException e) { @@ -350,9 +352,9 @@ public class MapView { if (f.exists()) FileUtils.deletedelete(f); try { // make a blob - BLOB blob = new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024); + BLOBHeap blob = new BLOBHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024); // make map - MapView map = new MapView(blob, 1024); + MapView map = new MapView(blob, 1024, '_'); // put some values into the map Map m = new HashMap(); m.put("k", "000"); map.put("123", m); @@ -361,7 +363,7 @@ public class MapView { // iterate over keys Iterator i = map.keys(true, false); while (i.hasNext()) { - System.out.println("key: " + new String(i.next())); + System.out.println("key: " + new String(i.next(), "UTF-8")); } // clean up map.close(); diff --git a/source/de/anomic/kelondro/index/LongHandleIndex.java b/source/de/anomic/kelondro/index/LongHandleIndex.java index 25ba2e83f..70ed6ccd5 100644 --- a/source/de/anomic/kelondro/index/LongHandleIndex.java +++ b/source/de/anomic/kelondro/index/LongHandleIndex.java @@ -41,6 +41,8 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.ByteOrder; @@ -75,6 +77,7 @@ public class 
LongHandleIndex { this(keylength, objectOrder, (int) (file.length() / (keylength + 8)), expectedspace); // read the index dump and fill the index InputStream is = new BufferedInputStream(new FileInputStream(file), 1024 * 1024); + if (file.getName().endsWith(".gz")) is = new GZIPInputStream(is); byte[] a = new byte[keylength + 8]; int c; while (true) { @@ -101,6 +104,7 @@ public class LongHandleIndex { File tmp = new File(file.getParentFile(), file.getName() + ".tmp"); Iterator i = this.index.rows(true, null); OutputStream os = new BufferedOutputStream(new FileOutputStream(tmp), 4 * 1024 * 1024); + if (file.getName().endsWith(".gz")) os = new GZIPOutputStream(os); int c = 0; while (i.hasNext()) { os.write(i.next().bytes()); diff --git a/source/de/anomic/plasma/plasmaHTCache.java b/source/de/anomic/plasma/plasmaHTCache.java index bf182ca7d..4e64ac7ca 100644 --- a/source/de/anomic/plasma/plasmaHTCache.java +++ b/source/de/anomic/plasma/plasmaHTCache.java @@ -42,7 +42,6 @@ import java.util.HashMap; import java.util.Map; import de.anomic.http.httpResponseHeader; -import de.anomic.kelondro.blob.BLOB; import de.anomic.kelondro.blob.BLOBArray; import de.anomic.kelondro.blob.BLOBCompressor; import de.anomic.kelondro.blob.BLOBHeap; @@ -128,13 +127,13 @@ public final class plasmaHTCache { private static void openDB() { // open the response header database final File dbfile = new File(cachePath, RESPONSE_HEADER_DB_NAME); - BLOB blob = null; + BLOBHeap blob = null; try { blob = new BLOBHeap(dbfile, yacySeedDB.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024); } catch (final IOException e) { e.printStackTrace(); } - responseHeaderDB = new MapView(blob, 500); + responseHeaderDB = new MapView(blob, 500, '_'); try { fileDBunbuffered = new BLOBArray(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2); fileDBunbuffered.setMaxSize(maxCacheSize); diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java 
b/source/de/anomic/plasma/plasmaSwitchboard.java index 2c9b6ca99..459492bc0 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -269,7 +269,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitchpublic static final String DBFILE_MESSAGE = "message.db"

- *

Name of the file containing the database holding the user's peer-messages

- * - * @see plasmaSwitchboard#WORK_PATH for the folder, this file lies in - */ - public static final String DBFILE_MESSAGE = "message.db"; - /** - *

public static final String DBFILE_WIKI = "wiki.db"

- *

Name of the file containing the database holding the whole wiki of this peer

- * - * @see plasmaSwitchboard#WORK_PATH for the folder, this file lies in - * @see plasmaSwitchboard#DBFILE_WIKI_BKP for the file previous versions of wiki-pages lie in - */ - public static final String DBFILE_WIKI = "wiki.db"; - /** - *

public static final String DBFILE_WIKI_BKP = "wiki-bkp.db"

- *

Name of the file containing the database holding all versions but the latest of the wiki-pages of this peer

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - * @see plasmaSwitchboard#DBFILE_WIKI for the file the latest version of wiki-pages lie in - */ - public static final String DBFILE_WIKI_BKP = "wiki-bkp.db"; - /** - *

public static final String DBFILE_BLOG = "blog.db"

- *

Name of the file containing the database holding all blog-entries available on this peer

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - */ - public static final String DBFILE_BLOG = "blog.db"; - /** - *

public static final String DBFILE_BLOGCOMMENTS = "blogComment.db"

- *

Name of the file containing the database holding all blogComment-entries available on this peer

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - */ - public static final String DBFILE_BLOGCOMMENTS = "blogComment.db"; - /** - *

public static final String DBFILE_BOOKMARKS = "bookmarks.db"

- *

Name of the file containing the database holding all bookmarks available on this peer

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - * @see bookmarksDB for more detailed overview about the bookmarks structure - */ - public static final String DBFILE_BOOKMARKS = "bookmarks.db"; - /** - *

public static final String DBFILE_BOOKMARKS_TAGS = "bookmarkTags.db"

- *

Name of the file containing the database holding all tag->bookmark relations

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - * @see bookmarksDB for more detailed overview about the bookmarks structure - */ - public static final String DBFILE_BOOKMARKS_TAGS = "bookmarkTags.db"; - /** - *

public static final String DBFILE_BOOKMARKS_DATES = "bookmarkDates.db"

- *

Name of the file containing the database holding all date->bookmark relations

- * - * @see plasmaSwitchboard#WORK_PATH for the folder this file lies in - * @see bookmarksDB for more detailed overview about the bookmarks structure - */ - public static final String DBFILE_BOOKMARKS_DATES = "bookmarkDates.db"; - /** - *

public static final String DBFILE_CRAWL_ROBOTS = "crawlRobotsTxt.db"

- *

Name of the file containing the database holding all robots.txt-entries of the lately crawled domains

- * - * @see plasmaSwitchboard#PLASMA_PATH for the folder this file lies in - */ - public static final String DBFILE_CRAWL_ROBOTS = "crawlRobotsTxt.heap"; - /** - *

public static final String DBFILE_USER = "DATA/SETTINGS/user.db"

- *

Path to the user-DB, beginning from the YaCy-installation's top-folder. It holds all rights the created - * users have as well as all other needed data about them

- */ - public static final String DBFILE_USER = "DATA/SETTINGS/user.db"; + // we must distinguish the following cases: resource-load was initiated by // 1) global crawling: the index is extern, not here (not possible here) // 2) result of search queries, some indexes are here (not possible here) diff --git a/source/migration.java b/source/migration.java index 014bd8f9e..0dc9011db 100644 --- a/source/migration.java +++ b/source/migration.java @@ -133,7 +133,11 @@ public class migration { delete(tagsDBFile); Log.logInfo("MIGRATION", "Migrating bookmarkTags.db to use wordhashs as keys."); } - sb.initBookmarks(); + try { + sb.initBookmarks(); + } catch (IOException e) { + e.printStackTrace(); + } } /** @@ -165,7 +169,11 @@ public class migration { file.delete(); } catch (final IOException e) {} } - sb.initWiki(); + try { + sb.initWiki(); + } catch (IOException e) { + e.printStackTrace(); + } } @@ -178,7 +186,11 @@ public class migration { FileUtils.copy(file, file2); file.delete(); } catch (final IOException e) {} - sb.initMessages(); + try { + sb.initMessages(); + } catch (IOException e) { + e.printStackTrace(); + } } }