diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index b1274c156..a78fb560f 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -31,8 +31,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Pattern; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.index.RowSpaceExceededException; @@ -52,15 +51,14 @@ public class CrawlProfile { static HashMap> domsCache = new HashMap>(); - MapView profileTable; + MapHeap profileTable; private final File profileTableFile; public CrawlProfile(final File file) throws IOException { //System.out.println("loading crawl profile from " + file); this.profileTableFile = file; profileTableFile.getParentFile().mkdirs(); - final Heap dyn = new Heap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64); - profileTable = new MapView(dyn, 500, '_'); + profileTable = new MapHeap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); profileIterator pi = new profileIterator(true); entry e; while (pi.hasNext()) { @@ -75,13 +73,11 @@ public class CrawlProfile { if (profileTable != null) profileTable.close(); FileUtils.deletedelete(profileTableFile); profileTableFile.getParentFile().mkdirs(); - Heap dyn = null; try { - dyn = new Heap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64); + profileTable = new MapHeap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); } catch (IOException e) { Log.logException(e); } - profileTable = new MapView(dyn, 500, '_'); } public void close() { diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index db17d1ea0..7e9a42e64 
100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -36,8 +36,7 @@ import java.util.Date; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.NaturalOrder; @@ -57,7 +56,7 @@ public class RobotsTxt { public static final String ROBOTS_DB_PATH_SEPARATOR = ";"; private static final Log log = new Log("ROBOTS"); - MapView robotsTable; + MapHeap robotsTable; private final File robotsTableFile; private final ConcurrentHashMap syncObjects; //private static final HashSet loadedRobots = new HashSet(); // only for debugging @@ -69,13 +68,11 @@ public class RobotsTxt { public RobotsTxt(final File robotsTableFile) { this.robotsTableFile = robotsTableFile; robotsTableFile.getParentFile().mkdirs(); - Heap blob = null; try { - blob = new Heap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024); + robotsTable = new MapHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024, 100, '_'); } catch (final IOException e) { Log.logException(e); } - robotsTable = new MapView(blob, 100, '_'); syncObjects = new ConcurrentHashMap(); } @@ -84,13 +81,11 @@ public class RobotsTxt { if (robotsTable != null) robotsTable.close(); FileUtils.deletedelete(robotsTableFile); robotsTableFile.getParentFile().mkdirs(); - Heap blob = null; try { - blob = new Heap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024); + robotsTable = new MapHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024, 100, '_'); } catch (final IOException e) { Log.logException(e); } - robotsTable = new MapView(blob, 100, '_'); syncObjects.clear(); } diff --git a/source/de/anomic/data/blogBoard.java b/source/de/anomic/data/blogBoard.java index e669ddf60..78940209c 100644 --- 
a/source/de/anomic/data/blogBoard.java +++ b/source/de/anomic/data/blogBoard.java @@ -42,8 +42,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.NaturalOrder; @@ -61,13 +60,13 @@ public class blogBoard { public static final int keyLength = 64; - MapView database = null; + MapHeap database = null; public blogBoard(final File actpath) throws IOException { new File(actpath.getParent()).mkdir(); if (database == null) { //database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_'); - database = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + database = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); } } @@ -143,7 +142,7 @@ public class blogBoard { return readBlogEntry(key, database); } - private BlogEntry readBlogEntry(String key, final MapView base) { + private BlogEntry readBlogEntry(String key, final MapHeap base) { key = normalize(key); if (key.length() > keyLength) key = key.substring(0, keyLength); Map record; diff --git a/source/de/anomic/data/blogBoardComments.java b/source/de/anomic/data/blogBoardComments.java index 5c323ce8a..21cc0cd63 100644 --- a/source/de/anomic/data/blogBoardComments.java +++ b/source/de/anomic/data/blogBoardComments.java @@ -42,8 +42,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.logging.Log; import 
net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.NaturalOrder; @@ -65,13 +64,13 @@ public class blogBoardComments { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); } - private MapView database = null; + private MapHeap database = null; public blogBoardComments(final File actpath) throws IOException { new File(actpath.getParent()).mkdir(); if (database == null) { //database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_'); - database = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + database = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); } } @@ -121,7 +120,7 @@ public class blogBoardComments { //System.out.println("DEBUG: read from blogBoardComments"); return read(key, database); } - private CommentEntry read(String key, final MapView base) { + private CommentEntry read(String key, final MapHeap base) { key = normalize(key); if (key.length() > keyLength) key = key.substring(0, keyLength); Map record; diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index 426a623a8..339b08cbc 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -57,8 +57,7 @@ import javax.xml.parsers.ParserConfigurationException; import net.yacy.document.parser.html.ContentScraper; import net.yacy.document.parser.html.TransformerWriter; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.logging.Log; @@ -94,14 +93,14 @@ public class bookmarksDB { final static String SLEEP_TIME = "3600000"; // default sleepTime: check for recrawls every hour // bookmarks - MapView bookmarksTable; // kelondroMap bookmarksTable; + MapHeap bookmarksTable; // kelondroMap 
bookmarksTable; // tags - MapView tagsTable; + MapHeap tagsTable; TreeMap tagCache; // dates - MapView datesTable; + MapHeap datesTable; // autoReCrawl private final BusyThread autoReCrawl; @@ -116,19 +115,19 @@ public class bookmarksDB { bookmarksFile.getParentFile().mkdirs(); //this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false)); //this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_'); - this.bookmarksTable = new MapView(new Heap(bookmarksFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 1000, '_'); + this.bookmarksTable = new MapHeap(bookmarksFile, 12, NaturalOrder.naturalOrder, 1024 * 64, 1000, '_'); // tags tagsFile.getParentFile().mkdirs(); final boolean tagsFileExisted = tagsFile.exists(); //this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_'); - this.tagsTable = new MapView(new Heap(tagsFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + this.tagsTable = new MapHeap(tagsFile, 12, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); if (!tagsFileExisted) rebuildTags(); // dates final boolean datesExisted = datesFile.exists(); //this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_'); - this.datesTable = new MapView(new Heap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); if (!datesExisted) rebuildDates(); // autoReCrawl diff --git a/source/de/anomic/data/messageBoard.java b/source/de/anomic/data/messageBoard.java index add9189c0..d8f4d1a37 100644 --- a/source/de/anomic/data/messageBoard.java +++ b/source/de/anomic/data/messageBoard.java @@ -32,8 +32,7 @@ import java.util.Locale; import java.util.Map; import 
java.util.TimeZone; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.NaturalOrder; @@ -50,14 +49,14 @@ public class messageBoard { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); } - MapView database = null; + MapHeap database = null; private int sn = 0; public messageBoard(final File path) throws IOException { new File(path.getParent()).mkdir(); if (database == null) { //database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, pathNew), 500, '_'); - database = new MapView(new Heap(path, categoryLength + dateFormat.length() + 2, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + database = new MapHeap(path, categoryLength + dateFormat.length() + 2, NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); } sn = 0; } diff --git a/source/de/anomic/data/userDB.java b/source/de/anomic/data/userDB.java index 080a4ce7a..2cee3c711 100644 --- a/source/de/anomic/data/userDB.java +++ b/source/de/anomic/data/userDB.java @@ -35,8 +35,7 @@ import java.util.Iterator; import java.util.Map; import java.util.Random; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; @@ -53,7 +52,7 @@ public final class userDB { public static final int USERNAME_MAX_LENGTH = 128; public static final int USERNAME_MIN_LENGTH = 4; - MapView userTable; + MapHeap userTable; private final File userTableFile; HashMap ipUsers = new HashMap(); HashMap cookieUsers = new HashMap(); @@ -62,7 +61,7 @@ public final class userDB { this.userTableFile = userTableFile; userTableFile.getParentFile().mkdirs(); //this.userTable = new 
MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, userTableFile), 10, '_'); - this.userTable = new MapView(new Heap(userTableFile, 128, NaturalOrder.naturalOrder, 1024 * 64), 10, '_'); + this.userTable = new MapHeap(userTableFile, 128, NaturalOrder.naturalOrder, 1024 * 64, 10, '_'); } void resetDatabase() { @@ -71,7 +70,7 @@ public final class userDB { FileUtils.deletedelete(userTableFile); userTableFile.getParentFile().mkdirs(); try { - userTable = new MapView(new Heap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64), 10, '_'); + userTable = new MapHeap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64, 10, '_'); } catch (IOException e) { Log.logException(e); } diff --git a/source/de/anomic/data/wiki/wikiBoard.java b/source/de/anomic/data/wiki/wikiBoard.java index 23e80ec31..7b192bab0 100644 --- a/source/de/anomic/data/wiki/wikiBoard.java +++ b/source/de/anomic/data/wiki/wikiBoard.java @@ -35,8 +35,7 @@ import java.util.Locale; import java.util.Map; import java.util.TimeZone; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.Base64Order; import net.yacy.kelondro.order.NaturalOrder; @@ -53,20 +52,20 @@ public class wikiBoard { SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT")); } - MapView datbase = null; - MapView bkpbase = null; + MapHeap datbase = null; + MapHeap bkpbase = null; static HashMap authors = new HashMap(); public wikiBoard( final File actpath, final File bkppath) throws IOException { new File(actpath.getParent()).mkdirs(); if (datbase == null) { //datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, actpathNew), 500, '_'); - datbase = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + datbase = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 
1024 * 64, 500, '_'); } new File(bkppath.getParent()).mkdirs(); if (bkpbase == null) { //bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, bkppathNew), 500, '_'); - bkpbase = new MapView(new Heap(bkppath, keyLength + dateFormat.length(), NaturalOrder.naturalOrder, 1024 * 64), 500, '_'); + bkpbase = new MapHeap(bkppath, keyLength + dateFormat.length(), NaturalOrder.naturalOrder, 1024 * 64, 500, '_'); } } @@ -280,7 +279,7 @@ public class wikiBoard { return read(key, datbase); } - entry read(String key, final MapView base) { + entry read(String key, final MapHeap base) { try { key = normalize(key); if (key.length() > keyLength) key = key.substring(0, keyLength); diff --git a/source/de/anomic/http/client/Cache.java b/source/de/anomic/http/client/Cache.java index 076f141cd..7e2481cb3 100644 --- a/source/de/anomic/http/client/Cache.java +++ b/source/de/anomic/http/client/Cache.java @@ -42,8 +42,7 @@ import java.util.Map; import net.yacy.kelondro.blob.ArrayStack; import net.yacy.kelondro.blob.Compressor; -import net.yacy.kelondro.blob.Heap; -import net.yacy.kelondro.blob.MapView; +import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.Word; import net.yacy.kelondro.logging.Log; @@ -56,7 +55,7 @@ public final class Cache { private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap"; private static final String FILE_DB_NAME = "file.array"; - private static MapView responseHeaderDB = null; + private static MapHeap responseHeaderDB = null; private static Compressor fileDB = null; private static ArrayStack fileDBunbuffered = null; @@ -78,13 +77,11 @@ public final class Cache { // open the response header database final File dbfile = new File(cachePath, RESPONSE_HEADER_DB_NAME); - Heap blob = null; try { - blob = new Heap(dbfile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024); + responseHeaderDB = 
new MapHeap(dbfile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024, 500, '_'); } catch (final IOException e) { Log.logException(e); } - responseHeaderDB = new MapView(blob, 500, '_'); try { fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2); fileDBunbuffered.setMaxSize(maxCacheSize); diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 260ae8743..2456feaba 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -39,7 +39,6 @@ import java.util.Iterator; import java.util.Map; import java.util.TreeMap; -import net.yacy.kelondro.blob.Heap; import net.yacy.kelondro.blob.MapDataMining; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.word.Word; @@ -242,12 +241,12 @@ public final class yacySeedDB implements AlternativeDomainNames { Log.logWarning("yacySeedDB", "could not create directories for "+ seedDBFile.getParent()); } try { - return new MapDataMining(new Heap(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, sortFields, longaccFields, doubleaccFields, null, this); + return new MapDataMining(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, sortFields, longaccFields, doubleaccFields, null, this); } catch (final Exception e) { // try again FileUtils.deletedelete(seedDBFile); try { - return new MapDataMining(new Heap(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, sortFields, longaccFields, doubleaccFields, null, this); + return new MapDataMining(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, sortFields, longaccFields, doubleaccFields, null, this); } catch (IOException e1) { Log.logException(e1); System.exit(-1); diff --git a/source/net/yacy/document/parser/torrentParser.java b/source/net/yacy/document/parser/torrentParser.java index 
d494972e2..51533abc0 100644 --- a/source/net/yacy/document/parser/torrentParser.java +++ b/source/net/yacy/document/parser/torrentParser.java @@ -88,7 +88,7 @@ public class torrentParser extends AbstractParser implements Idiom { if (bo.getType() != BType.dictionary) throw new ParserException("BDecoder object is not a dictionary", location); Map map = bo.getMap(); BObject commento = map.get("comment"); - String comment = (commento == null) ? "" : commento.getString(); + String comment = (commento == null) ? "" : new String(commento.getString()); //Date creation = new Date(map.get("creation date").getInteger()); BObject infoo = map.get("info"); StringBuilder filenames = new StringBuilder(); @@ -107,7 +107,7 @@ public class torrentParser extends AbstractParser implements Idiom { } } BObject nameo = info.get("name"); - if (nameo != null) name = nameo.getString(); + if (nameo != null) name = new String(nameo.getString()); } try { return new Document( diff --git a/source/net/yacy/kelondro/blob/BEncodedHeap.java b/source/net/yacy/kelondro/blob/BEncodedHeap.java new file mode 100644 index 000000000..e098dc60c --- /dev/null +++ b/source/net/yacy/kelondro/blob/BEncodedHeap.java @@ -0,0 +1,256 @@ +// BEncodedHeap.java +// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 12.01.2010 on http://yacy.net +// +// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $ +// $LastChangedRevision: 6563 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.kelondro.blob; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; + +import net.yacy.kelondro.index.RowSpaceExceededException; +import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.ByteOrder; +import net.yacy.kelondro.order.NaturalOrder; +import net.yacy.kelondro.util.BDecoder; +import net.yacy.kelondro.util.BEncoder; +import net.yacy.kelondro.util.FileUtils; +import net.yacy.kelondro.util.BDecoder.BObject; + +/** + * store a table of properties (instead of fixed-field entries) + * this is realized using blobs and BEncoded property lists + */ +public class BEncodedHeap implements Iterable>> { + + private Heap table; + + /** + * produce or open a properties table + * @param location the file + * @param keylength length of access keys + * @param ordering ordering on the keys + * @param buffermax maximum number of lines that shall be buffered for writing + * @throws IOException + */ + public BEncodedHeap( + final File location, + final int keylength, + final ByteOrder ordering, + int buffermax) throws IOException { + this.table = new Heap(location, keylength, ordering, buffermax); + } + + /** + * convenience method to open a properties table + * @param location the file + * @param keylength length of access keys + */ + public BEncodedHeap( + final File location, + final int keylength) throws IOException { + this.table = new Heap(location, keylength, NaturalOrder.naturalOrder, 100); + } + + public void close() { + this.table.close(); + } + + /** + * insert a map into the table + * @param key + * @param map + * @throws RowSpaceExceededException + * @throws 
IOException + */ + public void put(byte[] key, Map map) throws RowSpaceExceededException, IOException { + byte[] b = BEncoder.encode(BEncoder.transcode(map)); + System.out.println(new String(b)); + this.table.put(key, b); + } + + /** + * select a map from the table + * @param key + * @return the map if one found or NULL if no entry exists or the entry is corrupt + * @throws IOException + */ + public Map get(byte[] key) throws IOException { + byte[] b = this.table.get(key); + if (b == null) return null; + return b2m(b); + } + + private static Map b2m(byte[] b) { + if (b == null) return null; + System.out.println("b = " + new String(b)); + BDecoder decoder = new BDecoder(b); + BObject bobj = decoder.parse(); + if (bobj.getType() != BDecoder.BType.dictionary) return null; + Map map = bobj.getMap(); + Map m = new HashMap(); + for (Map.Entry entry: map.entrySet()) { + if (entry.getValue().getType() != BDecoder.BType.string) continue; + m.put(entry.getKey(), entry.getValue().getString()); + } + return m; + } + + /** + * delete a map from the table + * @param key + * @throws IOException + */ + public void delete(byte[] key) throws IOException { + this.table.remove(key); + } + + /** + * check if a row with given key exists in the table + * @param key + * @return true if the row exists + */ + public boolean has(byte[] key) { + return this.table.has(key); + } + + /** + * iterate all keys of the table + * @return an iterator of byte[] + * @throws IOException + */ + public Iterator keys() throws IOException { + return this.table.keys(true, false); + } + + /** + * iterate all rows of the table. + * Be aware that this first closes the table to force flushing of all elements in + * the write buffer. After that an iterator on the closed file is generated and then + * the file is opened again. 
+ */ + public Iterator>> iterator() { + File location = this.table.location(); + int keylen = this.table.keylength(); + ByteOrder order = this.table.ordering(); + int buffermax = this.table.getBuffermax(); + this.table.close(); + try { + Iterator>> iter = new EntryIter(location, keylen); + this.table = new Heap(location, keylen, order, buffermax); + return iter; + } catch (IOException e) { + Log.logSevere("PropertiesTable", e.getMessage(), e); + return null; + } + } + + /** + * iterate all rows of the table. this is a static method that expects that the given + * file is not opened by any other application + * @param location + * @param keylen + * @return + * @throws IOException + */ + public static Iterator>> iterator(File location, int keylen) throws IOException { + return new EntryIter(location, keylen); + } + + private static class EntryIter implements Iterator>> { + HeapReader.entries iter; + public EntryIter(File location, int keylen) throws IOException { + iter = new HeapReader.entries(location, keylen); + } + + public boolean hasNext() { + return iter.hasNext(); + } + + public Entry> next() { + Map.Entry entry = iter.next(); + Map map = b2m(entry.getValue()); + return new b2mEntry(entry.getKey(), map); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + public static class b2mEntry implements Map.Entry> { + private final byte[] s; + private Map b; + + public b2mEntry(final byte[] s, final Map b) { + this.s = s; + this.b = b; + } + + public byte[] getKey() { + return s; + } + + public Map getValue() { + return b; + } + + public Map setValue(Map value) { + Map b1 = b; + b = value; + return b1; + } + } + + public static void main(String[] args) { + // test the class + File f = new File(new File("maptest").getAbsolutePath()); + //System.out.println(f.getAbsolutePath()); + //System.out.println(f.getParent()); + if (f.exists()) FileUtils.deletedelete(f); + try { + BEncodedHeap map = new BEncodedHeap(f, 4); + // put some 
values into the map + Map m = new HashMap(); + m.put("k", "000".getBytes()); map.put("123".getBytes(), m); + m.put("k", "111".getBytes()); map.put("456".getBytes(), m); + m.put("k", "222".getBytes()); map.put("789".getBytes(), m); + // iterate over keys + Iterator>> i = map.iterator(); + while (i.hasNext()) { + Map.Entry> entry = i.next(); + System.out.println(new String(entry.getKey(), "UTF-8") + ": " + entry.getValue()); + } + // clean up + map.close(); + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + } +} diff --git a/source/net/yacy/kelondro/blob/Heap.java b/source/net/yacy/kelondro/blob/Heap.java index 49252f29f..b85d1d1cc 100755 --- a/source/net/yacy/kelondro/blob/Heap.java +++ b/source/net/yacy/kelondro/blob/Heap.java @@ -41,7 +41,7 @@ import net.yacy.kelondro.order.NaturalOrder; public final class Heap extends HeapModifier implements BLOB { - + private HashMap buffer; // a write buffer to limit IO to the file; attention: Maps cannot use byte[] as key private int buffersize; // bytes that are buffered in buffer private final int buffermax; // maximum size of the buffer @@ -143,8 +143,11 @@ public final class Heap extends HeapModifier implements BLOB { final int pos = (int) file.length(); index.put(key, pos); file.seek(pos); - file.writeInt(key.length + blob.length); + file.writeInt(this.keylength + blob.length); file.write(key); + if (this.keylength > key.length) { + for (int i = 0; i < this.keylength - key.length; i++) file.write(HeapWriter.ZERO); + } file.write(blob, 0, blob.length); } @@ -171,7 +174,7 @@ public final class Heap extends HeapModifier implements BLOB { key = entry.getKey().getBytes(); assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength; blob = entry.getValue(); - posBuffer += 4 + key.length + blob.length; + posBuffer += 4 + this.keylength + blob.length; } assert l + (4 + this.keylength) * this.buffer.size() == 
posBuffer : "l = " + l + ", this.keylength = " + this.keylength + ", this.buffer.size() = " + this.buffer.size() + ", posBuffer = " + posBuffer; @@ -188,7 +191,7 @@ public final class Heap extends HeapModifier implements BLOB { assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength; blob = entry.getValue(); index.put(key, posFile); - b = AbstractWriter.int2array(key.length + blob.length); + b = AbstractWriter.int2array(this.keylength + blob.length); assert b.length == 4; assert posBuffer + 4 < ba.length : "posBuffer = " + posBuffer + ", ba.length = " + ba.length; System.arraycopy(b, 0, ba, posBuffer, 4); @@ -198,8 +201,8 @@ public final class Heap extends HeapModifier implements BLOB { //System.out.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); //System.err.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length); System.arraycopy(blob, 0, ba, posBuffer + 4 + this.keylength, blob.length); //java.lang.ArrayIndexOutOfBoundsException here - posFile += 4 + key.length + blob.length; - posBuffer += 4 + key.length + blob.length; + posFile += 4 + this.keylength + blob.length; + posBuffer += 4 + this.keylength + blob.length; } assert ba.length == posBuffer; // must fit exactly this.file.seek(pos); @@ -355,6 +358,9 @@ public final class Heap extends HeapModifier implements BLOB { final int reclenf = file.readInt(); assert reclenf == reclen; file.write(key); + if (this.keylength > key.length) { + for (int j = 0; j < this.keylength - key.length; j++) file.write(HeapWriter.ZERO); + } file.write(b); // remove the entry from the free list @@ -383,6 +389,9 @@ public final class Heap extends HeapModifier implements BLOB { file.seek(lseek); file.writeInt(reclen); file.write(key); + if (this.keylength > key.length) { + for 
(int j = 0; j < this.keylength - key.length; j++) file.write(HeapWriter.ZERO); + } file.write(b); // add the index to the new entry @@ -505,7 +514,7 @@ public final class Heap extends HeapModifier implements BLOB { final File f = new File("/Users/admin/blobtest.heap"); try { //f.delete(); - final MapView heap = new MapView(new Heap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500, '_'); + final MapHeap heap = new MapHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512, 500, '_'); heap.put("aaaaaaaaaaaa", map("aaaaaaaaaaaa", "eins zwei drei")); heap.put("aaaaaaaaaaab", map("aaaaaaaaaaab", "vier fuenf sechs")); heap.put("aaaaaaaaaaac", map("aaaaaaaaaaac", "sieben acht neun")); diff --git a/source/net/yacy/kelondro/blob/HeapWriter.java b/source/net/yacy/kelondro/blob/HeapWriter.java index df3b014b8..feb25f6d2 100644 --- a/source/net/yacy/kelondro/blob/HeapWriter.java +++ b/source/net/yacy/kelondro/blob/HeapWriter.java @@ -40,6 +40,8 @@ import net.yacy.kelondro.util.FileUtils; public final class HeapWriter { + public final static byte[] ZERO = new byte[]{0}; + private final int keylength; // the length of the primary key private HandleMap index; // key/seek relation for used records private final File heapFileTMP; // the temporary file of the heap during writing @@ -101,9 +103,12 @@ public final class HeapWriter { assert index.get(key) < 0 : "index.get(key) = " + index.get(key) + ", index.size() = " + index.size() + ", file.length() = " + this.heapFileTMP.length() + ", key = " + new String(key); // must not occur before if ((blob == null) || (blob.length == 0)) return; index.putUnique(key, this.seek); - int chunkl = key.length + blob.length; + int chunkl = this.keylength + blob.length; os.writeInt(chunkl); os.write(key); + if (this.keylength > key.length) { + for (int i = 0; i < this.keylength - key.length; i++) os.write(ZERO); + } os.write(blob); //assert (this.doublecheck.add(new String(key))) : "doublecheck failed for " + new String(key); this.seek += chunkl + 4; 
diff --git a/source/net/yacy/kelondro/blob/MapDataMining.java b/source/net/yacy/kelondro/blob/MapDataMining.java index eb81cb64b..36201cc54 100644 --- a/source/net/yacy/kelondro/blob/MapDataMining.java +++ b/source/net/yacy/kelondro/blob/MapDataMining.java @@ -27,6 +27,7 @@ package net.yacy.kelondro.blob; +import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -36,19 +37,29 @@ import java.util.Map; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.util.ScoreCluster; -public class MapDataMining extends MapView { +public class MapDataMining extends MapHeap { private final String[] sortfields, longaccfields, doubleaccfields; private HashMap> sortClusterMap; // a String-kelondroMScoreCluster - relation private HashMap accMap; // to store accumulations of specific fields @SuppressWarnings("unchecked") - public MapDataMining(final Heap dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) { - super(dyn, cachesize, '_'); + public MapDataMining(final File heapFile, + final int keylength, + final ByteOrder ordering, + int buffermax, + final int cachesize, + final String[] sortfields, + final String[] longaccfields, + final String[] doubleaccfields, + final Method externalInitializer, + final Object externalHandler) throws IOException { + super(heapFile, keylength, ordering, buffermax, cachesize, '_'); // create fast ordering clusters and acc fields this.sortfields = sortfields; @@ -86,7 +97,7 @@ public class MapDataMining extends MapView { // fill cluster and accumulator with values if ((sortfields != null) || (longaccfields != null) || (doubleaccfields != null)) try { - final CloneableIterator it = 
dyn.keys(true, false); + final CloneableIterator it = super.keys(true, false); String mapname; Object cell; long valuel; diff --git a/source/net/yacy/kelondro/blob/MapView.java b/source/net/yacy/kelondro/blob/MapHeap.java similarity index 96% rename from source/net/yacy/kelondro/blob/MapView.java rename to source/net/yacy/kelondro/blob/MapHeap.java index a627ed6b6..6091e9b5a 100644 --- a/source/net/yacy/kelondro/blob/MapView.java +++ b/source/net/yacy/kelondro/blob/MapHeap.java @@ -40,6 +40,7 @@ import net.yacy.kelondro.index.ARC; import net.yacy.kelondro.index.ConcurrentARC; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; +import net.yacy.kelondro.order.ByteOrder; import net.yacy.kelondro.order.CloneableIterator; import net.yacy.kelondro.order.NaturalOrder; import net.yacy.kelondro.order.RotateIterator; @@ -49,15 +50,21 @@ import net.yacy.kelondro.util.kelondroException; -public class MapView { +public class MapHeap { private BLOB blob; private ARC> cache; private final char fillchar; - public MapView(final Heap blob, final int cachesize, char fillchar) { - this.blob = blob; + public MapHeap( + final File heapFile, + final int keylength, + final ByteOrder ordering, + int buffermax, + final int cachesize, + char fillchar) throws IOException { + this.blob = new Heap(heapFile, keylength, ordering, buffermax); this.cache = new ConcurrentARC>(cachesize, Runtime.getRuntime().availableProcessors()); this.fillchar = fillchar; /* @@ -370,10 +377,8 @@ public class MapView { File f = new File("maptest"); if (f.exists()) FileUtils.deletedelete(f); try { - // make a blob - Heap blob = new Heap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024); // make map - MapView map = new MapView(blob, 1024, '_'); + MapHeap map = new MapHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024, 1024, '_'); // put some values into the map Map m = new HashMap(); m.put("k", "000"); map.put("123", m); diff --git a/source/net/yacy/kelondro/util/BDecoder.java 
b/source/net/yacy/kelondro/util/BDecoder.java index 5167dc6e0..96ae8610c 100644 --- a/source/net/yacy/kelondro/util/BDecoder.java +++ b/source/net/yacy/kelondro/util/BDecoder.java @@ -26,6 +26,7 @@ package net.yacy.kelondro.util; import java.io.File; import java.io.IOException; +import java.io.OutputStream; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; @@ -33,6 +34,12 @@ import java.util.Map; public class BDecoder { + private final static byte[] _e = "e".getBytes(); + private final static byte[] _i = "i".getBytes(); + private final static byte[] _d = "d".getBytes(); + private final static byte[] _l = "l".getBytes(); + private final static byte[] _p = ":".getBytes(); + private final byte[] b; private int pos; @@ -41,20 +48,21 @@ public class BDecoder { this.pos = 0; } - public enum BType { + public static enum BType { string, integer, list, dictionary; } - public interface BObject { + public static interface BObject { public BType getType(); - public String getString(); + public byte[] getString(); public long getInteger(); public List getList(); public Map getMap(); public String toString(); + public void toStream(OutputStream os) throws IOException; } - public class BDfltObject implements BObject { + public static abstract class BDfltObject implements BObject { public long getInteger() { throw new UnsupportedOperationException(); @@ -68,7 +76,7 @@ public class BDecoder { throw new UnsupportedOperationException(); } - public String getString() { + public byte[] getString() { throw new UnsupportedOperationException(); } @@ -82,24 +90,29 @@ public class BDecoder { } - public class BStringObject extends BDfltObject implements BObject { - String s; - public BStringObject(String s) { - this.s = s; + public static class BStringObject extends BDfltObject implements BObject { + private byte[] b; + public BStringObject(byte[] b) { + this.b = b; } public BType getType() { return BType.string; } - public String getString() { - return 
this.s; + public byte[] getString() { + return this.b; } public String toString() { - return this.s; + return new String(this.b); + } + public void toStream(OutputStream os) throws IOException { + os.write(Integer.toString(this.b.length).getBytes()); + os.write(_p); + os.write(this.b); } } - public class BListObject extends BDfltObject implements BObject { - List l; + public static class BListObject extends BDfltObject implements BObject { + private List l; public BListObject(List l) { this.l = l; } @@ -117,10 +130,15 @@ public class BDecoder { s.append("]"); return s.toString(); } + public void toStream(OutputStream os) throws IOException { + os.write(_l); + for (BObject bo: this.l) bo.toStream(os); + os.write(_e); + } } - public class BDictionaryObject extends BDfltObject implements BObject { - Map m; + public static class BDictionaryObject extends BDfltObject implements BObject { + private Map m; public BDictionaryObject(Map m) { this.m = m; } @@ -138,10 +156,18 @@ public class BDecoder { s.append("}"); return s.toString(); } + public void toStream(OutputStream os) throws IOException { + os.write(_d); + for (Map.Entry e: this.m.entrySet()) { + new BStringObject(e.getKey().getBytes()).toStream(os); + e.getValue().toStream(os); + } + os.write(_e); + } } - public class BIntegerObject extends BDfltObject implements BObject { - long i; + public static class BIntegerObject extends BDfltObject implements BObject { + private long i; public BIntegerObject(long i) { this.i = i; } @@ -154,18 +180,23 @@ public class BDecoder { public String toString() { return Long.toString(this.i); } + public void toStream(OutputStream os) throws IOException { + os.write(_i); + os.write(Long.toString(this.i).getBytes()); + os.write(_e); + } } private Map convertToMap(final List list) { final Map m = new LinkedHashMap(); final int length = list.size(); for (int i = 0; i < length; i += 2) { - final String key = list.get(i).getString(); + final byte[] key = list.get(i).getString(); BObject 
value = null; if (i + 1 < length) { value = list.get(i + 1); } - m.put(key, value); + m.put(new String(key), value); } return m; } @@ -191,9 +222,39 @@ public class BDecoder { end++; while (b[end] != ':') ++end; final int len = Integer.parseInt(new String(b, pos, end - pos)); - final String str = new String(b, end + 1, len); + final byte[] s = new byte[len]; + System.arraycopy(b, end + 1, s, 0, len); pos = end + len + 1; - return new BStringObject(str); + return new BStringObject(s); + } else if (ch == 'l') { + pos++; + return new BListObject(readList()); + } else if (ch == 'd') { + pos++; + return new BDictionaryObject(convertToMap(readList())); + } else if (ch == 'i') { + pos++; + int end = pos; + while (b[end] != 'e') ++end; + BIntegerObject io = new BIntegerObject(Long.parseLong(new String(b, pos, end - pos))); + pos = end + 1; + return io; + } else { + return null; + } + } + /* + public static BObject parse(InputStream is) { + if (is.available() < 1) return null; + char ch = (char) is.read(); + if ((ch >= '0') && (ch <= '9')) { + StringBuilder s = new StringBuilder(); + s.append(ch); + while ((ch = (char) is.read()) != ':') s.append(ch); + int len = Integer.parseInt(s.toString()); + byte[] b = new byte[len]; + is.read(b); + return new BStringObject(new String(b)); } else if (ch == 'l') { pos++; return new BListObject(readList()); @@ -211,6 +272,7 @@ public class BDecoder { return null; } } + */ public static void print(BObject bo, int t) { for (int i = 0; i < t; i++) System.out.print(" "); diff --git a/source/net/yacy/kelondro/util/BEncoder.java b/source/net/yacy/kelondro/util/BEncoder.java new file mode 100644 index 000000000..14a07c96c --- /dev/null +++ b/source/net/yacy/kelondro/util/BEncoder.java @@ -0,0 +1,89 @@ +// BEncoder.java +// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 12.01.2010 on http://yacy.net +// +// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $ +// $LastChangedRevision: 6563 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.kelondro.util; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import net.yacy.kelondro.util.BDecoder.BObject; + +public class BEncoder { + + // lists + public static List transcode(List list) { + ArrayList l = new ArrayList(list.size()); + for (byte[] entry: list) l.add(new BDecoder.BStringObject(entry)); + return l; + } + + public static byte[] encode(List list) { + BDecoder.BListObject l = new BDecoder.BListObject(list); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + l.toStream(baos); + baos.close(); + return baos.toByteArray(); + } catch (IOException e) { + } + return null; + } + + // maps + public static Map transcode(Map map) { + Map m = new HashMap(); + for (Map.Entry entry: map.entrySet()) m.put(entry.getKey(), new BDecoder.BStringObject(entry.getValue())); + return m; + } + + public static byte[] encode(Map map) { + BDecoder.BDictionaryObject dict = new 
BDecoder.BDictionaryObject(map); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + dict.toStream(baos); + baos.close(); + return baos.toByteArray(); + } catch (IOException e) { + } + return null; + } + + public static void main(final String[] args) { + Map m = new HashMap(); + m.put("k", "000".getBytes()); + m.put("r", "111".getBytes()); + m.put("s", "222".getBytes()); + Map t = transcode(m); + byte[] b = encode(t); + System.out.println(new String(b)); + BDecoder d = new BDecoder(b); + BObject o = d.parse(); + System.out.println(o.toString()); + } + +} diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index aa0aec1e6..0b6c27214 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -46,7 +46,6 @@ import java.util.concurrent.Semaphore; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; -import net.yacy.kelondro.blob.Heap; import net.yacy.kelondro.blob.MapDataMining; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -920,7 +919,7 @@ public final class yacy { final String[] dbFileNames = {"seed.new.db","seed.old.db","seed.pot.db"}; for (int i=0; i < dbFileNames.length; i++) { final File dbFile = new File(yacyDBPath,dbFileNames[i]); - final MapDataMining db = new MapDataMining(new Heap(dbFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, yacySeedDB.sortFields, yacySeedDB.longaccFields, yacySeedDB.doubleaccFields, null, null); + final MapDataMining db = new MapDataMining(dbFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, yacySeedDB.sortFields, yacySeedDB.longaccFields, yacySeedDB.doubleaccFields, null, null); MapDataMining.mapIterator it; it = db.maps(true, false);