- added BEncoder class

- added BEncodedHeap class that B-encodes data structures and stores them in a heap
- refactored MapView; it is now named MapHeap to fit the naming scheme of BEncodedHeap

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6579 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 81035e7080
commit 5df628a2a4

@ -31,8 +31,7 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowSpaceExceededException;
@ -52,15 +51,14 @@ public class CrawlProfile {
static HashMap<String, ConcurrentHashMap<String, DomProfile>> domsCache = new HashMap<String, ConcurrentHashMap<String, DomProfile>>();
MapView profileTable;
MapHeap profileTable;
private final File profileTableFile;
public CrawlProfile(final File file) throws IOException {
//System.out.println("loading crawl profile from " + file);
this.profileTableFile = file;
profileTableFile.getParentFile().mkdirs();
final Heap dyn = new Heap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64);
profileTable = new MapView(dyn, 500, '_');
profileTable = new MapHeap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
profileIterator pi = new profileIterator(true);
entry e;
while (pi.hasNext()) {
@ -75,13 +73,11 @@ public class CrawlProfile {
if (profileTable != null) profileTable.close();
FileUtils.deletedelete(profileTableFile);
profileTableFile.getParentFile().mkdirs();
Heap dyn = null;
try {
dyn = new Heap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64);
profileTable = new MapHeap(profileTableFile, Word.commonHashLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
} catch (IOException e) {
Log.logException(e);
}
profileTable = new MapView(dyn, 500, '_');
}
public void close() {

@ -36,8 +36,7 @@ import java.util.Date;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
@ -57,7 +56,7 @@ public class RobotsTxt {
public static final String ROBOTS_DB_PATH_SEPARATOR = ";";
private static final Log log = new Log("ROBOTS");
MapView robotsTable;
MapHeap robotsTable;
private final File robotsTableFile;
private final ConcurrentHashMap<String, DomSync> syncObjects;
//private static final HashSet<String> loadedRobots = new HashSet<String>(); // only for debugging
@ -69,13 +68,11 @@ public class RobotsTxt {
public RobotsTxt(final File robotsTableFile) {
this.robotsTableFile = robotsTableFile;
robotsTableFile.getParentFile().mkdirs();
Heap blob = null;
try {
blob = new Heap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024);
robotsTable = new MapHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024, 100, '_');
} catch (final IOException e) {
Log.logException(e);
}
robotsTable = new MapView(blob, 100, '_');
syncObjects = new ConcurrentHashMap<String, DomSync>();
}
@ -84,13 +81,11 @@ public class RobotsTxt {
if (robotsTable != null) robotsTable.close();
FileUtils.deletedelete(robotsTableFile);
robotsTableFile.getParentFile().mkdirs();
Heap blob = null;
try {
blob = new Heap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024);
robotsTable = new MapHeap(robotsTableFile, 64, NaturalOrder.naturalOrder, 1024 * 1024, 100, '_');
} catch (final IOException e) {
Log.logException(e);
}
robotsTable = new MapView(blob, 100, '_');
syncObjects.clear();
}

@ -42,8 +42,7 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.NaturalOrder;
@ -61,13 +60,13 @@ public class blogBoard {
public static final int keyLength = 64;
MapView database = null;
MapHeap database = null;
public blogBoard(final File actpath) throws IOException {
new File(actpath.getParent()).mkdir();
if (database == null) {
//database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
database = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
database = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
}
@ -143,7 +142,7 @@ public class blogBoard {
return readBlogEntry(key, database);
}
private BlogEntry readBlogEntry(String key, final MapView base) {
private BlogEntry readBlogEntry(String key, final MapHeap base) {
key = normalize(key);
if (key.length() > keyLength) key = key.substring(0, keyLength);
Map<String, String> record;

@ -42,8 +42,7 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.NaturalOrder;
@ -65,13 +64,13 @@ public class blogBoardComments {
SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
}
private MapView database = null;
private MapHeap database = null;
public blogBoardComments(final File actpath) throws IOException {
new File(actpath.getParent()).mkdir();
if (database == null) {
//database = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, newFile), 500, '_');
database = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
database = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
}
@ -121,7 +120,7 @@ public class blogBoardComments {
//System.out.println("DEBUG: read from blogBoardComments");
return read(key, database);
}
private CommentEntry read(String key, final MapView base) {
private CommentEntry read(String key, final MapHeap base) {
key = normalize(key);
if (key.length() > keyLength) key = key.substring(0, keyLength);
Map<String, String> record;

@ -57,8 +57,7 @@ import javax.xml.parsers.ParserConfigurationException;
import net.yacy.document.parser.html.ContentScraper;
import net.yacy.document.parser.html.TransformerWriter;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.logging.Log;
@ -94,14 +93,14 @@ public class bookmarksDB {
final static String SLEEP_TIME = "3600000"; // default sleepTime: check for recrawls every hour
// bookmarks
MapView bookmarksTable; // kelondroMap bookmarksTable;
MapHeap bookmarksTable; // kelondroMap bookmarksTable;
// tags
MapView tagsTable;
MapHeap tagsTable;
TreeMap<String, Tag> tagCache;
// dates
MapView datesTable;
MapHeap datesTable;
// autoReCrawl
private final BusyThread autoReCrawl;
@ -116,19 +115,19 @@ public class bookmarksDB {
bookmarksFile.getParentFile().mkdirs();
//this.bookmarksTable = new kelondroMap(kelondroDyn.open(bookmarksFile, bufferkb * 1024, preloadTime, 12, 256, '_', true, false));
//this.bookmarksTable = new MapView(BLOBTree.toHeap(bookmarksFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, bookmarksFileNew), 1000, '_');
this.bookmarksTable = new MapView(new Heap(bookmarksFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 1000, '_');
this.bookmarksTable = new MapHeap(bookmarksFile, 12, NaturalOrder.naturalOrder, 1024 * 64, 1000, '_');
// tags
tagsFile.getParentFile().mkdirs();
final boolean tagsFileExisted = tagsFile.exists();
//this.tagsTable = new MapView(BLOBTree.toHeap(tagsFile, true, true, 12, 256, '_', NaturalOrder.naturalOrder, tagsFileNew), 500, '_');
this.tagsTable = new MapView(new Heap(tagsFile, 12, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
this.tagsTable = new MapHeap(tagsFile, 12, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
if (!tagsFileExisted) rebuildTags();
// dates
final boolean datesExisted = datesFile.exists();
//this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_');
this.datesTable = new MapView(new Heap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
this.datesTable = new MapHeap(datesFile, 20, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
if (!datesExisted) rebuildDates();
// autoReCrawl

@ -32,8 +32,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.NaturalOrder;
@ -50,14 +49,14 @@ public class messageBoard {
SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
}
MapView database = null;
MapHeap database = null;
private int sn = 0;
public messageBoard(final File path) throws IOException {
new File(path.getParent()).mkdir();
if (database == null) {
//database = new MapView(BLOBTree.toHeap(path, true, true, categoryLength + dateFormat.length() + 2, recordSize, '_', NaturalOrder.naturalOrder, pathNew), 500, '_');
database = new MapView(new Heap(path, categoryLength + dateFormat.length() + 2, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
database = new MapHeap(path, categoryLength + dateFormat.length() + 2, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
sn = 0;
}

@ -35,8 +35,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
@ -53,7 +52,7 @@ public final class userDB {
public static final int USERNAME_MAX_LENGTH = 128;
public static final int USERNAME_MIN_LENGTH = 4;
MapView userTable;
MapHeap userTable;
private final File userTableFile;
HashMap<String, String> ipUsers = new HashMap<String, String>();
HashMap<String, Object> cookieUsers = new HashMap<String, Object>();
@ -62,7 +61,7 @@ public final class userDB {
this.userTableFile = userTableFile;
userTableFile.getParentFile().mkdirs();
//this.userTable = new MapView(BLOBTree.toHeap(userTableFile, true, true, 128, 256, '_', NaturalOrder.naturalOrder, userTableFile), 10, '_');
this.userTable = new MapView(new Heap(userTableFile, 128, NaturalOrder.naturalOrder, 1024 * 64), 10, '_');
this.userTable = new MapHeap(userTableFile, 128, NaturalOrder.naturalOrder, 1024 * 64, 10, '_');
}
void resetDatabase() {
@ -71,7 +70,7 @@ public final class userDB {
FileUtils.deletedelete(userTableFile);
userTableFile.getParentFile().mkdirs();
try {
userTable = new MapView(new Heap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64), 10, '_');
userTable = new MapHeap(userTableFile, 256, NaturalOrder.naturalOrder, 1024 * 64, 10, '_');
} catch (IOException e) {
Log.logException(e);
}

@ -35,8 +35,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.NaturalOrder;
@ -53,20 +52,20 @@ public class wikiBoard {
SimpleFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
}
MapView datbase = null;
MapView bkpbase = null;
MapHeap datbase = null;
MapHeap bkpbase = null;
static HashMap<String, String> authors = new HashMap<String, String>();
public wikiBoard( final File actpath, final File bkppath) throws IOException {
new File(actpath.getParent()).mkdirs();
if (datbase == null) {
//datbase = new MapView(BLOBTree.toHeap(actpath, true, true, keyLength, recordSize, '_', NaturalOrder.naturalOrder, actpathNew), 500, '_');
datbase = new MapView(new Heap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
datbase = new MapHeap(actpath, keyLength, NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
new File(bkppath.getParent()).mkdirs();
if (bkpbase == null) {
//bkpbase = new MapView(BLOBTree.toHeap(bkppath, true, true, keyLength + dateFormat.length(), recordSize, '_', NaturalOrder.naturalOrder, bkppathNew), 500, '_');
bkpbase = new MapView(new Heap(bkppath, keyLength + dateFormat.length(), NaturalOrder.naturalOrder, 1024 * 64), 500, '_');
bkpbase = new MapHeap(bkppath, keyLength + dateFormat.length(), NaturalOrder.naturalOrder, 1024 * 64, 500, '_');
}
}
@ -280,7 +279,7 @@ public class wikiBoard {
return read(key, datbase);
}
entry read(String key, final MapView base) {
entry read(String key, final MapHeap base) {
try {
key = normalize(key);
if (key.length() > keyLength) key = key.substring(0, keyLength);

@ -42,8 +42,7 @@ import java.util.Map;
import net.yacy.kelondro.blob.ArrayStack;
import net.yacy.kelondro.blob.Compressor;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapView;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.logging.Log;
@ -56,7 +55,7 @@ public final class Cache {
private static final String RESPONSE_HEADER_DB_NAME = "responseHeader.heap";
private static final String FILE_DB_NAME = "file.array";
private static MapView responseHeaderDB = null;
private static MapHeap responseHeaderDB = null;
private static Compressor fileDB = null;
private static ArrayStack fileDBunbuffered = null;
@ -78,13 +77,11 @@ public final class Cache {
// open the response header database
final File dbfile = new File(cachePath, RESPONSE_HEADER_DB_NAME);
Heap blob = null;
try {
blob = new Heap(dbfile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024);
responseHeaderDB = new MapHeap(dbfile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 1024, 500, '_');
} catch (final IOException e) {
Log.logException(e);
}
responseHeaderDB = new MapView(blob, 500, '_');
try {
fileDBunbuffered = new ArrayStack(new File(cachePath, FILE_DB_NAME), prefix, 12, Base64Order.enhancedCoder, 1024 * 1024 * 2);
fileDBunbuffered.setMaxSize(maxCacheSize);

@ -39,7 +39,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapDataMining;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.word.Word;
@ -242,12 +241,12 @@ public final class yacySeedDB implements AlternativeDomainNames {
Log.logWarning("yacySeedDB", "could not create directories for "+ seedDBFile.getParent());
}
try {
return new MapDataMining(new Heap(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, sortFields, longaccFields, doubleaccFields, null, this);
return new MapDataMining(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, sortFields, longaccFields, doubleaccFields, null, this);
} catch (final Exception e) {
// try again
FileUtils.deletedelete(seedDBFile);
try {
return new MapDataMining(new Heap(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, sortFields, longaccFields, doubleaccFields, null, this);
return new MapDataMining(seedDBFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, sortFields, longaccFields, doubleaccFields, null, this);
} catch (IOException e1) {
Log.logException(e1);
System.exit(-1);

@ -88,7 +88,7 @@ public class torrentParser extends AbstractParser implements Idiom {
if (bo.getType() != BType.dictionary) throw new ParserException("BDecoder object is not a dictionary", location);
Map<String, BObject> map = bo.getMap();
BObject commento = map.get("comment");
String comment = (commento == null) ? "" : commento.getString();
String comment = (commento == null) ? "" : new String(commento.getString());
//Date creation = new Date(map.get("creation date").getInteger());
BObject infoo = map.get("info");
StringBuilder filenames = new StringBuilder();
@ -107,7 +107,7 @@ public class torrentParser extends AbstractParser implements Idiom {
}
}
BObject nameo = info.get("name");
if (nameo != null) name = nameo.getString();
if (nameo != null) name = new String(nameo.getString());
}
try {
return new Document(

@ -0,0 +1,256 @@
// BEncodedHeap.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 12.01.2010 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 6563 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.blob;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.BDecoder;
import net.yacy.kelondro.util.BEncoder;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.BDecoder.BObject;
/**
 * Stores a table of property maps (instead of fixed-field entries).
 * Every row is a {@code Map<String, byte[]>} that is serialized as a BEncoded
 * dictionary and written as one blob into a {@link Heap} file.
 *
 * Note: {@link #iterator()} closes and re-opens the underlying heap and
 * re-assigns the internal table reference without any synchronization.
 */
public class BEncodedHeap implements Iterable<Map.Entry<byte[], Map<String, byte[]>>> {

    // backing blob store; not final because iterator() closes and re-opens it
    private Heap table;

    /**
     * produce or open a properties table
     * @param location the file
     * @param keylength length of access keys
     * @param ordering ordering on the keys
     * @param buffermax upper bound of the write buffer, handed unchanged to the Heap
     *        (NOTE(review): Heap documents this as a buffer size, presumably bytes - confirm)
     * @throws IOException if the heap file cannot be created or opened
     */
    public BEncodedHeap(
            final File location,
            final int keylength,
            final ByteOrder ordering,
            int buffermax) throws IOException {
        this.table = new Heap(location, keylength, ordering, buffermax);
    }

    /**
     * convenience constructor to open a properties table with natural key
     * ordering and a write buffer limit of 100
     * @param location the file
     * @param keylength length of access keys
     * @throws IOException if the heap file cannot be created or opened
     */
    public BEncodedHeap(
            final File location,
            final int keylength) throws IOException {
        this(location, keylength, NaturalOrder.naturalOrder, 100);
    }

    /**
     * close the underlying heap
     */
    public void close() {
        this.table.close();
    }

    /**
     * insert a map into the table; the map is BEncoded before it is stored
     * @param key the access key of the row
     * @param map the properties to store
     * @throws RowSpaceExceededException
     * @throws IOException
     */
    public void put(byte[] key, Map<String, byte[]> map) throws RowSpaceExceededException, IOException {
        this.table.put(key, BEncoder.encode(BEncoder.transcode(map)));
    }

    /**
     * select a map from the table
     * @param key the access key of the row
     * @return the map if one found or NULL if no entry exists or the entry is corrupt
     * @throws IOException
     */
    public Map<String, byte[]> get(byte[] key) throws IOException {
        // b2m maps a missing blob (null) to a null result
        return b2m(this.table.get(key));
    }

    /**
     * decode a BEncoded dictionary into a property map
     * @param b the BEncoded blob, may be null
     * @return the decoded map, or null if b is null or does not decode to a dictionary;
     *         dictionary values that are not BEncoded strings are left out
     */
    private static Map<String, byte[]> b2m(byte[] b) {
        if (b == null) return null;
        BObject bobj = new BDecoder(b).parse();
        // defensive: treat a missing parse result like any other corrupt entry
        // (assumption: parse() may return null for undecodable input)
        if (bobj == null || bobj.getType() != BDecoder.BType.dictionary) return null;
        Map<String, byte[]> m = new HashMap<String, byte[]>();
        for (Map.Entry<String, BObject> entry : bobj.getMap().entrySet()) {
            BObject value = entry.getValue();
            if (value.getType() != BDecoder.BType.string) continue;
            m.put(entry.getKey(), value.getString());
        }
        return m;
    }

    /**
     * delete a map from the table
     * @param key the access key of the row
     * @throws IOException
     */
    public void delete(byte[] key) throws IOException {
        this.table.remove(key);
    }

    /**
     * check if a row with given key exists in the table
     * @param key the access key of the row
     * @return true if the row exists
     */
    public boolean has(byte[] key) {
        return this.table.has(key);
    }

    /**
     * iterate all keys of the table
     * @return an iterator of byte[]
     * @throws IOException
     */
    public Iterator<byte[]> keys() throws IOException {
        return this.table.keys(true, false);
    }

    /**
     * iterate all rows of the table.
     * Be aware that this first closes the table to force flushing of all elements in
     * the write buffer. After that an iterator on the closed file is generated and then
     * the file is opened again. The table is re-opened even if the iterator could
     * not be created, so that this object stays usable.
     * @return the row iterator, or null if the iterator could not be created or the
     *         table could not be re-opened (the failure is logged)
     */
    public Iterator<Map.Entry<byte[], Map<String, byte[]>>> iterator() {
        // remember everything that is needed to re-open the heap after closing it
        final File location = this.table.location();
        final int keylen = this.table.keylength();
        final ByteOrder order = this.table.ordering();
        final int buffermax = this.table.getBuffermax();
        this.table.close();

        Iterator<Map.Entry<byte[], Map<String, byte[]>>> iter = null;
        try {
            iter = new EntryIter(location, keylen);
        } catch (IOException e) {
            Log.logSevere("PropertiesTable", e.getMessage(), e);
        }

        // always try to re-open: otherwise a failed iterator creation would leave
        // this object with a closed table
        try {
            this.table = new Heap(location, keylen, order, buffermax);
        } catch (IOException e) {
            Log.logSevere("PropertiesTable", e.getMessage(), e);
            return null;
        }
        return iter;
    }

    /**
     * iterate all rows of the table. this is a static method that expects that the given
     * file is not opened by any other application
     * @param location the heap file
     * @param keylen length of access keys
     * @return an iterator over all rows of the file
     * @throws IOException
     */
    public static Iterator<Map.Entry<byte[], Map<String, byte[]>>> iterator(File location, int keylen) throws IOException {
        return new EntryIter(location, keylen);
    }

    /**
     * row iterator that reads raw key/blob pairs from a heap file and decodes
     * each blob into a property map
     */
    private static class EntryIter implements Iterator<Map.Entry<byte[], Map<String, byte[]>>> {

        HeapReader.entries iter;

        public EntryIter(File location, int keylen) throws IOException {
            iter = new HeapReader.entries(location, keylen);
        }

        public boolean hasNext() {
            return iter.hasNext();
        }

        /**
         * @return the next row; its value is null if the stored blob does not
         *         decode to a dictionary
         */
        public Entry<byte[], Map<String, byte[]>> next() {
            Map.Entry<byte[], byte[]> raw = iter.next();
            return new b2mEntry(raw.getKey(), b2m(raw.getValue()));
        }

        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * simple key/value holder for one row: the access key and its property map.
     * setValue only changes this holder, nothing is written to a table.
     */
    public static class b2mEntry implements Map.Entry<byte[], Map<String, byte[]>> {

        private final byte[] s;
        private Map<String, byte[]> b;

        public b2mEntry(final byte[] s, final Map<String, byte[]> b) {
            this.s = s;
            this.b = b;
        }

        public byte[] getKey() {
            return s;
        }

        public Map<String, byte[]> getValue() {
            return b;
        }

        public Map<String, byte[]> setValue(Map<String, byte[]> value) {
            Map<String, byte[]> previous = b;
            b = value;
            return previous;
        }
    }

    /**
     * small self-test: writes three rows into a fresh heap file "maptest" and
     * prints all rows
     * @param args not used
     */
    public static void main(String[] args) {
        File f = new File(new File("maptest").getAbsolutePath());
        if (f.exists()) FileUtils.deletedelete(f);
        BEncodedHeap map = null;
        try {
            map = new BEncodedHeap(f, 4);
            // put some values into the map; use an explicit charset so that the
            // bytes match the UTF-8 decoding used for printing below
            Map<String, byte[]> m = new HashMap<String, byte[]>();
            m.put("k", "000".getBytes("UTF-8")); map.put("123".getBytes("UTF-8"), m);
            m.put("k", "111".getBytes("UTF-8")); map.put("456".getBytes("UTF-8"), m);
            m.put("k", "222".getBytes("UTF-8")); map.put("789".getBytes("UTF-8"), m);
            // iterate over all rows; iterator() returns null if it failed
            Iterator<Map.Entry<byte[], Map<String, byte[]>>> i = map.iterator();
            while (i != null && i.hasNext()) {
                Map.Entry<byte[], Map<String, byte[]>> entry = i.next();
                System.out.println(new String(entry.getKey(), "UTF-8") + ": " + entry.getValue());
            }
        } catch (IOException e) {
            Log.logException(e);
        } catch (RowSpaceExceededException e) {
            Log.logException(e);
        } finally {
            // clean up, also when one of the operations above failed
            if (map != null) map.close();
        }
    }
}

@ -41,7 +41,7 @@ import net.yacy.kelondro.order.NaturalOrder;
public final class Heap extends HeapModifier implements BLOB {
private HashMap<String, byte[]> buffer; // a write buffer to limit IO to the file; attention: Maps cannot use byte[] as key
private int buffersize; // bytes that are buffered in buffer
private final int buffermax; // maximum size of the buffer
@ -143,8 +143,11 @@ public final class Heap extends HeapModifier implements BLOB {
final int pos = (int) file.length();
index.put(key, pos);
file.seek(pos);
file.writeInt(key.length + blob.length);
file.writeInt(this.keylength + blob.length);
file.write(key);
if (this.keylength > key.length) {
for (int i = 0; i < this.keylength - key.length; i++) file.write(HeapWriter.ZERO);
}
file.write(blob, 0, blob.length);
}
@ -171,7 +174,7 @@ public final class Heap extends HeapModifier implements BLOB {
key = entry.getKey().getBytes();
assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength;
blob = entry.getValue();
posBuffer += 4 + key.length + blob.length;
posBuffer += 4 + this.keylength + blob.length;
}
assert l + (4 + this.keylength) * this.buffer.size() == posBuffer : "l = " + l + ", this.keylength = " + this.keylength + ", this.buffer.size() = " + this.buffer.size() + ", posBuffer = " + posBuffer;
@ -188,7 +191,7 @@ public final class Heap extends HeapModifier implements BLOB {
assert key.length == this.keylength : "key.length = " + key.length + ", this.keylength = " + this.keylength;
blob = entry.getValue();
index.put(key, posFile);
b = AbstractWriter.int2array(key.length + blob.length);
b = AbstractWriter.int2array(this.keylength + blob.length);
assert b.length == 4;
assert posBuffer + 4 < ba.length : "posBuffer = " + posBuffer + ", ba.length = " + ba.length;
System.arraycopy(b, 0, ba, posBuffer, 4);
@ -198,8 +201,8 @@ public final class Heap extends HeapModifier implements BLOB {
//System.out.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length);
//System.err.println("*** DEBUG posFile=" + posFile + ",blob.length=" + blob.length + ",ba.length=" + ba.length + ",posBuffer=" + posBuffer + ",key.length=" + key.length);
System.arraycopy(blob, 0, ba, posBuffer + 4 + this.keylength, blob.length); //java.lang.ArrayIndexOutOfBoundsException here
posFile += 4 + key.length + blob.length;
posBuffer += 4 + key.length + blob.length;
posFile += 4 + this.keylength + blob.length;
posBuffer += 4 + this.keylength + blob.length;
}
assert ba.length == posBuffer; // must fit exactly
this.file.seek(pos);
@ -355,6 +358,9 @@ public final class Heap extends HeapModifier implements BLOB {
final int reclenf = file.readInt();
assert reclenf == reclen;
file.write(key);
if (this.keylength > key.length) {
for (int j = 0; j < this.keylength - key.length; j++) file.write(HeapWriter.ZERO);
}
file.write(b);
// remove the entry from the free list
@ -383,6 +389,9 @@ public final class Heap extends HeapModifier implements BLOB {
file.seek(lseek);
file.writeInt(reclen);
file.write(key);
if (this.keylength > key.length) {
for (int j = 0; j < this.keylength - key.length; j++) file.write(HeapWriter.ZERO);
}
file.write(b);
// add the index to the new entry
@ -505,7 +514,7 @@ public final class Heap extends HeapModifier implements BLOB {
final File f = new File("/Users/admin/blobtest.heap");
try {
//f.delete();
final MapView heap = new MapView(new Heap(f, 12, NaturalOrder.naturalOrder, 1024 * 512), 500, '_');
final MapHeap heap = new MapHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 512, 500, '_');
heap.put("aaaaaaaaaaaa", map("aaaaaaaaaaaa", "eins zwei drei"));
heap.put("aaaaaaaaaaab", map("aaaaaaaaaaab", "vier fuenf sechs"));
heap.put("aaaaaaaaaaac", map("aaaaaaaaaaac", "sieben acht neun"));

@ -40,6 +40,8 @@ import net.yacy.kelondro.util.FileUtils;
public final class HeapWriter {
public final static byte[] ZERO = new byte[]{0};
private final int keylength; // the length of the primary key
private HandleMap index; // key/seek relation for used records
private final File heapFileTMP; // the temporary file of the heap during writing
@ -101,9 +103,12 @@ public final class HeapWriter {
assert index.get(key) < 0 : "index.get(key) = " + index.get(key) + ", index.size() = " + index.size() + ", file.length() = " + this.heapFileTMP.length() + ", key = " + new String(key); // must not occur before
if ((blob == null) || (blob.length == 0)) return;
index.putUnique(key, this.seek);
int chunkl = key.length + blob.length;
int chunkl = this.keylength + blob.length;
os.writeInt(chunkl);
os.write(key);
if (this.keylength > key.length) {
for (int i = 0; i < this.keylength - key.length; i++) os.write(ZERO);
}
os.write(blob);
//assert (this.doublecheck.add(new String(key))) : "doublecheck failed for " + new String(key);
this.seek += chunkl + 4;

@ -27,6 +27,7 @@
package net.yacy.kelondro.blob;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
@ -36,19 +37,29 @@ import java.util.Map;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.util.ScoreCluster;
public class MapDataMining extends MapView {
public class MapDataMining extends MapHeap {
private final String[] sortfields, longaccfields, doubleaccfields;
private HashMap<String, ScoreCluster<String>> sortClusterMap; // a String-kelondroMScoreCluster - relation
private HashMap<String, Object> accMap; // to store accumulations of specific fields
@SuppressWarnings("unchecked")
public MapDataMining(final Heap dyn, final int cachesize, final String[] sortfields, final String[] longaccfields, final String[] doubleaccfields, final Method externalInitializer, final Object externalHandler) {
super(dyn, cachesize, '_');
public MapDataMining(final File heapFile,
final int keylength,
final ByteOrder ordering,
int buffermax,
final int cachesize,
final String[] sortfields,
final String[] longaccfields,
final String[] doubleaccfields,
final Method externalInitializer,
final Object externalHandler) throws IOException {
super(heapFile, keylength, ordering, buffermax, cachesize, '_');
// create fast ordering clusters and acc fields
this.sortfields = sortfields;
@ -86,7 +97,7 @@ public class MapDataMining extends MapView {
// fill cluster and accumulator with values
if ((sortfields != null) || (longaccfields != null) || (doubleaccfields != null)) try {
final CloneableIterator<byte[]> it = dyn.keys(true, false);
final CloneableIterator<byte[]> it = super.keys(true, false);
String mapname;
Object cell;
long valuel;

@ -40,6 +40,7 @@ import net.yacy.kelondro.index.ARC;
import net.yacy.kelondro.index.ConcurrentARC;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.ByteOrder;
import net.yacy.kelondro.order.CloneableIterator;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.order.RotateIterator;
@ -49,15 +50,21 @@ import net.yacy.kelondro.util.kelondroException;
public class MapView {
public class MapHeap {
private BLOB blob;
private ARC<String, Map<String, String>> cache;
private final char fillchar;
public MapView(final Heap blob, final int cachesize, char fillchar) {
this.blob = blob;
public MapHeap(
final File heapFile,
final int keylength,
final ByteOrder ordering,
int buffermax,
final int cachesize,
char fillchar) throws IOException {
this.blob = new Heap(heapFile, keylength, ordering, buffermax);
this.cache = new ConcurrentARC<String, Map<String, String>>(cachesize, Runtime.getRuntime().availableProcessors());
this.fillchar = fillchar;
/*
@ -370,10 +377,8 @@ public class MapView {
File f = new File("maptest");
if (f.exists()) FileUtils.deletedelete(f);
try {
// make a blob
Heap blob = new Heap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024);
// make map
MapView map = new MapView(blob, 1024, '_');
MapHeap map = new MapHeap(f, 12, NaturalOrder.naturalOrder, 1024 * 1024, 1024, '_');
// put some values into the map
Map<String, String> m = new HashMap<String, String>();
m.put("k", "000"); map.put("123", m);

@ -26,6 +26,7 @@ package net.yacy.kelondro.util;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
@ -33,6 +34,12 @@ import java.util.Map;
public class BDecoder {
private final static byte[] _e = "e".getBytes();
private final static byte[] _i = "i".getBytes();
private final static byte[] _d = "d".getBytes();
private final static byte[] _l = "l".getBytes();
private final static byte[] _p = ":".getBytes();
private final byte[] b;
private int pos;
@ -41,20 +48,21 @@ public class BDecoder {
this.pos = 0;
}
public enum BType {
public static enum BType {
string, integer, list, dictionary;
}
public interface BObject {
public static interface BObject {
public BType getType();
public String getString();
public byte[] getString();
public long getInteger();
public List<BObject> getList();
public Map<String, BObject> getMap();
public String toString();
public void toStream(OutputStream os) throws IOException;
}
public class BDfltObject implements BObject {
public static abstract class BDfltObject implements BObject {
public long getInteger() {
throw new UnsupportedOperationException();
@ -68,7 +76,7 @@ public class BDecoder {
throw new UnsupportedOperationException();
}
public String getString() {
public byte[] getString() {
throw new UnsupportedOperationException();
}
@ -82,24 +90,29 @@ public class BDecoder {
}
public class BStringObject extends BDfltObject implements BObject {
String s;
public BStringObject(String s) {
this.s = s;
public static class BStringObject extends BDfltObject implements BObject {
private byte[] b;
public BStringObject(byte[] b) {
this.b = b;
}
public BType getType() {
return BType.string;
}
public String getString() {
return this.s;
public byte[] getString() {
return this.b;
}
public String toString() {
return this.s;
return new String(this.b);
}
/**
 * Writes this string in B-encoded form to the given stream: the decimal
 * length of the payload, the separator held in {@code _p}, then the
 * payload bytes themselves.
 *
 * @param os the stream that receives the encoded bytes
 * @throws IOException if writing to {@code os} fails
 */
public void toStream(OutputStream os) throws IOException {
    // length prefix is rendered as decimal text, exactly as the decoder parses it
    final byte[] lengthPrefix = String.valueOf(this.b.length).getBytes();
    os.write(lengthPrefix);
    os.write(_p);
    os.write(this.b);
}
}
public class BListObject extends BDfltObject implements BObject {
List<BObject> l;
public static class BListObject extends BDfltObject implements BObject {
private List<BObject> l;
public BListObject(List<BObject> l) {
this.l = l;
}
@ -117,10 +130,15 @@ public class BDecoder {
s.append("]");
return s.toString();
}
/**
 * Writes this list in B-encoded form to the given stream: the list
 * marker {@code _l}, every element in list order (each element encodes
 * itself), and finally the end marker {@code _e}.
 *
 * @param os the stream that receives the encoded bytes
 * @throws IOException if writing to {@code os} fails
 */
public void toStream(OutputStream os) throws IOException {
    os.write(_l);
    for (final BObject element : this.l) {
        element.toStream(os);
    }
    os.write(_e);
}
}
public class BDictionaryObject extends BDfltObject implements BObject {
Map<String, BObject> m;
public static class BDictionaryObject extends BDfltObject implements BObject {
private Map<String, BObject> m;
public BDictionaryObject(Map<String, BObject> m) {
this.m = m;
}
@ -138,10 +156,18 @@ public class BDecoder {
s.append("}");
return s.toString();
}
/**
 * Writes this dictionary in B-encoded form to the given stream: the
 * dictionary marker {@code _d}, then for every entry the key (encoded
 * as a B-string) followed by the value (which encodes itself), and
 * finally the end marker {@code _e}. Entries are written in the
 * iteration order of the backing map.
 *
 * @param os the stream that receives the encoded bytes
 * @throws IOException if writing to {@code os} fails
 */
public void toStream(OutputStream os) throws IOException {
    os.write(_d);
    for (final Map.Entry<String, BObject> pair : this.m.entrySet()) {
        // keys are plain Java strings in the map; wrap them so they get the length-prefixed form
        final BStringObject encodedKey = new BStringObject(pair.getKey().getBytes());
        encodedKey.toStream(os);
        final BObject member = pair.getValue();
        member.toStream(os);
    }
    os.write(_e);
}
}
public class BIntegerObject extends BDfltObject implements BObject {
long i;
public static class BIntegerObject extends BDfltObject implements BObject {
private long i;
public BIntegerObject(long i) {
this.i = i;
}
@ -154,18 +180,23 @@ public class BDecoder {
public String toString() {
return Long.toString(this.i);
}
/**
 * Writes this integer in B-encoded form to the given stream: the
 * integer marker {@code _i}, the value as decimal text, and the end
 * marker {@code _e}.
 *
 * @param os the stream that receives the encoded bytes
 * @throws IOException if writing to {@code os} fails
 */
public void toStream(OutputStream os) throws IOException {
    final byte[] digits = String.valueOf(this.i).getBytes();
    os.write(_i);
    os.write(digits);
    os.write(_e);
}
}
private Map<String, BObject> convertToMap(final List<BObject> list) {
final Map<String, BObject> m = new LinkedHashMap<String, BObject>();
final int length = list.size();
for (int i = 0; i < length; i += 2) {
final String key = list.get(i).getString();
final byte[] key = list.get(i).getString();
BObject value = null;
if (i + 1 < length) {
value = list.get(i + 1);
}
m.put(key, value);
m.put(new String(key), value);
}
return m;
}
@ -191,9 +222,39 @@ public class BDecoder {
end++;
while (b[end] != ':') ++end;
final int len = Integer.parseInt(new String(b, pos, end - pos));
final String str = new String(b, end + 1, len);
final byte[] s = new byte[len];
System.arraycopy(b, end + 1, s, 0, len);
pos = end + len + 1;
return new BStringObject(str);
return new BStringObject(s);
} else if (ch == 'l') {
pos++;
return new BListObject(readList());
} else if (ch == 'd') {
pos++;
return new BDictionaryObject(convertToMap(readList()));
} else if (ch == 'i') {
pos++;
int end = pos;
while (b[end] != 'e') ++end;
BIntegerObject io = new BIntegerObject(Long.parseLong(new String(b, pos, end - pos)));
pos = end + 1;
return io;
} else {
return null;
}
}
/*
public static BObject parse(InputStream is) {
if (is.available() < 1) return null;
char ch = (char) is.read();
if ((ch >= '0') && (ch <= '9')) {
StringBuilder s = new StringBuilder();
s.append(ch);
while ((ch = (char) is.read()) != ':') s.append(ch);
int len = Integer.parseInt(s.toString());
byte[] b = new byte[len];
is.read(b);
return new BStringObject(new String(b));
} else if (ch == 'l') {
pos++;
return new BListObject(readList());
@ -211,6 +272,7 @@ public class BDecoder {
return null;
}
}
*/
public static void print(BObject bo, int t) {
for (int i = 0; i < t; i++) System.out.print(" ");

@ -0,0 +1,89 @@
// BEncoder.java
// (C) 2010 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 12.01.2010 on http://yacy.net
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 6563 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import net.yacy.kelondro.util.BDecoder.BObject;
/**
 * Static helpers that turn plain Java collections of byte arrays into
 * B-objects and serialize lists and dictionaries of B-objects into their
 * B-encoded byte representation.
 */
public class BEncoder {

    // lists

    /**
     * Wraps every byte array of the given list into a B-string object.
     *
     * @param list the raw values to wrap
     * @return a new list with one {@code BStringObject} per input entry, in input order
     */
    public static List<BObject> transcode(List<byte[]> list) {
        ArrayList<BObject> l = new ArrayList<BObject>(list.size());
        for (byte[] entry : list) {
            l.add(new BDecoder.BStringObject(entry));
        }
        return l;
    }

    /**
     * Serializes the given B-objects as one B-encoded list.
     *
     * @param list the elements of the list
     * @return the encoded bytes, or {@code null} if an element failed to
     *         write itself with an {@link IOException}
     */
    public static byte[] encode(List<BObject> list) {
        return serialize(new BDecoder.BListObject(list));
    }

    // maps

    /**
     * Wraps every value of the given map into a B-string object; keys are
     * taken over unchanged.
     *
     * @param map the raw key/value pairs to wrap
     * @return a new map with the same keys and one {@code BStringObject} per value
     */
    public static Map<String, BObject> transcode(Map<String, byte[]> map) {
        Map<String, BObject> m = new HashMap<String, BObject>();
        for (Map.Entry<String, byte[]> entry : map.entrySet()) {
            m.put(entry.getKey(), new BDecoder.BStringObject(entry.getValue()));
        }
        return m;
    }

    /**
     * Serializes the given key/value pairs as one B-encoded dictionary.
     * Entries are written in the iteration order of {@code map}.
     *
     * @param map the entries of the dictionary
     * @return the encoded bytes, or {@code null} if an entry failed to
     *         write itself with an {@link IOException}
     */
    public static byte[] encode(Map<String, BObject> map) {
        return serialize(new BDecoder.BDictionaryObject(map));
    }

    /**
     * Shared serialization path of both {@code encode} variants: lets the
     * object write itself into an in-memory buffer and returns the buffer content.
     *
     * @param bo the object to serialize
     * @return the encoded bytes, or {@code null} if {@code bo.toStream} threw an {@link IOException}
     */
    private static byte[] serialize(BObject bo) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try {
            bo.toStream(baos);
            baos.close();
            return baos.toByteArray();
        } catch (IOException ignored) {
            // The in-memory buffer is not expected to fail; an IOException can
            // only originate from a BObject implementation's own toStream.
            // Callers of encode() have always received null in that case, so
            // that contract is kept instead of introducing a new exception.
            return null;
        }
    }

    /**
     * Small round-trip demonstration: encodes a three-entry map, prints the
     * encoded form, decodes it again and prints the decoded object.
     *
     * @param args unused
     */
    public static void main(final String[] args) {
        Map<String, byte[]> m = new HashMap<String, byte[]>();
        m.put("k", "000".getBytes());
        m.put("r", "111".getBytes());
        m.put("s", "222".getBytes());
        Map<String, BObject> t = transcode(m);
        byte[] b = encode(t);
        System.out.println(new String(b));
        BDecoder d = new BDecoder(b);
        BObject o = d.parse();
        System.out.println(o.toString());
    }
}

@ -46,7 +46,6 @@ import java.util.concurrent.Semaphore;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import net.yacy.kelondro.blob.Heap;
import net.yacy.kelondro.blob.MapDataMining;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -920,7 +919,7 @@ public final class yacy {
final String[] dbFileNames = {"seed.new.db","seed.old.db","seed.pot.db"};
for (int i=0; i < dbFileNames.length; i++) {
final File dbFile = new File(yacyDBPath,dbFileNames[i]);
final MapDataMining db = new MapDataMining(new Heap(dbFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512), 500, yacySeedDB.sortFields, yacySeedDB.longaccFields, yacySeedDB.doubleaccFields, null, null);
final MapDataMining db = new MapDataMining(dbFile, Word.commonHashLength, Base64Order.enhancedCoder, 1024 * 512, 500, yacySeedDB.sortFields, yacySeedDB.longaccFields, yacySeedDB.doubleaccFields, null, null);
MapDataMining.mapIterator it;
it = db.maps(true, false);

Loading…
Cancel
Save