From c60d0282fd3958aa110b07abbf82c1583d27f872 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 23 Aug 2010 21:27:58 +0000 Subject: [PATCH] more abstraction for tables stored in heaps: the BEncodedHeap now implements Map<byte[], Map<String, byte[]>> This will make it possible that also different database storage types may be added that implement also the same Map<byte[], Map<String, byte[]>> interface. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7070 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/crawler/RobotsTxt.java | 7 +- source/de/anomic/server/serverObjects.java | 2 +- .../net/yacy/kelondro/blob/BEncodedHeap.java | 385 ++++++++++++++---- source/net/yacy/kelondro/blob/Tables.java | 10 +- 4 files changed, 308 insertions(+), 96 deletions(-) diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index a047b3212..3646c7e63 100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -67,10 +67,7 @@ public class RobotsTxt { public void clear() { log.info("clearing robots table"); - try { - this.robotsTable.clear(); - } catch (IOException e) { - } + this.robotsTable.clear(); syncObjects.clear(); } @@ -221,7 +218,7 @@ public class RobotsTxt { private String addEntry(final RobotsEntry entry) { // writes a new page and returns key try { - this.robotsTable.put(this.robotsTable.encodedKey(entry.hostName), entry.getMem()); + this.robotsTable.insert(this.robotsTable.encodedKey(entry.hostName), entry.getMem()); return entry.hostName; } catch (final Exception e) { log.warn("cannot write robots.txt entry", e); diff --git a/source/de/anomic/server/serverObjects.java b/source/de/anomic/server/serverObjects.java index 0487785c1..36c644a55 100644 --- a/source/de/anomic/server/serverObjects.java +++ b/source/de/anomic/server/serverObjects.java @@ -92,7 +92,7 @@ public class serverObjects extends HashMap implements Cloneable * If value is null, then the element at key * is removed from the map. 
* @return The value that was added to the map. - * @see java.util.Hashtable#put(K, V) + * @see java.util.Hashtable#put(K, V) */ @Override public String put(final String key, final String value) { diff --git a/source/net/yacy/kelondro/blob/BEncodedHeap.java b/source/net/yacy/kelondro/blob/BEncodedHeap.java index c3e6ff549..a51d94ca3 100644 --- a/source/net/yacy/kelondro/blob/BEncodedHeap.java +++ b/source/net/yacy/kelondro/blob/BEncodedHeap.java @@ -27,11 +27,13 @@ package net.yacy.kelondro.blob; import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Map; -import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; @@ -48,7 +50,7 @@ import net.yacy.kelondro.util.BDecoder.BObject; * store a table of properties (instead of fixed-field entries) * this is realized using blobs and BEncoded property lists */ -public class BEncodedHeap implements /* Map>,*/ Iterable>> { +public class BEncodedHeap implements Map>, Iterable>> { private Heap table; private LinkedHashSet columnames; @@ -86,45 +88,126 @@ public class BEncodedHeap implements /* Map>,*/ Iter return Base64Order.enhancedCoder.encodeSubstring(Digest.encodeMD5Raw(key), this.table.keylength); } + private static class EntryIter implements Iterator>> { + HeapReader.entries iter; + public EntryIter(File location, int keylen) throws IOException { + iter = new HeapReader.entries(location, keylen); + } + + public boolean hasNext() { + return iter.hasNext(); + } + + public Entry> next() { + Map.Entry entry = iter.next(); + Map map = b2m(entry.getValue()); + return new b2mEntry(entry.getKey(), map); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + public static class b2mEntry implements Map.Entry> { + private final 
byte[] s; + private Map b; + + public b2mEntry(final byte[] s, final Map b) { + this.s = s; + this.b = b; + } + + public byte[] getKey() { + return s; + } + + public Map getValue() { + return b; + } + + public Map setValue(Map value) { + Map b1 = b; + b = value; + return b1; + } + } + + private static Map b2m(byte[] b) { + if (b == null) return null; + //System.out.println("b = " + new String(b)); + BDecoder decoder = new BDecoder(b); + BObject bobj = decoder.parse(); + if (bobj.getType() != BDecoder.BType.dictionary) return null; + Map map = bobj.getMap(); + Map m = new HashMap(); + for (Map.Entry entry: map.entrySet()) { + if (entry.getValue().getType() != BDecoder.BType.string) continue; + m.put(entry.getKey(), entry.getValue().getString()); + } + return m; + } + + /** + * the map is stored inside a file; this method may return the file + * @return the file where the map is stored + */ public File getFile() { return this.table.heapFile; } - + + /** + * Retur the number of key-value mappings in this map. 
+ * @return the number of key-value mappings in this map + */ public int size() { return this.table.size(); } - public void close() { - this.table.close(); + /** + * return true if the table is empty + */ + public boolean isEmpty() { + return this.table.size() == 0; } - public void clear() throws IOException { - this.table.clear(); + /** + * check if a row with given key exists in the table + * @param pk + * @return true if the row exists + */ + public boolean containsKey(byte[] pk) { + return this.table.has(pk); } /** - * insert a map into the table + * check if a row with given key exists in the table + * This method is here to implement the Map interface * @param name - * @param map - * @throws RowSpaceExceededException - * @throws IOException + * @return true if the row exists */ - public void put(byte[] pk, Map map) throws RowSpaceExceededException, IOException { - byte[] b = BEncoder.encode(BEncoder.transcode(map)); - this.table.put(pk, b); - this.columnames.addAll(map.keySet()); + @Override + public boolean containsKey(Object key) { + if (key instanceof byte[]) return containsKey((byte[]) key); + return false; } - public void put(byte[] pk, String key, byte[] value) throws IOException { - byte[] b = BEncoder.encodeMap(key, value); - this.table.put(pk, b); - this.columnames.add(key); + /** + * the containsValue method cannot be used in this class + * and is only here to implement the Map interface + */ + @Override + public boolean containsValue(Object value) { + // this method shall not be used because it is not appropriate for this kind of data + throw new UnsupportedOperationException(); } /** - * select a map from the table + * get a map from the table * @param name * @return the map if one found or NULL if no entry exists or the entry is corrupt + * @throws RowSpaceExceededException * @throws IOException */ public Map get(byte[] pk) throws IOException, RowSpaceExceededException { @@ -132,27 +215,83 @@ public class BEncodedHeap implements /* Map>,*/ Iter 
if (b == null) return null; return b2m(b); } - + + /** + * get a map from the table + * this method is here to implement the Map interface + * @param name + * @return the map if one found or NULL if no entry exists or the entry is corrupt + */ + @Override + public Map get(Object key) { + if (key instanceof byte[]) + try { + return get((byte[]) key); + } catch (IOException e) { + Log.logException(e); + return null; + } catch (RowSpaceExceededException e) { + Log.logException(e); + return null; + } + return null; + } + + /** + * convenience method to get a value from a map + * @param pk + * @param key + * @return the value + * @throws IOException + * @throws RowSpaceExceededException + */ public byte[] getProp(byte[] pk, String key) throws IOException, RowSpaceExceededException { byte[] b = this.table.get(pk); if (b == null) return null; Map map = b2m(b); return map.get(key); } + + /** + * insert a map into the table + * this method shall be used in exchange of the get method if the + * previous entry value is not needed. 
+ * @param name + * @param map + * @throws RowSpaceExceededException + * @throws IOException + */ + public void insert(byte[] pk, Map map) throws RowSpaceExceededException, IOException { + byte[] b = BEncoder.encode(BEncoder.transcode(map)); + this.table.put(pk, b); + this.columnames.addAll(map.keySet()); + } - static Map b2m(byte[] b) { - if (b == null) return null; - //System.out.println("b = " + new String(b)); - BDecoder decoder = new BDecoder(b); - BObject bobj = decoder.parse(); - if (bobj.getType() != BDecoder.BType.dictionary) return null; - Map map = bobj.getMap(); - Map m = new HashMap(); - for (Map.Entry entry: map.entrySet()) { - if (entry.getValue().getType() != BDecoder.BType.string) continue; - m.put(entry.getKey(), entry.getValue().getString()); + public void insert(byte[] pk, String key, byte[] value) throws IOException { + byte[] b = BEncoder.encodeMap(key, value); + this.table.put(pk, b); + this.columnames.add(key); + } + + /** + * insert a map into the table + * @param name + * @param map + */ + public Map put(byte[] pk, Map map) { + try { + Map entry = this.get(pk); + byte[] b = BEncoder.encode(BEncoder.transcode(map)); + this.table.put(pk, b); + this.columnames.addAll(map.keySet()); + return entry; + } catch (IOException e) { + Log.logException(e); + return null; + } catch (RowSpaceExceededException e) { + Log.logException(e); + return null; } - return m; } /** @@ -165,14 +304,87 @@ public class BEncodedHeap implements /* Map>,*/ Iter } /** - * check if a row with given key exists in the table + * delete a map from the table * @param name - * @return true if the row exists + * @throws RowSpaceExceededException + * @throws IOException */ - public boolean has(byte[] pk) { - return this.table.has(pk); + public Map remove(byte[] key) throws IOException, RowSpaceExceededException { + Map value = get(key); + this.delete(key); + return value; + } + + @Override + public Map remove(Object key) { + if (key instanceof byte[]) + try { + return 
remove((byte[]) key); + } catch (IOException e) { + Log.logException(e); + return null; + } catch (RowSpaceExceededException e) { + Log.logException(e); + return null; + } + return null; + } + + /** + * Copy all the mappings from the specified map to this map. + * + * @param m mappings to be stored in this map + */ + public void putAll(Map> map) { + for (Map.Entry> me: map.entrySet()) { + try { + this.insert(me.getKey(), me.getValue()); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } catch (IOException e) { + Log.logException(e); + e.printStackTrace(); + } + } } + /** + * remove all entries from the map; + * possibly removes the backend-file + */ + public void clear() { + try { + this.table.clear(); + } catch (IOException e) { + Log.logException(e); + } + } + + /** + * close the backen-file. + * Should be called explicitely to ensure that all data + * waiting in IO write buffers are flushed + */ + public void close() { + this.table.close(); + } + + /** + * Return a Set of the keys contained in this map. + * This may not be a useful method, if possible use the keys() + * method instead to iterate all keys from the backend-file + * + * @return a set view of the keys contained in this map + */ + public Set keySet() { + TreeSet set = new TreeSet(this.table.ordering); + try { + Iterator i = this.table.keys(true, false); + while (i.hasNext()) set.add(i.next()); + } catch (IOException e) {} + return set; + } + /** * iterate all keys of the table * @return an iterator of byte[] @@ -182,8 +394,40 @@ public class BEncodedHeap implements /* Map>,*/ Iter return this.table.keys(true, false); } + /** + * the values() method is not implemented in this class + * because it does not make sense to use such a method for + * file-based data structures. To get a collection view of + * all the entries, just use a entry iterator instead. + * + * @return nothing. 
The method always throws an UnsupportedOperationException + */ + public Collection> values() { + // this method shall not be used because it is not appropriate for this kind of data + throw new UnsupportedOperationException(); + } + + /** + * The abstract method entrySet() from AbstractMap must be implemented, + * but never used because that is not useful for this file-based storage class. + * To prevent the usage, an UnsupportedOperationException is thrown. + * To prevent that the method is used by the methods from AbstractMap, all such + * methods must be overridden in this class. These methods are: + * size, containsValue, containsKey, get, remove, putAll, clear, + * keySet, values, equals, hashCode and toString + * + * Instead of using this method, use the iterator() method to iterate + * all elements in the back-end blob file + */ + public Set>> entrySet() { + throw new UnsupportedOperationException(); + } + /** * iterate all rows of the table. + * This method implements the + * Iterable<Map.Entry<byte[], Map<String, byte[]>>> + * interface */ public Iterator>> iterator() { File location = this.table.location(); @@ -205,7 +449,7 @@ public class BEncodedHeap implements /* Map>,*/ Iter } } } - + /** * iterate all rows of the table. 
this is a static method that expects that the given * file is not opened by any other application @@ -218,52 +462,22 @@ public class BEncodedHeap implements /* Map>,*/ Iter return new EntryIter(location, keylen); } - private static class EntryIter implements Iterator>> { - HeapReader.entries iter; - public EntryIter(File location, int keylen) throws IOException { - iter = new HeapReader.entries(location, keylen); - } - - public boolean hasNext() { - return iter.hasNext(); - } - - public Entry> next() { - Map.Entry entry = iter.next(); - Map map = b2m(entry.getValue()); - return new b2mEntry(entry.getKey(), map); - } - - public void remove() { - throw new UnsupportedOperationException(); - } - - } - public static class b2mEntry implements Map.Entry> { - private final byte[] s; - private Map b; - - public b2mEntry(final byte[] s, final Map b) { - this.s = s; - this.b = b; - } - - public byte[] getKey() { - return s; - } - - public Map getValue() { - return b; - } - - public Map setValue(Map value) { - Map b1 = b; - b = value; - return b1; - } + /** + * a hashcode for the object + */ + public int hashCode() { + return this.table.name().hashCode(); } - + + /** + * Produce a list of column names from this table + * This method may be useful if the table shall be displayed + * as a table in GUIs. To show the first line of the table, the + * table header, a list of all column names is required. 
This can + * be generated with this method + * @return a list of column names + */ public ArrayList columns() { if (this.columnames.size() == 0) { for (Map.Entry> row: this) { @@ -286,9 +500,9 @@ public class BEncodedHeap implements /* Map>,*/ Iter BEncodedHeap map = new BEncodedHeap(f, 4); // put some values into the map Map m = new HashMap(); - m.put("k", "000".getBytes()); map.put("123".getBytes(), m); - m.put("k", "111".getBytes()); map.put("456".getBytes(), m); - m.put("k", "222".getBytes()); map.put("789".getBytes(), m); + m.put("k", "000".getBytes()); map.insert("123".getBytes(), m); + m.put("k", "111".getBytes()); map.insert("456".getBytes(), m); + m.put("k", "222".getBytes()); map.insert("789".getBytes(), m); // iterate over keys Iterator>> i = map.iterator(); while (i.hasNext()) { @@ -317,4 +531,5 @@ public class BEncodedHeap implements /* Map>,*/ Iter } } } + } diff --git a/source/net/yacy/kelondro/blob/Tables.java b/source/net/yacy/kelondro/blob/Tables.java index b3d243b2d..09e8465f6 100644 --- a/source/net/yacy/kelondro/blob/Tables.java +++ b/source/net/yacy/kelondro/blob/Tables.java @@ -176,14 +176,14 @@ public class Tables { byte[] uk = ukey(tablename); update(tablename, uk, map); BEncodedHeap heap = getHeap(system_table_pkcounter); - heap.put(tablename.getBytes(), system_table_pkcounter_counterName, uk); + heap.insert(tablename.getBytes(), system_table_pkcounter_counterName, uk); return uk; } public void update(final String table, byte[] pk, Map map) throws IOException { BEncodedHeap heap = getHeap(table); try { - heap.put(pk, map); + heap.insert(pk, map); } catch (RowSpaceExceededException e) { throw new IOException(e.getMessage()); } @@ -192,7 +192,7 @@ public class Tables { public void update(final String table, Row row) throws IOException { BEncodedHeap heap = getHeap(table); try { - heap.put(row.pk, row); + heap.insert(row.pk, row); } catch (RowSpaceExceededException e) { throw new IOException(e.getMessage()); } @@ -204,7 +204,7 @@ public class 
Tables { public Row select(final String table, byte[] pk) throws IOException, RowSpaceExceededException { BEncodedHeap heap = getHeap(table); - if (heap.has(pk)) return new Row(pk, heap.get(pk)); + if (heap.containsKey(pk)) return new Row(pk, heap.get(pk)); return null; } @@ -215,7 +215,7 @@ public class Tables { public boolean has(String table, byte[] key) throws IOException { BEncodedHeap heap = getHeap(table); - return heap.has(key); + return heap.containsKey(key); } public Iterator keys(String table) throws IOException {