From cf07b34c2d7ff2671587a22675b654a0e99911dd Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 23 Aug 2010 23:38:03 +0000 Subject: [PATCH] implemented the Map interface in the ARC classes so it will be possible to instantiate ARCs as Map> Because such Maps with byte[] keys cannot be stored in hash maps (bad hashing on byte[]) another ARC with comparable Maps has been added This will make it possible to move the HTCache class 'Cache' into the cora package because that class may be used either with RAM caches (ARCs) or with file-based caches (BEncodedHeaps) git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7071 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/net/yacy/cora/storage/ARC.java | 36 +++++++- .../net/yacy/cora/storage/ComparableARC.java | 68 ++++++++++++++ .../net/yacy/cora/storage/ConcurrentARC.java | 86 +++++++++++++++--- source/net/yacy/cora/storage/HashARC.java | 46 ++++++++++ source/net/yacy/cora/storage/SimpleARC.java | 90 +++++++++++++------ .../net/yacy/kelondro/blob/BEncodedHeap.java | 4 - 6 files changed, 286 insertions(+), 44 deletions(-) create mode 100644 source/net/yacy/cora/storage/ComparableARC.java create mode 100644 source/net/yacy/cora/storage/HashARC.java diff --git a/source/net/yacy/cora/storage/ARC.java b/source/net/yacy/cora/storage/ARC.java index 862a72d68..47aebc939 100644 --- a/source/net/yacy/cora/storage/ARC.java +++ b/source/net/yacy/cora/storage/ARC.java @@ -21,6 +21,10 @@ package net.yacy.cora.storage; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + /** * This is a simple cache using two generations of hashtables to store the content with a LFU strategy. * The Algorithm is described in a slightly more complex version as Adaptive Replacement Cache, "ARC". @@ -30,7 +34,7 @@ package net.yacy.cora.storage; * at the same size. */ -public interface ARC { +public interface ARC extends Iterable> { /** * get the size of the ARC. this returns the sum of main and ghost cache @@ -43,7 +47,14 @@ public interface ARC { * @param s * @param v */ - public void put(K s, V v); + public void insert(K s, V v); + + /** + * put a value to the cache. + * @param s + * @param v + */ + public V put(K s, V v); /** * get a value from the cache. @@ -70,4 +81,25 @@ public interface ARC { * clear the cache */ public void clear(); + + /** + * iterator implements the Iterable interface + * the method can easily be implemented using the entrySet method + */ + public Iterator> iterator(); + + /** + * Return a Set view of the mappings contained in this map. + * This method is the basis for all methods that are implemented + * by a AbstractMap implementation + * + * @return a set view of the mappings contained in this map + */ + public Set> entrySet(); + + /** + * a hash code for this ARC + * @return a hash code + */ + int hashCode(); } diff --git a/source/net/yacy/cora/storage/ComparableARC.java b/source/net/yacy/cora/storage/ComparableARC.java new file mode 100644 index 000000000..8d40d0184 --- /dev/null +++ b/source/net/yacy/cora/storage/ComparableARC.java @@ -0,0 +1,68 @@ +/** + * ComparableARC + * an Adaptive Replacement Cache for comparable objects + * Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany + * First released 24.08.2010 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +package net.yacy.cora.storage; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.Map; +import java.util.TreeMap; + +public final class ComparableARC extends SimpleARC implements Map, Iterable>, ARC { + + + public ComparableARC(final int cacheSize, Comparator comparator) { + super.cacheSize = cacheSize / 2; + super.levelA = new LimitedTreeMap(this.cacheSize, comparator); + super.levelB = new LimitedTreeMap(this.cacheSize, comparator); + } + + private static class LimitedTreeMap extends TreeMap { + private static final long serialVersionUID = -2276429187676080820L; + int limit; + LinkedList keys; + public LimitedTreeMap(final int cacheSize, Comparator comparator) { + super(comparator); + this.limit = cacheSize; + this.keys = new LinkedList(); + } + public V put(K k, V v) { + V r = super.put(k, v); + keys.add(k); + if (keys.size() > this.limit) { + K w = this.keys.removeFirst(); + V t = super.remove(w); + assert t != null; + } + return r; + } + public V remove(Object k) { + V r = super.remove(k); + this.keys.remove(k); + return r; + } + public void clear() { + super.clear(); + this.keys.clear(); + } + } + +} diff --git a/source/net/yacy/cora/storage/ConcurrentARC.java b/source/net/yacy/cora/storage/ConcurrentARC.java index 020cd894f..f8f47f818 100644 --- a/source/net/yacy/cora/storage/ConcurrentARC.java +++ b/source/net/yacy/cora/storage/ConcurrentARC.java @@ -20,6 +20,13 @@ package net.yacy.cora.storage; +import java.util.AbstractMap; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + /** * This is a simple cache using two generations of hashtables to store the content with a LFU strategy. @@ -30,7 +37,7 @@ package net.yacy.cora.storage; * at the same size. */ -public final class ConcurrentARC implements ARC { +public final class ConcurrentARC extends AbstractMap implements Map, Iterable>, ARC { private final int mask; private final ARC arc[]; @@ -39,19 +46,38 @@ public final class ConcurrentARC implements ARC { public ConcurrentARC(final int cacheSize, final int partitions) { int m = 1; while (m < partitions) m = m * 2; - this.arc = new SimpleARC[m]; - for (int i = 0; i < this.arc.length; i++) this.arc[i] = new SimpleARC(cacheSize / m); + this.arc = new HashARC[m]; + for (int i = 0; i < this.arc.length; i++) this.arc[i] = new HashARC(cacheSize / m); m -= 1; this.mask = m; } + @SuppressWarnings("unchecked") + public ConcurrentARC(final int cacheSize, final int partitions, Comparator comparator) { + int m = 1; + while (m < partitions) m = m * 2; + this.arc = new ComparableARC[m]; + for (int i = 0; i < this.arc.length; i++) this.arc[i] = new ComparableARC(cacheSize / m, comparator); + m -= 1; + this.mask = m; + } + + /** + * put a value to the cache. + * @param s + * @param v + */ + public final void insert(final K s, final V v) { + this.arc[s.hashCode() & mask].put(s, v); + } + /** * put a value to the cache. * @param s * @param v */ - public final void put(final K s, final V v) { - this.arc[s.hashCode() & mask].put(s, v); + public final V put(final K s, final V v) { + return this.arc[s.hashCode() & mask].put(s, v); } /** @@ -59,8 +85,9 @@ public final class ConcurrentARC implements ARC { * @param s * @return the value */ - public final V get(final K s) { - return this.arc[s.hashCode() & mask].get(s); + @SuppressWarnings("unchecked") + public final V get(final Object s) { + return this.arc[s.hashCode() & mask].get((K) s); } /** @@ -68,8 +95,9 @@ public final class ConcurrentARC implements ARC { * @param s * @return */ - public final boolean containsKey(final K s) { - return this.arc[s.hashCode() & mask].containsKey(s); + @SuppressWarnings("unchecked") + public final boolean containsKey(final Object s) { + return this.arc[s.hashCode() & mask].containsKey((K) s); } /** @@ -77,8 +105,9 @@ public final class ConcurrentARC implements ARC { * @param s * @return the old value */ - public final V remove(final K s) { - return this.arc[s.hashCode() & mask].remove(s); + @SuppressWarnings("unchecked") + public final V remove(final Object s) { + return this.arc[s.hashCode() & mask].remove((K) s); } /** @@ -88,9 +117,44 @@ public final class ConcurrentARC implements ARC { for (ARC a: this.arc) a.clear(); } + /** + * get the size of the ARC. + * @return the complete number of entries in the ARC cache + */ public final int size() { int s = 0; for (ARC a: this.arc) s += a.size(); return s; } + + /** + * iterator implements the Iterable interface + */ + public Iterator> iterator() { + return entrySet().iterator(); + } + + /** + * Return a Set view of the mappings contained in this map. + * This method is the basis for all methods that are implemented + * by a AbstractMap implementation + * + * @return a set view of the mappings contained in this map + */ + @Override + public Set> entrySet() { + Set> m = new HashSet>(); + for (ARC a: this.arc) { + for (Map.Entry entry: a.entrySet()) m.add(entry); + } + return m; + } + + /** + * a hash code for this ARC + * @return a hash code + */ + public int hashCode() { + return this.arc.hashCode(); + } } diff --git a/source/net/yacy/cora/storage/HashARC.java b/source/net/yacy/cora/storage/HashARC.java new file mode 100644 index 000000000..aad33d4f6 --- /dev/null +++ b/source/net/yacy/cora/storage/HashARC.java @@ -0,0 +1,46 @@ +/** + * HashARC + * an Adaptive Replacement Cache for objects that can be compared using hashing + * Copyright 2009 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany + * First released 17.04.2009 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +package net.yacy.cora.storage; + +import java.util.LinkedHashMap; +import java.util.Map; + +public final class HashARC extends SimpleARC implements Map, Iterable>, ARC { + + public final static boolean accessOrder = false; // if false, then a insertion-order is used + + public HashARC(final int cacheSize) { + this.cacheSize = cacheSize / 2; + super.levelA = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { + private static final long serialVersionUID = 1L; + @Override protected boolean removeEldestEntry(final Map.Entry eldest) { + return size() > HashARC.this.cacheSize; + } + }; + this.levelB = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { + private static final long serialVersionUID = 1L; + @Override protected boolean removeEldestEntry(final Map.Entry eldest) { + return size() > HashARC.this.cacheSize; + } + }; + } +} diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java index 376c324ee..f4d66c6bd 100644 --- a/source/net/yacy/cora/storage/SimpleARC.java +++ b/source/net/yacy/cora/storage/SimpleARC.java @@ -21,8 +21,11 @@ package net.yacy.cora.storage; -import java.util.LinkedHashMap; +import java.util.AbstractMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.Map; +import java.util.Set; /** @@ -32,37 +35,21 @@ import java.util.Map; * or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache * This version omits the ghost entry handling which is described in ARC, and keeps both cache levels * at the same size. + * + * This class is defined abstract because it shall be used with either the HashARC or the ComparableARC classes */ -public final class SimpleARC implements ARC { +abstract class SimpleARC extends AbstractMap implements Map, Iterable>, ARC { - public final static boolean accessOrder = false; // if false, then a insertion-order is used - - protected final int cacheSize; - private final Map levelA, levelB; - - public SimpleARC(final int cacheSize) { - this.cacheSize = cacheSize / 2; - this.levelA = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { - private static final long serialVersionUID = 1L; - @Override protected boolean removeEldestEntry(final Map.Entry eldest) { - return size() > SimpleARC.this.cacheSize; - } - }; - this.levelB = new LinkedHashMap(cacheSize, 0.1f, accessOrder) { - private static final long serialVersionUID = 1L; - @Override protected boolean removeEldestEntry(final Map.Entry eldest) { - return size() > SimpleARC.this.cacheSize; - } - }; - } + protected int cacheSize; + protected Map levelA, levelB; /** * put a value to the cache. * @param s * @param v */ - public final synchronized void put(final K s, final V v) { + public final synchronized void insert(final K s, final V v) { if (this.levelB.containsKey(s)) { this.levelB.put(s, v); assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically @@ -72,19 +59,37 @@ public final class SimpleARC implements ARC { } } + /** + * put a value to the cache. + * @param s + * @param v + */ + public final synchronized V put(final K s, final V v) { + V r = null; + if (this.levelB.containsKey(s)) { + r = this.levelB.put(s, v); + assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically + } else { + r = this.levelA.put(s, v); + assert (this.levelA.size() <= cacheSize); // the cache should shrink automatically + } + return r; + } + /** * get a value from the cache. * @param s * @return the value */ - public final synchronized V get(final K s) { + @SuppressWarnings("unchecked") + public final synchronized V get(final Object s) { V v = this.levelB.get(s); if (v != null) return v; v = this.levelA.remove(s); if (v == null) return null; // move value from A to B; since it was already removed from A, just put it to B //System.out.println("ARC: moving A->B, size(A) = " + this.levelA.size() + ", size(B) = " + this.levelB.size()); - this.levelB.put(s, v); + this.levelB.put((K) s, v); assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically return v; } @@ -94,17 +99,18 @@ public final class SimpleARC implements ARC { * @param s * @return */ - public final synchronized boolean containsKey(final K s) { + public final synchronized boolean containsKey(final Object s) { if (this.levelB.containsKey(s)) return true; return this.levelA.containsKey(s); } + /** * remove an entry from the cache * @param s * @return the old value */ - public final synchronized V remove(final K s) { + public final synchronized V remove(final Object s) { final V r = this.levelB.remove(s); if (r != null) return r; return this.levelA.remove(s); @@ -125,4 +131,34 @@ public final class SimpleARC implements ARC { public final synchronized int size() { return this.levelA.size() + this.levelB.size(); } + + /** + * iterator implements the Iterable interface + */ + public Iterator> iterator() { + return entrySet().iterator(); + } + + /** + * Return a Set view of the mappings contained in this map. + * This method is the basis for all methods that are implemented + * by a AbstractMap implementation + * + * @return a set view of the mappings contained in this map + */ + @Override + public Set> entrySet() { + Set> m = new HashSet>(); + for (Map.Entry entry: this.levelA.entrySet()) m.add(entry); + for (Map.Entry entry: this.levelB.entrySet()) m.add(entry); + return m; + } + + /** + * a hash code for this ARC + * @return the hash code of one of the ARC partial hash tables + */ + public int hashCode() { + return this.levelA.hashCode(); + } } diff --git a/source/net/yacy/kelondro/blob/BEncodedHeap.java b/source/net/yacy/kelondro/blob/BEncodedHeap.java index a51d94ca3..cd7b514b4 100644 --- a/source/net/yacy/kelondro/blob/BEncodedHeap.java +++ b/source/net/yacy/kelondro/blob/BEncodedHeap.java @@ -187,7 +187,6 @@ public class BEncodedHeap implements Map>, Iterable< * @param name * @return true if the row exists */ - @Override public boolean containsKey(Object key) { if (key instanceof byte[]) return containsKey((byte[]) key); return false; @@ -197,7 +196,6 @@ public class BEncodedHeap implements Map>, Iterable< * the containsValue method cannot be used in this method * and is only here to implement the Map interface */ - @Override public boolean containsValue(Object value) { // this method shall not be used because it is not appropriate for this kind of data throw new UnsupportedOperationException(); @@ -222,7 +220,6 @@ public class BEncodedHeap implements Map>, Iterable< * @param name * @return the map if one found or NULL if no entry exists or the entry is corrupt */ - @Override public Map get(Object key) { if (key instanceof byte[]) try { @@ -315,7 +312,6 @@ public class BEncodedHeap implements Map>, Iterable< return value; } - @Override public Map remove(Object key) { if (key instanceof byte[]) try {