From 10d3e856b5291cb6a9bb10ba4f4c96c9176cdd31 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sat, 29 Aug 2009 23:34:14 +0000 Subject: [PATCH] better concurrency, less blocking & performance hacks git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6277 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- source/de/anomic/document/Word.java | 5 +- source/de/anomic/kelondro/blob/MapView.java | 7 +- source/de/anomic/kelondro/index/ARC.java | 71 ++++++++++++++ .../anomic/kelondro/index/ConcurrentARC.java | 94 +++++++++++++++++++ .../de/anomic/kelondro/index/SimpleARC.java | 21 ++--- source/de/anomic/kelondro/text/IndexCell.java | 4 +- source/de/anomic/search/TextSnippet.java | 7 +- 7 files changed, 188 insertions(+), 21 deletions(-) create mode 100644 source/de/anomic/kelondro/index/ARC.java create mode 100644 source/de/anomic/kelondro/index/ConcurrentARC.java diff --git a/source/de/anomic/document/Word.java b/source/de/anomic/document/Word.java index 1e549b324..b9c5fea96 100644 --- a/source/de/anomic/document/Word.java +++ b/source/de/anomic/document/Word.java @@ -32,7 +32,8 @@ import java.util.Locale; import java.util.Set; import java.util.TreeSet; -import de.anomic.kelondro.index.SimpleARC; +import de.anomic.kelondro.index.ARC; +import de.anomic.kelondro.index.ConcurrentARC; import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.order.Bitfield; import de.anomic.kelondro.order.Digest; @@ -42,7 +43,7 @@ import de.anomic.yacy.yacySeedDB; public class Word { public static final int hashCacheSize = Math.max(2000, Math.min(100000, (int) (MemoryControl.available() / 20000L))); - private static final SimpleARC hashCache = new SimpleARC(hashCacheSize); + private static final ARC hashCache = new ConcurrentARC(hashCacheSize, Runtime.getRuntime().availableProcessors()); // object carries statistics for words and sentences public int count; // number of occurrences diff --git a/source/de/anomic/kelondro/blob/MapView.java b/source/de/anomic/kelondro/blob/MapView.java 
index 72dfad79f..70edd653b 100644 --- a/source/de/anomic/kelondro/blob/MapView.java +++ b/source/de/anomic/kelondro/blob/MapView.java @@ -36,7 +36,8 @@ import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import de.anomic.kelondro.index.SimpleARC; +import de.anomic.kelondro.index.ARC; +import de.anomic.kelondro.index.ConcurrentARC; import de.anomic.kelondro.order.CloneableIterator; import de.anomic.kelondro.order.NaturalOrder; import de.anomic.kelondro.order.RotateIterator; @@ -48,13 +49,13 @@ import de.anomic.kelondro.util.kelondroException; public class MapView { private BLOB blob; - private SimpleARC> cache; + private ARC> cache; private final char fillchar; public MapView(final Heap blob, final int cachesize, char fillchar) { this.blob = blob; - this.cache = new SimpleARC>(cachesize); + this.cache = new ConcurrentARC>(cachesize, Runtime.getRuntime().availableProcessors()); this.fillchar = fillchar; /* // debug diff --git a/source/de/anomic/kelondro/index/ARC.java b/source/de/anomic/kelondro/index/ARC.java new file mode 100644 index 000000000..bb6908c3b --- /dev/null +++ b/source/de/anomic/kelondro/index/ARC.java @@ -0,0 +1,71 @@ +// ARC.java +// an interface for Adaptive Replacement Caches +// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany +// first published 29.08.2009 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro.index; + +/** + * This is a simple cache using two generations of hashtables to store the content with a LFU strategy. + * The Algorithm is described in a slightly more complex version as Adaptive Replacement Cache, "ARC". + * For details see http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf + * or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache + * This version omits the ghost entry handling which is described in ARC, and keeps both cache levels + * at the same size. + */ + +public interface ARC<K, V> { + + /** + * put a value to the cache. + * @param s + * @param v + */ + public void put(K s, V v); + + /** + * get a value from the cache. + * @param s + * @return the value + */ + public V get(K s); + + /** + * check if the map contains the key + * @param s + * @return + */ + public boolean containsKey(K s); + + /** + * remove an entry from the cache + * @param s + * @return the old value + */ + public V remove(K s); + + /** + * clear the cache + */ + public void clear(); +} diff --git a/source/de/anomic/kelondro/index/ConcurrentARC.java b/source/de/anomic/kelondro/index/ConcurrentARC.java new file mode 100644 index 000000000..9b4843f98 --- /dev/null +++ b/source/de/anomic/kelondro/index/ConcurrentARC.java @@ -0,0 +1,94 @@ +// ConcurrentARC.java +// a Simple Adaptive Replacement Cache +// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a.
M., Germany +// first published 17.04.2009 on http://yacy.net +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.kelondro.index; + +/** + * This is a simple cache using two generations of hashtables to store the content with a LFU strategy. + * The Algorithm is described in a slightly more complex version as Adaptive Replacement Cache, "ARC". + * For details see http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf + * or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache + * This version omits the ghost entry handling which is described in ARC, and keeps both cache levels + * at the same size. + */ + +public class ConcurrentARC<K, V> implements ARC<K, V> { + + protected int cacheSize; + private int mask; + private ARC<K, V> arc[]; + + @SuppressWarnings("unchecked") + public ConcurrentARC(final int cacheSize, int partitions) { + this.mask = 1; + while (this.mask < partitions) this.mask = this.mask * 2; + this.arc = new SimpleARC[mask]; + for (int i = 0; i < this.arc.length; i++) this.arc[i] = new SimpleARC<K, V>(cacheSize / this.mask); + this.mask -= 1; + } + + /** + * put a value to the cache.
+ * @param s + * @param v + */ + public void put(K s, V v) { + this.arc[s.hashCode() & mask].put(s, v); + } + + /** + * get a value from the cache. + * @param s + * @return the value + */ + public V get(K s) { + return this.arc[s.hashCode() & mask].get(s); + } + + /** + * check if the map contains the key + * @param s + * @return + */ + public boolean containsKey(K s) { + return this.arc[s.hashCode() & mask].containsKey(s); + } + + /** + * remove an entry from the cache + * @param s + * @return the old value + */ + public V remove(K s) { + return this.arc[s.hashCode() & mask].remove(s); + } + + /** + * clear the cache + */ + public void clear() { + for (ARC<K, V> a: this.arc) a.clear(); + } +} diff --git a/source/de/anomic/kelondro/index/SimpleARC.java b/source/de/anomic/kelondro/index/SimpleARC.java index 5a8795d05..bc49eabb8 100644 --- a/source/de/anomic/kelondro/index/SimpleARC.java +++ b/source/de/anomic/kelondro/index/SimpleARC.java @@ -25,7 +25,6 @@ package de.anomic.kelondro.index; -import java.util.Collections; import java.util.LinkedHashMap; import java.util.Map; @@ -38,7 +37,7 @@ import java.util.Map; * at the same size.
*/ -public class SimpleARC<K, V> { +public class SimpleARC<K, V> implements ARC<K, V> { public final static boolean accessOrder = false; // if false, then a insertion-order is used @@ -47,18 +46,18 @@ public class SimpleARC<K, V> { public SimpleARC(int cacheSize) { this.cacheSize = cacheSize / 2; - this.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { + this.levelA = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { private static final long serialVersionUID = 1L; @Override protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { return size() > SimpleARC.this.cacheSize; } - }); - this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { + }; + this.levelB = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) { private static final long serialVersionUID = 1L; @Override protected boolean removeEldestEntry(Map.Entry<K, V> eldest) { return size() > SimpleARC.this.cacheSize; } - }); + }; } /** @@ -66,7 +65,7 @@ public class SimpleARC<K, V> { * @param s * @param v */ - public void put(K s, V v) { + public synchronized void put(K s, V v) { if (this.levelB.containsKey(s)) { this.levelB.put(s, v); assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically @@ -81,7 +80,7 @@ public class SimpleARC<K, V> { * @param s * @return the value */ - public V get(K s) { + public synchronized V get(K s) { V v = this.levelB.get(s); if (v != null) return v; v = this.levelA.remove(s); @@ -98,7 +97,7 @@ public class SimpleARC<K, V> { * @param s * @return */ - public boolean containsKey(K s) { + public synchronized boolean containsKey(K s) { if (this.levelB.containsKey(s)) return true; return this.levelA.containsKey(s); } @@ -108,7 +107,7 @@ public class SimpleARC<K, V> { * @param s * @return the old value */ - public V remove(K s) { + public synchronized V remove(K s) { V r = this.levelB.remove(s); if (r != null) return r; return this.levelA.remove(s); @@ -117,7 +116,7 @@ public class SimpleARC<K, V> { /** * clear the cache */ - public void clear() { + public
synchronized void clear() { this.levelA.clear(); this.levelB.clear(); } diff --git a/source/de/anomic/kelondro/text/IndexCell.java b/source/de/anomic/kelondro/text/IndexCell.java index 5bc22e6f1..cc1c90165 100644 --- a/source/de/anomic/kelondro/text/IndexCell.java +++ b/source/de/anomic/kelondro/text/IndexCell.java @@ -294,7 +294,7 @@ public final class IndexCell extends AbstractBu private void cleanCache() { // dump the cache if necessary - synchronized (this) { + if (this.ram.size() >= this.maxRamEntries || (this.ram.size() > 3000 && !MemoryControl.request(80L * 1024L * 1024L, false))) synchronized (this) { if (this.ram.size() >= this.maxRamEntries || (this.ram.size() > 3000 && !MemoryControl.request(80L * 1024L * 1024L, false))) { // dump the ram File dumpFile = this.array.newContainerBLOBFile(); @@ -308,7 +308,7 @@ public final class IndexCell extends AbstractBu } // clean-up the cache - synchronized (this) { + if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) synchronized (this) { if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) { //System.out.println("----cleanup check"); this.array.shrink(this.targetFileSize, this.maxFileSize); diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java index fb23d1155..dff052516 100644 --- a/source/de/anomic/search/TextSnippet.java +++ b/source/de/anomic/search/TextSnippet.java @@ -42,7 +42,8 @@ import de.anomic.document.Word; import de.anomic.document.parser.html.CharacterCoding; import de.anomic.http.client.Cache; import de.anomic.http.metadata.ResponseHeader; -import de.anomic.kelondro.index.SimpleARC; +import de.anomic.kelondro.index.ARC; +import de.anomic.kelondro.index.ConcurrentARC; import de.anomic.kelondro.order.Base64Order; import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow; import de.anomic.kelondro.util.SetTools; @@ -66,8 +67,8 @@ public class TextSnippet { public 
static final int ERROR_PARSER_NO_LINES = 15; public static final int ERROR_NO_MATCH = 16; - private static final SimpleARC snippetsCache = new SimpleARC(maxCache); - private static final SimpleARC faviconCache = new SimpleARC(maxCache); + private static final ARC snippetsCache = new ConcurrentARC(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors())); + private static final ARC faviconCache = new ConcurrentARC(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors())); private final yacyURL url; private String line;