better concurrency, less blocking & performance hacks

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6277 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 1a9cfd8718
commit 10d3e856b5

@ -32,7 +32,8 @@ import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import de.anomic.kelondro.index.SimpleARC;
import de.anomic.kelondro.index.ARC;
import de.anomic.kelondro.index.ConcurrentARC;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.order.Bitfield;
import de.anomic.kelondro.order.Digest;
@ -42,7 +43,7 @@ import de.anomic.yacy.yacySeedDB;
public class Word {
public static final int hashCacheSize = Math.max(2000, Math.min(100000, (int) (MemoryControl.available() / 20000L)));
private static final SimpleARC<String, byte[]> hashCache = new SimpleARC<String, byte[]>(hashCacheSize);
private static final ARC<String, byte[]> hashCache = new ConcurrentARC<String, byte[]>(hashCacheSize, Runtime.getRuntime().availableProcessors());
// object carries statistics for words and sentences
public int count; // number of occurrences

@ -36,7 +36,8 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import de.anomic.kelondro.index.SimpleARC;
import de.anomic.kelondro.index.ARC;
import de.anomic.kelondro.index.ConcurrentARC;
import de.anomic.kelondro.order.CloneableIterator;
import de.anomic.kelondro.order.NaturalOrder;
import de.anomic.kelondro.order.RotateIterator;
@ -48,13 +49,13 @@ import de.anomic.kelondro.util.kelondroException;
public class MapView {
private BLOB blob;
private SimpleARC<String, Map<String, String>> cache;
private ARC<String, Map<String, String>> cache;
private final char fillchar;
public MapView(final Heap blob, final int cachesize, char fillchar) {
this.blob = blob;
this.cache = new SimpleARC<String, Map<String, String>>(cachesize);
this.cache = new ConcurrentARC<String, Map<String, String>>(cachesize, Runtime.getRuntime().availableProcessors());
this.fillchar = fillchar;
/*
// debug

@ -0,0 +1,71 @@
// ARC.java
// an interface for Adaptive Replacement Caches
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 29.08.2009 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.index;
/**
 * Common contract for the key/value caches in this package that follow the
 * Adaptive Replacement Cache ("ARC") idea; both {@code SimpleARC} and
 * {@code ConcurrentARC} implement it, so callers can hold a cache by this
 * type and choose the implementation at construction time.
 * Background on the algorithm: http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf
 * and http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
 *
 * @param <K> type of the cache keys
 * @param <V> type of the cached values
 */
public interface ARC<K, V> {

    /**
     * Stores a value in the cache under the given key.
     * @param s the key
     * @param v the value to associate with the key
     */
    void put(K s, V v);

    /**
     * Looks up the value cached under the given key.
     * @param s the key
     * @return the value cached for the key
     */
    V get(K s);

    /**
     * Tests whether the cache currently holds an entry for the given key.
     * @param s the key
     * @return true if an entry for the key is present
     */
    boolean containsKey(K s);

    /**
     * Deletes the entry stored under the given key.
     * @param s the key
     * @return the value that was stored for the key before removal
     */
    V remove(K s);

    /**
     * Drops every entry from the cache.
     */
    void clear();
}

@ -0,0 +1,94 @@
// ConcurrentARC.java
// a concurrent Adaptive Replacement Cache that partitions its entries over several SimpleARC instances
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 17.04.2009 on http://yacy.net
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro.index;
/**
 * An {@link ARC} that spreads its entries over several independent {@link SimpleARC}
 * partitions. Every key is routed by its hash code to exactly one partition, so
 * operations on keys that land in different partitions never touch the same
 * underlying cache.
 * The number of partitions is the requested count rounded up to the next power of two,
 * and the requested capacity is divided evenly among the partitions.
 * For the cache algorithm itself see http://www.almaden.ibm.com/cs/people/dmodha/ARC.pdf
 * or http://en.wikipedia.org/wiki/Adaptive_Replacement_Cache
 */
public class ConcurrentARC<K, V> implements ARC<K, V> {

    /** upper bound for the partition count; keeps the doubling loop below from overflowing */
    private static final int MAX_PARTITIONS = 1 << 30;

    /** the total capacity that was requested at construction time */
    protected int cacheSize;

    /** partition count minus one; the count is a power of two, so this is a bit mask */
    private final int mask;

    /** the partitions; the array length is always a power of two */
    private final ARC<K, V>[] arc;

    /**
     * Creates a partitioned cache.
     * @param cacheSize the total capacity; each partition is created with cacheSize divided by the partition count
     * @param partitions the wanted number of partitions; rounded up to the next power of two, values below 1 give a single partition
     */
    @SuppressWarnings("unchecked")
    public ConcurrentARC(final int cacheSize, int partitions) {
        // remember the requested capacity; the field was never initialised before
        this.cacheSize = cacheSize;

        // round the partition count up to a power of two so that a bit mask can select a partition
        int count = 1;
        while (count < partitions && count < MAX_PARTITIONS) {
            count <<= 1;
        }

        // generic array creation is not possible in Java, hence the raw array type
        this.arc = new SimpleARC[count];
        final int partitionSize = cacheSize / count;
        for (int i = 0; i < count; i++) {
            this.arc[i] = new SimpleARC<K, V>(partitionSize);
        }
        this.mask = count - 1;
    }

    /**
     * Selects the partition that is responsible for a key.
     * A null key is routed to the first partition instead of failing here with a
     * NullPointerException; whether it is accepted is then up to the partition.
     * @param s the key
     * @return the partition for the key
     */
    private ARC<K, V> partitionOf(final K s) {
        if (s == null) {
            return this.arc[0];
        }
        final int h = s.hashCode();
        // fold the upper half of the hash into the lower bits (as java.util.HashMap does)
        // so that keys which differ only in their high bits still spread over the partitions
        return this.arc[(h ^ (h >>> 16)) & this.mask];
    }

    /**
     * put a value to the cache.
     * @param s the key
     * @param v the value
     */
    public void put(K s, V v) {
        partitionOf(s).put(s, v);
    }

    /**
     * get a value from the cache.
     * @param s the key
     * @return the value as returned by the partition that is responsible for the key
     */
    public V get(K s) {
        return partitionOf(s).get(s);
    }

    /**
     * check if the cache contains the key
     * @param s the key
     * @return true if the partition that is responsible for the key contains it
     */
    public boolean containsKey(K s) {
        return partitionOf(s).containsKey(s);
    }

    /**
     * remove an entry from the cache
     * @param s the key
     * @return the old value as returned by the partition that is responsible for the key
     */
    public V remove(K s) {
        return partitionOf(s).remove(s);
    }

    /**
     * clear the cache; the partitions are cleared one after another
     */
    public void clear() {
        for (final ARC<K, V> partition : this.arc) {
            partition.clear();
        }
    }
}

@ -25,7 +25,6 @@
package de.anomic.kelondro.index;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
@ -38,7 +37,7 @@ import java.util.Map;
* at the same size.
*/
public class SimpleARC <K, V> {
public class SimpleARC<K, V> implements ARC<K, V> {
public final static boolean accessOrder = false; // if false, then a insertion-order is used
@ -47,18 +46,18 @@ public class SimpleARC <K, V> {
public SimpleARC(int cacheSize) {
this.cacheSize = cacheSize / 2;
this.levelA = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
this.levelA = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > SimpleARC.this.cacheSize;
}
});
this.levelB = Collections.synchronizedMap(new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
};
this.levelB = new LinkedHashMap<K, V>(cacheSize, 0.1f, accessOrder) {
private static final long serialVersionUID = 1L;
@Override protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > SimpleARC.this.cacheSize;
}
});
};
}
/**
@ -66,7 +65,7 @@ public class SimpleARC <K, V> {
* @param s
* @param v
*/
public void put(K s, V v) {
public synchronized void put(K s, V v) {
if (this.levelB.containsKey(s)) {
this.levelB.put(s, v);
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
@ -81,7 +80,7 @@ public class SimpleARC <K, V> {
* @param s
* @return the value
*/
public V get(K s) {
public synchronized V get(K s) {
V v = this.levelB.get(s);
if (v != null) return v;
v = this.levelA.remove(s);
@ -98,7 +97,7 @@ public class SimpleARC <K, V> {
* @param s
* @return
*/
public boolean containsKey(K s) {
public synchronized boolean containsKey(K s) {
if (this.levelB.containsKey(s)) return true;
return this.levelA.containsKey(s);
}
@ -108,7 +107,7 @@ public class SimpleARC <K, V> {
* @param s
* @return the old value
*/
public V remove(K s) {
public synchronized V remove(K s) {
V r = this.levelB.remove(s);
if (r != null) return r;
return this.levelA.remove(s);
@ -117,7 +116,7 @@ public class SimpleARC <K, V> {
/**
* clear the cache
*/
public void clear() {
public synchronized void clear() {
this.levelA.clear();
this.levelB.clear();
}

@ -294,7 +294,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
private void cleanCache() {
// dump the cache if necessary
synchronized (this) {
if (this.ram.size() >= this.maxRamEntries || (this.ram.size() > 3000 && !MemoryControl.request(80L * 1024L * 1024L, false))) synchronized (this) {
if (this.ram.size() >= this.maxRamEntries || (this.ram.size() > 3000 && !MemoryControl.request(80L * 1024L * 1024L, false))) {
// dump the ram
File dumpFile = this.array.newContainerBLOBFile();
@ -308,7 +308,7 @@ public final class IndexCell<ReferenceType extends Reference> extends AbstractBu
}
// clean-up the cache
synchronized (this) {
if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) synchronized (this) {
if (this.array.entries() > 50 || (this.lastCleanup + cleanupCycle < System.currentTimeMillis())) {
//System.out.println("----cleanup check");
this.array.shrink(this.targetFileSize, this.maxFileSize);

@ -42,7 +42,8 @@ import de.anomic.document.Word;
import de.anomic.document.parser.html.CharacterCoding;
import de.anomic.http.client.Cache;
import de.anomic.http.metadata.ResponseHeader;
import de.anomic.kelondro.index.SimpleARC;
import de.anomic.kelondro.index.ARC;
import de.anomic.kelondro.index.ConcurrentARC;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.kelondro.text.metadataPrototype.URLMetadataRow;
import de.anomic.kelondro.util.SetTools;
@ -66,8 +67,8 @@ public class TextSnippet {
public static final int ERROR_PARSER_NO_LINES = 15;
public static final int ERROR_NO_MATCH = 16;
private static final SimpleARC<String, String> snippetsCache = new SimpleARC<String, String>(maxCache);
private static final SimpleARC<String, yacyURL> faviconCache = new SimpleARC<String, yacyURL>(maxCache);
private static final ARC<String, String> snippetsCache = new ConcurrentARC<String, String>(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors()));
private static final ARC<String, yacyURL> faviconCache = new ConcurrentARC<String, yacyURL>(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors()));
private final yacyURL url;
private String line;

Loading…
Cancel
Save