From 8ba7ff53531f0ad41f3f5ad3a3f8f6c143537ca1 Mon Sep 17 00:00:00 2001 From: orbiter Date: Tue, 5 May 2009 22:40:40 +0000 Subject: [PATCH] a fix and another speed enhancement for the RWI cache git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5927 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- .../text/ReferenceContainerCache.java | 100 ++---------------- 1 file changed, 7 insertions(+), 93 deletions(-) diff --git a/source/de/anomic/kelondro/text/ReferenceContainerCache.java b/source/de/anomic/kelondro/text/ReferenceContainerCache.java index d63c3a1d6..69f9f5c11 100644 --- a/source/de/anomic/kelondro/text/ReferenceContainerCache.java +++ b/source/de/anomic/kelondro/text/ReferenceContainerCache.java @@ -28,7 +28,6 @@ package de.anomic.kelondro.text; import java.io.File; import java.io.IOException; -import java.util.ArrayList; import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -89,31 +88,6 @@ public final class ReferenceContainerCache exte this.cache = new ConcurrentHashMap>(); } - /** - * this is the new cache file format initialization - * @param heapFile - * @throws IOException - */ - public void initWriteModeFromBLOB(final File blobFile) throws IOException { - Log.logInfo("indexContainerRAMHeap", "restoring rwi blob dump '" + blobFile.getName() + "'"); - final long start = System.currentTimeMillis(); - //this.cache = Collections.synchronizedSortedMap(new TreeMap>(this.termOrder)); - this.cache = new ConcurrentHashMap>(); - int urlCount = 0; - synchronized (cache) { - for (final ReferenceContainer container : new blobFileEntries(blobFile, factory, this.payloadrow)) { - // TODO: in this loop a lot of memory may be allocated. A check if the memory gets low is necessary. But what do when the memory is low? - if (container == null) break; - //System.out.println("***DEBUG indexContainerHeap.initwriteModeFromBLOB*** container.size = " + container.size() + ", container.sorted = " + container.sorted()); - cache.put(new ByteArray(container.getTermHash()), container); - urlCount += container.size(); - } - } - // remove idx and gap files if they exist here - HeapWriter.deleteAllFingerprints(blobFile); - Log.logInfo("indexContainerRAMHeap", "finished rwi blob restore: " + cache.size() + " words, " + urlCount + " word/URL relations in " + (System.currentTimeMillis() - start) + " milliseconds"); - } - public void dump(final File heapFile, int writeBuffer) { assert this.cache != null; Log.logInfo("indexContainerRAMHeap", "creating rwi heap dump '" + heapFile.getName() + "', " + cache.size() + " rwi's"); @@ -253,57 +227,6 @@ public final class ReferenceContainerCache exte return max; } - public byte[] maxReferencesHash() { - // iterate to find the max score - int max = 0; - byte[] hash = null; - for (ReferenceContainer container : cache.values()) { - if (container.size() > max) { - max = container.size(); - hash = container.getTermHash(); - } - } - return hash; - } - - public ArrayList maxReferencesHash(int bound) { - // iterate to find the max score - ArrayList hashes = new ArrayList(); - for (ReferenceContainer container : cache.values()) { - if (container.size() >= bound) { - hashes.add(container.getTermHash()); - } - } - return hashes; - } - - public ReferenceContainer latest() { - ReferenceContainer c = null; - for (ReferenceContainer container : cache.values()) { - if (c == null) {c = container; continue;} - if (container.lastWrote() > c.lastWrote()) {c = container; continue;} - } - return c; - } - - public ReferenceContainer first() { - ReferenceContainer c = null; - for (ReferenceContainer container : cache.values()) { - if (c == null) {c = container; continue;} - if (container.lastWrote() < c.lastWrote()) {c = container; continue;} - } - return c; - } - - public ArrayList overAge(long maxage) { - ArrayList hashes = new ArrayList(); - long limit = System.currentTimeMillis() - maxage; - for (ReferenceContainer container : cache.values()) { - if (container.lastWrote() < limit) hashes.add(container.getTermHash()); - } - return hashes; - } - /** * return an iterator object that creates top-level-clones of the indexContainers * in the cache, so that manipulations of the iterated objects do not change @@ -497,25 +420,16 @@ public final class ReferenceContainerCache exte assert this.cache != null; ByteArray tha = new ByteArray(termHash); - // first synchronization: check if the entry is empty, and quickly set the entry - ReferenceContainer container = null; - synchronized (cache) { - container = cache.remove(tha); - if (container == null) { - container = new ReferenceContainer(factory, termHash, this.payloadrow, 1); - container.put(newEntry); - cache.put(tha, container); - return; - } - } - - // if the entry must be merged, first release the synchronization to do the merge concurrently + // first access the cache without synchronization + ReferenceContainer container = cache.remove(tha); + if (container == null) container = new ReferenceContainer(factory, termHash, this.payloadrow, 1); container.put(newEntry); - // then get a new lock. If the entry was written in the meantime, merge again + // synchronization: check if the entry is still empty and set new value synchronized (cache) { - ReferenceContainer containerNew = cache.get(tha); - if (container != null) container.putAllRecent(containerNew); + ReferenceContainer containerNew = cache.put(tha, container); + if (containerNew == null) return; + container.putAllRecent(containerNew); cache.put(tha, container); } }