- modified, simplified old kelondroHTCache object; I believe it should be replaced by something completely new

- removed tree data type in kelondroHTCache
- added new class kelondroHeap; may be the core for a storage object that will once replace the many-files strategy of kelondroHTCache
- removed compatibility mode in indexRAMRI


git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4747 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent d3715e02ae
commit 32b5b057b9

@ -206,25 +206,6 @@ proxyCacheSize__pro = 1024
# storage place for new releases
releases = DATA/RELEASE
# use the mostly direct mapping of URLs to Filenames
# makes it easy watching the content of the cache using file browsers
# problems arise when a file already exists where a new entry expects a directory
# or vice versa.
# when set to false, the file names are set to the hash of the URL and the
# directory is built from protocol, hostname and port, as with the old
# layout.
# the advantage of this scheme is that no directory/file collisions can
# occur.
# switching this flag will take effect after a restart of yacy.
# files that are present under the previously used layout will be renamed
# to the new location and thus be accessible immediately. so an accumulated
# cache is still usable after the switch.
# possible values are {tree, hash}
proxyCacheLayout = hash
# the migration flag shows, if the different layout shall be migrated from one to another
proxyCacheMigration = true
# the following mime-types are the whitelist for indexing
#
# parseableMimeTypes.HTML: specifies mime-types that can be indexed with built-in html parser

@ -42,19 +42,12 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.kelondro.kelondroBufferedRA;
import de.anomic.kelondro.kelondroByteOrder;
import de.anomic.kelondro.kelondroBytesLongMap;
import de.anomic.kelondro.kelondroCloneableIterator;
import de.anomic.kelondro.kelondroException;
import de.anomic.kelondro.kelondroFixedWidthArray;
import de.anomic.kelondro.kelondroRow;
import de.anomic.kelondro.kelondroRowSet;
import de.anomic.kelondro.kelondroRow.EntryIndex;
import de.anomic.server.serverMemory;
import de.anomic.server.logging.serverLog;
import de.anomic.yacy.yacySeedDB;
public final class indexContainerHeap {
@ -154,7 +147,7 @@ public final class indexContainerHeap {
synchronized (index) {
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
// dont test available() here because this does not work for files > 2GB
// don't test available() here because this does not work for files > 2GB
loop: while (true) {
// remember seek position
seek0 = seek;
@ -476,70 +469,4 @@ public final class indexContainerHeap {
cache.put(wordHash, container);
}
/**
* this is a compatibility method for a old heap dump format. don't use it if not necessary
* @param indexArrayFile
* @throws IOException
*/
public void restoreArray(File indexArrayFile) throws IOException {
// is only here to read old array data structures
if (!(indexArrayFile.exists())) return;
this.readOnlyMode = false;
kelondroFixedWidthArray dumpArray;
kelondroBufferedRA readBuffer = null;
kelondroRow bufferStructureBasis = new kelondroRow(
"byte[] wordhash-" + yacySeedDB.commonHashLength + ", " +
"Cardinal occ-4 {b256}, " +
"Cardinal time-8 {b256}, " +
"byte[] urlprops-" + payloadrow.objectsize,
kelondroBase64Order.enhancedCoder, 0);
dumpArray = new kelondroFixedWidthArray(indexArrayFile, bufferStructureBasis, 0);
log.logInfo("started restore of ram cache '" + indexArrayFile.getName() + "', " + dumpArray.size() + " word/URL relations");
long startTime = System.currentTimeMillis();
long messageTime = System.currentTimeMillis() + 5000;
long urlCount = 0, urlsPerSecond = 0;
this.cache = Collections.synchronizedSortedMap(new TreeMap<String, indexContainer>(new kelondroByteOrder.StringOrder(payloadrow.getOrdering())));
try {
Iterator<EntryIndex> i = dumpArray.contentRows(-1);
String wordHash;
//long creationTime;
indexRWIRowEntry wordEntry;
kelondroRow.EntryIndex row;
while (i.hasNext()) {
// get out one entry
row = i.next();
if ((row == null) || (row.empty(0)) || (row.empty(3))) continue;
wordHash = row.getColString(0, "UTF-8");
//creationTime = kelondroRecords.bytes2long(row[2]);
wordEntry = new indexRWIRowEntry(row.getColBytes(3));
// store to cache
indexContainer container = cache.get(wordHash);
if (container == null) container = new indexContainer(wordHash, payloadrow, 1);
container.put(wordEntry);
cache.put(wordHash, container);
urlCount++;
// protect against memory shortage
//while (serverMemory.free() < 1000000) {flushFromMem(); java.lang.System.gc();}
// write a log
if (System.currentTimeMillis() > messageTime) {
serverMemory.gc(1000, "indexRAMRI, for better statistic-2"); // for better statistic - thq
urlsPerSecond = 1 + urlCount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("restoring status: " + urlCount + " urls done, " + ((dumpArray.size() - urlCount) / urlsPerSecond) + " seconds remaining, free mem = " + (serverMemory.free() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
if (readBuffer != null) readBuffer.close();
dumpArray.close();
dumpArray = null;
log.logInfo("finished restore: " + cache.size() + " words in " + ((System.currentTimeMillis() - startTime) / 1000) + " seconds");
} catch (kelondroException e) {
// restore failed
log.logSevere("failed restore of indexCache array dump: " + e.getMessage(), e);
} finally {
if (dumpArray != null) try {dumpArray.close();}catch(Exception e){}
}
}
}

@ -61,24 +61,11 @@ public final class indexRAMRI implements indexRI, indexRIReader {
this.cacheReferenceCountLimit = wCacheReferenceCountLimitInit;
this.cacheReferenceAgeLimit = wCacheReferenceAgeLimitInit;
this.log = log;
File indexArrayFile = new File(databaseRoot, oldArrayName);
this.indexHeapFile = new File(databaseRoot, newHeapName);
this.heap = new indexContainerHeap(payloadrow, log);
// read in dump of last session
if (indexArrayFile.exists()) {
try {
heap.restoreArray(indexArrayFile);
for (indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {
this.hashDate.setScore(ic.getWordHash(), intTime(ic.lastWrote()));
this.hashScore.setScore(ic.getWordHash(), ic.size());
}
} catch (IOException e){
log.logSevere("unable to restore cache dump: " + e.getMessage(), e);
}
indexArrayFile.delete();
if (indexArrayFile.exists()) log.logSevere("cannot delete old array file: " + indexArrayFile.toString() + "; please delete manually");
} else if (indexHeapFile.exists()) {
if (indexHeapFile.exists()) {
try {
heap.initWriteMode(indexHeapFile);
for (indexContainer ic : (Iterable<indexContainer>) heap.wordContainers(null, false)) {

@ -0,0 +1,194 @@
// kelondroHeap.java
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 30.04.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.kelondro;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import de.anomic.kelondro.kelondroByteOrder;
import de.anomic.kelondro.kelondroBytesLongMap;
public final class kelondroHeap {
private kelondroBytesLongMap index;
private File heapFile;
private kelondroByteOrder ordering;
/**
* create a heap file: a arbitrary number of BLOBs, indexed by an access key
* The heap file will be opened at initialization time, indexed and closed again.
* Heap files are only opened when BLOBs are read from it or new one are appended
* @param heapFile
* @param keylength
* @param ordering
* @throws IOException
*/
public kelondroHeap(File heapFile, int keylength, kelondroByteOrder ordering) throws IOException {
this.index = null;
this.ordering = ordering;
this.heapFile = heapFile;
if (!(heapFile.exists())) throw new IOException("file " + heapFile + " does not exist");
if (heapFile.length() >= (long) Integer.MAX_VALUE) throw new IOException("file " + heapFile + " too large, index can only be crated for files less than 2GB");
this.index = new kelondroBytesLongMap(keylength, this.ordering, 0);
DataInputStream is = null;
String keystring;
byte[] key = new byte[keylength];
int reclen;
long seek = 0, seek0;
is = new DataInputStream(new BufferedInputStream(new FileInputStream(heapFile), 64*1024));
// don't test available() here because this does not work for files > 2GB
loop: while (true) {
// remember seek position
seek0 = seek;
// read length of the following record without the length of the record size bytes
try {
reclen = is.readInt();
} catch (IOException e) {
break loop; // terminate loop
}
seek += 4L;
// read key
try {
is.readFully(key);
} catch (IOException e) {
break loop; // terminate loop
}
keystring = new String(key);
seek += (long) keystring.length();
// skip content
seek += (long) reclen;
while (reclen > 0) reclen -= is.skip(reclen);
// store access address to entry
try {
index.addl(key, seek0);
} catch (IOException e) {
e.printStackTrace();
break loop;
}
}
is.close();
}
/**
* the number of BLOBs in the heap
* @return the number of BLOBs in the heap
*/
public int size() {
return this.index.size();
}
/**
* test if a key is in the heap file
* @param key
* @return true if the key exists, false othervise
*/
public boolean has(String key) {
assert index != null;
assert index.row().primaryKeyLength == key.length();
// check if the index contains the key
try {
return index.getl(key.getBytes()) >= 0;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
/**
* add a BLOB to the heap
* @param key
* @param blob
* @throws IOException
*/
public synchronized void add(String key, byte[] blob) throws IOException {
add(key, blob, 0, blob.length);
}
/**
* add a BLOB to the heap
* @param key
* @param blob
* @throws IOException
*/
public synchronized void add(String key, byte[] blob, int offset, int len) throws IOException {
assert index.row().primaryKeyLength == key.length();
if ((blob == null) || (blob.length == 0)) return;
DataOutputStream os = null;
try {
os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(heapFile)));
} catch (FileNotFoundException e) {
throw new IOException(e.getMessage());
}
int pos = os.size();
os.writeInt(len);
os.write(key.getBytes());
os.write(blob, offset, len);
os.close();
index.putl(key.getBytes(), pos);
}
/**
* read a blob from the heap
* @param key
* @return
* @throws IOException
*/
public byte[] get(String key) throws IOException {
assert index.row().primaryKeyLength == key.length();
// check if the index contains the key
long pos = index.getl(key.getBytes());
if (pos < 0) return null;
// access the file and read the container
RandomAccessFile raf = new RandomAccessFile(heapFile, "r");
int len = raf.readInt();
byte[] record = new byte[len];
raf.seek(pos + 4 + index.row().primaryKeyLength);
raf.readFully(record);
raf.close();
return record;
}
}

@ -61,14 +61,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.lang.StringBuffer;
import java.net.InetAddress;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -100,16 +98,14 @@ public final class plasmaHTCache {
public static final long oneday = 1000 * 60 * 60 * 24; // milliseconds of a day
static kelondroMapObjects responseHeaderDB = null;
private static final LinkedList<Entry> cacheStack = new LinkedList<Entry>();
private static final Map<String, File> cacheAge = Collections.synchronizedMap(new TreeMap<String, File>()); // a <date+hash, cache-path> - relation
private static final ConcurrentLinkedQueue<Entry> cacheStack = new ConcurrentLinkedQueue<Entry>();
private static final ConcurrentHashMap<String, File> cacheAge = new ConcurrentHashMap<String, File>(); // a <date+hash, cache-path> - relation
public static long curCacheSize = 0;
public static long maxCacheSize;
public static File cachePath;
public static final serverLog log = new serverLog("HTCACHE");
public static final HashSet<File> filesInUse = new HashSet<File>(); // can we delete this file
public static String cacheLayout;
public static boolean cacheMigration;
private static long lastcleanup = System.currentTimeMillis();
private static ResourceInfoFactory objFactory = new ResourceInfoFactory();
private static serverThread cacheScanThread;
@ -126,29 +122,6 @@ public final class plasmaHTCache {
public static final char DT_BINARY = 'b';
public static final char DT_UNKNOWN = 'u';
// appearance locations: (used for flags)
public static final int AP_TITLE = 0; // title tag from html header
public static final int AP_H1 = 1; // headline - top level
public static final int AP_H2 = 2; // headline, second level
public static final int AP_H3 = 3; // headline, 3rd level
public static final int AP_H4 = 4; // headline, 4th level
public static final int AP_H5 = 5; // headline, 5th level
public static final int AP_H6 = 6; // headline, 6th level
public static final int AP_TEXT = 7; // word appears in text (used to check validation of other appearances against spam)
public static final int AP_DOM = 8; // word inside an url: in Domain
public static final int AP_PATH = 9; // word inside an url: in path
public static final int AP_IMG = 10; // tag inside image references
public static final int AP_ANCHOR = 11; // anchor description
public static final int AP_ENV = 12; // word appears in environment (similar to anchor appearance)
public static final int AP_BOLD = 13; // may be interpreted as emphasized
public static final int AP_ITALICS = 14; // may be interpreted as emphasized
public static final int AP_WEAK = 15; // for Text that is small or bareley visible
public static final int AP_INVISIBLE = 16; // good for spam detection
public static final int AP_TAG = 17; // for tagged indexeing (i.e. using mp3 tags)
public static final int AP_AUTHOR = 18; // word appears in author name
public static final int AP_OPUS = 19; // word appears in name of opus, which may be an album name (in mp3 tags)
public static final int AP_TRACK = 20; // word appears in track name (i.e. in mp3 tags)
// URL attributes
public static final int UA_LOCAL = 0; // URL was crawled locally
public static final int UA_TILDE = 1; // tilde appears in URL
@ -229,11 +202,9 @@ public final class plasmaHTCache {
return doctype;
}
public static void init(File htCachePath, long CacheSizeMax, String layout, boolean migration) {
public static void init(File htCachePath, long CacheSizeMax) {
cachePath = htCachePath;
cacheLayout = layout;
cacheMigration = migration;
maxCacheSize = CacheSizeMax;
@ -328,9 +299,7 @@ public final class plasmaHTCache {
}
public static int size() {
synchronized (cacheStack) {
return cacheStack.size();
}
return cacheStack.size();
}
public static int dbSize() {
@ -338,17 +307,11 @@ public final class plasmaHTCache {
}
public static void push(Entry entry) {
synchronized (cacheStack) {
cacheStack.add(entry);
}
cacheStack.add(entry);
}
public static Entry pop() {
synchronized (cacheStack) {
if (cacheStack.size() > 0)
return cacheStack.removeFirst();
return null;
}
return cacheStack.poll();
}
/**
@ -388,18 +351,15 @@ public final class plasmaHTCache {
return true;
}
private static long lastcleanup = System.currentTimeMillis();
public static void writeFileAnnouncement(File file) {
synchronized (cacheAge) {
if (file.exists()) {
curCacheSize += file.length();
if (System.currentTimeMillis() - lastcleanup > 300000) {
// call the cleanup job only every 5 minutes
cleanup();
lastcleanup = System.currentTimeMillis();
}
cacheAge.put(ageString(file.lastModified(), file), file);
if (file.exists()) {
curCacheSize += file.length();
if (System.currentTimeMillis() - lastcleanup > 300000) {
// call the cleanup job only every 5 minutes
cleanup();
lastcleanup = System.currentTimeMillis();
}
cacheAge.put(ageString(file.lastModified(), file), file);
}
}
@ -419,7 +379,7 @@ public final class plasmaHTCache {
}
private static boolean deleteFile(File obj) {
if (obj.exists() && !filesInUse.contains(obj)) {
if (obj.exists()) {
long size = obj.length();
if (obj.delete()) {
curCacheSize -= size;
@ -446,41 +406,38 @@ public final class plasmaHTCache {
private static void cleanupDoIt(long newCacheSize) {
File file;
synchronized (cacheAge) {
Iterator<Map.Entry<String, File>> iter = cacheAge.entrySet().iterator();
Map.Entry<String, File> entry;
while (iter.hasNext() && curCacheSize >= newCacheSize) {
if (Thread.currentThread().isInterrupted()) return;
entry = iter.next();
String key = entry.getKey();
file = entry.getValue();
long t = Long.parseLong(key.substring(0, 16), 16);
if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted
if (file != null) {
if (filesInUse.contains(file)) continue;
if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString());
// This needs to be called *before* the file is deleted
String urlHash = getHash(file);
if (deleteFileandDirs(file, "OLD")) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
if (urlHash != null) {
if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash);
responseHeaderDB.remove(urlHash);
} else {
yacyURL url = getURL(file);
if (url != null) {
if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true));
responseHeaderDB.remove(url.hash());
}
Iterator<Map.Entry<String, File>> iter = cacheAge.entrySet().iterator();
Map.Entry<String, File> entry;
while (iter.hasNext() && curCacheSize >= newCacheSize) {
if (Thread.currentThread().isInterrupted()) return;
entry = iter.next();
String key = entry.getKey();
file = entry.getValue();
long t = Long.parseLong(key.substring(0, 16), 16);
if (System.currentTimeMillis() - t < 300000) break; // files must have been at least 5 minutes in the cache before they are deleted
if (file != null) {
if (log.isFinest()) log.logFinest("Trying to delete [" + key + "] = old file: " + file.toString());
// This needs to be called *before* the file is deleted
String urlHash = getHash(file);
if (deleteFileandDirs(file, "OLD")) {
try {
// As the file is gone, the entry in responseHeader.db is not needed anymore
if (urlHash != null) {
if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URLhash: " + urlHash);
responseHeaderDB.remove(urlHash);
} else {
yacyURL url = getURL(file);
if (url != null) {
if (log.isFinest()) log.logFinest("Trying to remove responseHeader for URL: " + url.toNormalform(false, true));
responseHeaderDB.remove(url.hash());
}
} catch (IOException e) {
log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
} catch (IOException e) {
log.logInfo("IOExeption removing response header from DB: " + e.getMessage(), e);
}
}
iter.remove();
}
iter.remove();
}
}
@ -664,29 +621,6 @@ public final class plasmaHTCache {
return plasmaParser.mediaExtContains(urlString);
}
/**
* This function moves an old cached object (if it exists) to the new position
*/
private static void moveCachedObject(File oldpath, File newpath) {
try {
if (oldpath.exists() && oldpath.isFile() && (!newpath.exists())) {
long d = oldpath.lastModified();
newpath.getParentFile().mkdirs();
if (oldpath.renameTo(newpath)) {
cacheAge.put(ageString(d, newpath), newpath);
File obj = oldpath.getParentFile();
while ((!(obj.equals(cachePath))) && (obj.isDirectory()) && (obj.list().length == 0)) {
if (obj.delete()) if (log.isFine()) log.logFine("DELETED EMPTY DIRECTORY : " + obj.toString());
obj = obj.getParentFile();
}
}
}
} catch (Exception e) {
if (log.isFine()) log.logFine("moveCachedObject('" + oldpath.toString() + "','" +
newpath.toString() + "')", e);
}
}
private static String replaceRegex(String input, String regex, String replacement) {
if (input == null) { return ""; }
if (input.length() > 0) {
@ -767,34 +701,9 @@ public final class plasmaHTCache {
fileName.append('!').append(port);
}
// generate cache path according to storage method
if (cacheLayout.equals("tree")) {
File FileTree = treeFile(fileName, "tree", path);
if (cacheMigration) {
moveCachedObject(hashFile(fileName, "hash", extention, url.hash()), FileTree);
moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileTree); // temporary migration
moveCachedObject(treeFile(fileName, null, path), FileTree); // temporary migration
}
return FileTree;
}
if (cacheLayout.equals("hash")) {
File FileFlat = hashFile(fileName, "hash", extention, url.hash());
if (cacheMigration) {
moveCachedObject(treeFile(fileName, "tree", path), FileFlat);
moveCachedObject(treeFile(fileName, null, path), FileFlat); // temporary migration
moveCachedObject(hashFile(fileName, null, extention, url.hash()), FileFlat); // temporary migration
}
return FileFlat;
}
return null;
}
private static File treeFile(StringBuffer fileName, String prefix, String path) {
StringBuffer f = new StringBuffer(fileName.length() + 30);
f.append(fileName);
if (prefix != null) f.append('/').append(prefix);
f.append(path);
return new File(cachePath, f.toString());
// generate cache path
File FileFlat = hashFile(fileName, "hash", extention, url.hash());
return FileFlat;
}
private static File hashFile(StringBuffer fileName, String prefix, String extention, String urlhash) {
@ -807,7 +716,6 @@ public final class plasmaHTCache {
return new File(cachePath, f.toString());
}
/**
* This is a helper function that extracts the Hash from the filename
*/

@ -593,46 +593,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
* @see plasmaSwitchboard#PROXY_CACHE_LAYOUT_TREE
* @see plasmaSwitchboard#PROXY_CACHE_LAYOUT_HASH
*/
public static final String PROXY_CACHE_LAYOUT = "proxyCacheLayout";
/**
* <p><code>public static final String <strong>PROXY_CACHE_LAYOUT_TREE</strong> = "tree"</code></p>
* <p>Setting the file-/folder-structure for {@link #PROXY_CACHE_LAYOUT}. Websites are stored in a folder-layout
* according to the layout, the URL purported. The first folder is either <code>http</code> or <code>https</code>
* depending on the protocol used to fetch the website, descending follows the hostname and the sub-folders on the
* website up to the actual file itself.</p>
* <p>When using <code>tree</code>, be aware that
* the possibility of inconsistencies between folders and files with the same name may occur which prevent proper
* storage of the fetched site. Below is an example how files are stored:</p>
* <pre>
* /html/
* /html/www.example.com/
* /html/www.example.com/index/
* /html/www.example.com/index/en/
* /html/www.example.com/index/en/index.html</pre>
*/
public static final String PROXY_CACHE_LAYOUT_TREE = "tree";
/**
* <p><code>public static final String <strong>PROXY_CACHE_LAYOUT_HASH</strong> = "hash"</code></p>
* <p>Setting the file-/folder-structure for {@link #PROXY_CACHE_LAYOUT}. Websites are stored using the MD5-sum of
* their respective URLs. This method prevents collisions on some websites caused by using the {@link #PROXY_CACHE_LAYOUT_TREE}
* layout.</p>
* <p>Similarly to {@link #PROXY_CACHE_LAYOUT_TREE}, the top-folders name is given by the protocol used to fetch the site,
* followed by either <code>www</code> or &ndash; if the hostname does not start with "www" &ndash; <code>other</code>.
* Afterwards the next folder has the rest of the hostname as name, followed by a folder <code>hash</code> which contains
* a folder consisting of the first two letters of the hash. Another folder named after the 3rd and 4th letters of the
* hash follows which finally contains the file named after the full 18-characters long hash.
* Below is an example how files are stored:</p>
* <pre>
* /html/
* /html/www/
* /html/www/example.com/
* /html/www/example.com/hash/
* /html/www/example.com/hash/0d/
* /html/www/example.com/hash/0d/f8/
* /html/www/example.com/hash/0d/f8/0df83a8444f48317d8</pre>
*/
public static final String PROXY_CACHE_LAYOUT_HASH = "hash";
public static final String PROXY_CACHE_MIGRATION = "proxyCacheMigration";
//////////////////////////////////////////////////////////////////////////////////////////////
// Cluster settings
@ -1087,9 +1047,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
htCachePath = getConfigPath(HTCACHE_PATH, HTCACHE_PATH_DEFAULT);
this.log.logInfo("HTCACHE Path = " + htCachePath.getAbsolutePath());
long maxCacheSize = 1024 * 1024 * Long.parseLong(getConfig(PROXY_CACHE_SIZE, "2")); // this is megabyte
String cacheLayout = getConfig(PROXY_CACHE_LAYOUT, PROXY_CACHE_LAYOUT_TREE);
boolean cacheMigration = getConfigBool(PROXY_CACHE_MIGRATION, true);
plasmaHTCache.init(htCachePath, maxCacheSize, cacheLayout, cacheMigration);
plasmaHTCache.init(htCachePath, maxCacheSize);
// create the release download directory
releasePath = getConfigPath(RELEASE_PATH, RELEASE_PATH_DEFAULT);
@ -1139,19 +1097,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
// create queue
this.sbQueue = new plasmaSwitchboardQueue(wordIndex, new File(this.plasmaPath, "switchboardQueue2.stack"), this.profilesActiveCrawls);
// going through the sbQueue Entries and registering all content files as in use
int count = 0;
plasmaSwitchboardQueue.QueueEntry queueEntry;
Iterator<plasmaSwitchboardQueue.QueueEntry> i1 = sbQueue.entryIterator(true);
while (i1.hasNext()) {
queueEntry = i1.next();
if ((queueEntry != null) && (queueEntry.url() != null) && (queueEntry.cacheFile().exists())) {
plasmaHTCache.filesInUse.add(queueEntry.cacheFile());
count++;
}
}
this.log.logConfig(count + " files in htcache reported to the cachemanager as in use.");
// define an extension-blacklist
log.logConfig("Parser: Initializing Extension Mappings for Media/Parser");
plasmaParser.initMediaExt(plasmaParser.extString2extList(getConfig(PARSER_MEDIA_EXT,"")));
@ -1696,12 +1641,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
/* =========================================================================
* INDEXING
* ========================================================================= */
if (doIndexing && isSupportedContent){
// registering the cachefile as in use
if (entry.cacheFile().exists()) {
plasmaHTCache.filesInUse.add(entry.cacheFile());
}
if (doIndexing && isSupportedContent) {
// enqueue for further crawling
enQueue(this.sbQueue.newEntry(
@ -2152,10 +2092,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
document = null;
}
if (document == null) {
if (!in.queueEntry.profile().storeHTCache()) {
plasmaHTCache.filesInUse.remove(in.queueEntry.cacheFile());
//plasmaHTCache.deleteURLfromCache(entry.url());
}
in.queueEntry.close();
return null;
}
@ -2235,10 +2171,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
condenser = null;
}
if (condenser == null) {
if (!in.queueEntry.profile().storeHTCache()) {
plasmaHTCache.filesInUse.remove(in.queueEntry.cacheFile());
//plasmaHTCache.deleteURLfromCache(entry.url());
}
in.queueEntry.close();
return null;
}
@ -2305,10 +2237,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<plasmaSwitchbo
public void storeDocumentIndex(indexingQueueEntry in) {
in.queueEntry.updateStatus(plasmaSwitchboardQueue.QUEUE_STATE_INDEXSTORAGE);
storeDocumentIndex(in.queueEntry, in.document, in.condenser);
if (!in.queueEntry.profile().storeHTCache()) {
plasmaHTCache.filesInUse.remove(in.queueEntry.cacheFile());
//plasmaHTCache.deleteURLfromCache(entry.url());
}
in.queueEntry.updateStatus(plasmaSwitchboardQueue.QUEUE_STATE_FINISHED);
in.queueEntry.close();
}

Loading…
Cancel
Save