many bugfixes, memory leak fixes, performance enhancements; new kelondroHashtable; activated snippets

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@313 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 20 years ago
parent dc1d707eb2
commit 1e7f062350

@@ -81,7 +81,7 @@ public class index {
referrerprop.put("clientip", header.get("CLIENTIP"));
referrerprop.put("useragent", header.get("User-Agent"));
referrerprop.put("date", (new serverDate()).toShortString(false));
try { sb.facilityDB.update("backlinks", referer, referrerprop); } catch (IOException e) {}
if (sb.facilityDB != null) try { sb.facilityDB.update("backlinks", referer, referrerprop); } catch (IOException e) {}
}
}
@@ -114,7 +114,7 @@ public class index {
// process search words
String querystring = (String) post.get("search", "");
try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
if (sb.facilityDB != null) try { sb.facilityDB.update("zeitgeist", querystring, post); } catch (Exception e) {}
TreeSet query = cleanQuery(querystring);
// filter out stopwords
TreeSet filtered = kelondroMSetTools.joinConstructive(query, plasmaSwitchboard.stopwords);
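Both index.java hunks add the same null guard. The reason sits further down in this commit: the facilityDB initialization in plasmaSwitchboard is commented out (and its close() call is guarded), so the facility may legitimately be absent at runtime and every caller has to tolerate that. A standalone sketch of the pattern, with FacilityDB as a hypothetical stand-in for the real kelondroTables facility:

import java.io.IOException;
import java.util.Map;

// Sketch of the optional-facility pattern from this commit: the facility may be
// null while its initialization is disabled, so access is guarded and failures
// are best-effort. FacilityDB is a hypothetical stand-in, not the real class.
class OptionalFacilitySketch {
    interface FacilityDB { void update(String table, String key, Map row) throws IOException; }

    static FacilityDB facilityDB = null; // initialization disabled elsewhere

    static void recordBacklink(String referer, Map props) {
        if (facilityDB != null) {
            try { facilityDB.update("backlinks", referer, props); }
            catch (IOException e) { /* bookkeeping is best-effort */ }
        }
    }
}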

@@ -45,13 +45,9 @@
package de.anomic.kelondro;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.Iterator;
import java.util.StringTokenizer;
public class kelondroArray extends kelondroRecords {
@@ -60,7 +56,7 @@ public class kelondroArray extends kelondroRecords {
private static short thisOHHandles = 0; // and two handles overhead for a double-chained list
public kelondroArray(File file, int[] columns, int intprops) throws IOException {
// this creates a new tree
// this creates a new array
super(file, 0, thisOHBytes, thisOHHandles, columns, intprops, columns.length /*txtProps*/, 80 /*txtPropWidth*/);
for (int i = 0; i < intprops; i++) setHandle(i, new Handle(0));
}
@@ -90,6 +86,17 @@ public class kelondroArray extends kelondroRecords {
return getNode(new Handle(index)).getValues();
}
public synchronized int seti(int index, int value) throws IOException {
int before = getHandle(index).hashCode();
setHandle(index, new Handle(index));
return before;
}
public synchronized int geti(int index) throws IOException {
return getHandle(index).hashCode();
}
public void print() throws IOException {
System.out.println("PRINTOUT of table, length=" + size());
byte[][] row;
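The new seti/geti pair reuses the record file's int-property handle slots as a small persistent int array; Handle.hashCode() yields the stored number. Note that seti ignores its value argument and writes new Handle(index), which looks like a slip: new Handle(value) was presumably intended, since geti would otherwise only ever return the slot number. A standalone sketch of the presumed intent, with Handle as a minimal stand-in for kelondroRecords.Handle:

// Standalone sketch: integer properties stored as boxed handles whose
// hashCode() is the raw value.
class IntSlotSketch {
    static class Handle {
        final int index;
        Handle(int index) { this.index = index; }
        public int hashCode() { return index; }
    }

    private final Handle[] handles = new Handle[4];

    synchronized int seti(int slot, int value) {
        int before = (handles[slot] == null) ? 0 : handles[slot].hashCode();
        handles[slot] = new Handle(value); // the diff writes new Handle(index) here
        return before;
    }

    synchronized int geti(int slot) {
        return (handles[slot] == null) ? 0 : handles[slot].hashCode();
    }

    public static void main(String[] args) {
        IntSlotSketch a = new IntSlotSketch();
        a.seti(0, 42);
        System.out.println(a.geti(0)); // 42
    }
}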

@@ -146,6 +146,7 @@ public class kelondroMScoreCluster {
// set new value
c = scoreKey(en, ec);
cs = new Long(c);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak
refkeyDB.put(obj, cs);
keyrefDB.put(cs, obj);
@@ -174,6 +175,7 @@ public class kelondroMScoreCluster {
// set new value
c = scoreKey(en, ec);
cs = new Long(c);
Object oldcs = refkeyDB.remove(obj); if (oldcs != null) keyrefDB.remove(oldcs); // avoid memory leak
refkeyDB.put(obj, cs);
keyrefDB.put(cs, obj);
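Both score-setting paths get the same one-line fix. refkeyDB maps object to score key and keyrefDB mirrors it back; before this change, re-scoring an object overwrote its refkeyDB entry but left the stale score key behind in keyrefDB, which therefore grew without bound. A standalone sketch of the leak and the fix, with plain HashMaps standing in for the cluster's internal maps:

import java.util.HashMap;
import java.util.Map;

// Standalone sketch: two mirrored maps must be updated together, or the
// key->object map accumulates one stale entry per re-scored object.
class BidiMapLeakSketch {
    static Map refkeyDB = new HashMap(); // object -> score key
    static Map keyrefDB = new HashMap(); // score key -> object

    static void setScore(Object obj, Long scoreKey) {
        Object oldcs = refkeyDB.remove(obj);
        if (oldcs != null) keyrefDB.remove(oldcs); // the fix: drop the stale mirror entry
        refkeyDB.put(obj, scoreKey);
        keyrefDB.put(scoreKey, obj);
    }

    public static void main(String[] args) {
        setScore("url1", new Long(7));
        setScore("url1", new Long(9)); // without the remove, Long(7) would linger
        System.out.println(keyrefDB.size()); // 1, not 2
    }
}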

@@ -59,9 +59,11 @@ public class kelondroMSetTools {
throw new ClassCastException();
}
private static int log2(int x) {
public static int log2a(int x) {
// this computes 1 + log2
// it is the number of bits in x, not the logarithm base 2
int l = 0;
while (x > 0) {x = x >> 1; l++;}
while (x > 0) {x = x >>> 1; l++;}
return l;
}
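The renamed log2a returns the bit length of x: 1 + floor(log2 x) for positive x, and 0 for x <= 0 (the guard fails immediately). With that x > 0 guard, >> and >>> actually behave identically; the unsigned shift documents that x is treated as a bit pattern and keeps the loop terminating even if the guard were ever relaxed to x != 0, where >> would replicate the sign bit forever. A quick standalone check:

// Standalone check of log2a: the bit length of x.
class Log2aSketch {
    static int log2a(int x) {
        int l = 0;
        while (x > 0) { x = x >>> 1; l++; }
        return l;
    }

    public static void main(String[] args) {
        System.out.println(log2a(1)); // 1
        System.out.println(log2a(7)); // 3
        System.out.println(log2a(8)); // 4
    }
}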
@@ -84,7 +86,7 @@ public class kelondroMSetTools {
int high = ((map.size() > set.size()) ? map.size() : set.size());
int low = ((map.size() > set.size()) ? set.size() : map.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2(high) * low;
int stepsTest = 12 * log2a(high) * low;
// start most efficient method
if (stepsEnum > stepsTest) {
@@ -156,7 +158,7 @@ public class kelondroMSetTools {
int high = ((set1.size() > set2.size()) ? set1.size() : set2.size());
int low = ((set1.size() > set2.size()) ? set2.size() : set1.size());
int stepsEnum = 10 * (high + low - 1);
int stepsTest = 12 * log2(high) * low;
int stepsTest = 12 * log2a(high) * low;
// start most efficient method
if (stepsEnum > stepsTest) {
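Both call sites share the cost model behind this branch: walking two sorted collections in lockstep costs roughly 10 * (high + low - 1) steps, while probing each element of the smaller collection against the larger costs roughly 12 * log2a(high) * low. Worked through for high = 10000 and low = 100: stepsEnum = 10 * 10099 = 100990 versus stepsTest = 12 * 14 * 100 = 16800 (log2a(10000) = 14), so the branch presumably selects the probing method; for two similar-sized sets the linear walk wins. Switching from log2 to log2a keeps the estimate one bit above the true logarithm, which only nudges the heuristic's crossover point, not its asymptotics.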

@@ -744,6 +744,10 @@ public class kelondroRecords {
}
// additional properties
public synchronized int handles() {
return this.HANDLES.length;
}
protected void setHandle(int pos, Handle handle) throws IOException {
if (pos >= HANDLES.length) throw new IllegalArgumentException("setHandle: handle array exceeded");
if (handle == null) handle = new Handle(NUL);

@@ -119,18 +119,16 @@ public class kelondroTree extends kelondroRecords implements Comparator {
super(ra, buffersize);
}
private static byte abs(byte b) {
// for height computation
if (b < 0) return (byte) -b; else return b;
}
// Returns the value to which this map maps the specified key.
public synchronized byte[][] get(byte[] key) throws IOException {
//System.out.println("kelondroTree.get " + new String(key) + " in " + filename);
Search search = new Search(key);
if (search.found()) {
return search.getMatcher().getValues();
byte[][] result = search.getMatcher().getValues();
search = null;
return result;
} else {
search = null;
return null;
}
}
@@ -306,6 +304,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
// a node with this key exist. simply overwrite the content and return old content
Node e = searchResult.getMatcher();
byte[][] result = e.setValues(newrow);
searchResult = null;
return result;
} else if (searchResult.isRoot()) {
// a node with this key does not exist and there is no node at all
@@ -320,6 +319,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
e.setOHHandle(new Handle[] {null, null, null}); // {parent, leftchild, rightchild}
// do updates
setHandle(root, e.handle());
searchResult = null;
return null;
} else {
// a node with this key does not exist
@@ -375,7 +375,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
parentOHByte[balance]--;
path = "R" + path;
}
increasedHight = ((abs(parentOHByte[balance]) - abs(prevHight)) > 0);
increasedHight = ((java.lang.Math.abs((int) parentOHByte[balance]) - java.lang.Math.abs((int) prevHight)) > 0);
parentNode.setOHByte(parentOHByte);
// here we either stop because we had no increased height,
@@ -384,7 +384,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
if (!(increasedHight)) break; // finished
// check rotation need
if (abs(parentOHByte[balance]) > 1) {
if (java.lang.Math.abs((int) parentOHByte[balance]) > 1) {
// rotate and stop then
//System.out.println("* DB DEBUG: " + path.substring(0,2) + " ROTATION AT NODE " + parentNode.handle().toString() + ": BALANCE=" + parentOHByte[balance]);
if (path.startsWith("LL")) {
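Replacing the private abs(byte) helper with java.lang.Math.abs on a widened int removes an overflow trap: for b = (byte) -128 the expression (byte) -b wraps back to -128, so the result stays negative and a test like abs(...) > 1 would misfire. A valid AVL balance byte should never reach -128, but the widened form is safe by construction. A standalone check:

// Standalone check: negating the most negative byte overflows back to itself;
// widening to int before Math.abs gives the true magnitude.
class ByteAbsSketch {
    static byte oldAbs(byte b) { if (b < 0) return (byte) -b; else return b; }

    public static void main(String[] args) {
        byte b = -128;
        System.out.println(oldAbs(b));                   // -128 (overflow)
        System.out.println(java.lang.Math.abs((int) b)); // 128
    }
}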
@@ -561,6 +561,7 @@ public class kelondroTree extends kelondroRecords implements Comparator {
Node result = search.getMatcher();
byte[][] values = result.getValues();
remove(result, search.getParent());
search = null;
return values;
} else {
return null;
@@ -722,9 +723,12 @@ public class kelondroTree extends kelondroRecords implements Comparator {
try {
Search s = new Search(firstKey);
if (s.found()) {
return new nodeIterator(up, rotating, s.getMatcher());
Node matcher = s.getMatcher();
s = null;
return new nodeIterator(up, rotating, matcher);
} else {
Node nn = s.getParent();
s = null;
if (nn == null) {
return (new HashSet()).iterator(); // an empty iterator
} else {
@@ -862,9 +866,12 @@ public class kelondroTree extends kelondroRecords implements Comparator {
public synchronized Iterator rows(boolean up, boolean rotating, byte[] firstKey) throws IOException {
Search s = new Search(firstKey);
if (s.found()) {
return new rowIterator(new nodeIterator(up, rotating, s.getMatcher()));
Node matcher = s.getMatcher();
s = null;
return new rowIterator(new nodeIterator(up, rotating, matcher));
} else {
Node nn = s.getParent();
s = null;
if (nn == null) {
return (Iterator) (new HashSet()).iterator();
} else {
@@ -910,9 +917,12 @@ public class kelondroTree extends kelondroRecords implements Comparator {
public synchronized Iterator keys(boolean up, boolean rotating, byte[] firstKey) throws IOException {
Search s = new Search(firstKey);
if (s.found()) {
return new keyIterator(new nodeIterator(up, rotating, s.getMatcher()));
Node matcher = s.getMatcher();
s = null;
return new keyIterator(new nodeIterator(up, rotating, matcher));
} else {
Node nn = s.getParent();
s = null;
if (nn == null) {
return (Iterator) (new HashSet()).iterator();
} else {

@@ -134,13 +134,21 @@ public class plasmaCrawlNURL extends plasmaURL {
public void run() {
Iterator i;
try {
//System.out.println("init coreStack index");
i = coreStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init limitStack index");
i = limitStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init overhangStack index");
i = overhangStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init remoteStack index");
i = remoteStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init imageStack index");
i = imageStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init movieStack index");
i = movieStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("init musicStack index");
i = musicStack.iterator(); while (i.hasNext()) stackIndex.add(new String(((kelondroRecords.Node) i.next()).getKey()));
//System.out.println("finished index init");
} catch (IOException e) {}
}
}

@@ -298,12 +298,14 @@ public final class plasmaCrawlWorker extends Thread {
htCache.status = plasmaHTCache.CACHE_PASSING;
}
// enQueue new entry with response header
if ((initiator == null) || (initiator.length() == 0)) {
// enqueued for proxy writings
cacheManager.stackProcess(htCache);
} else {
// direct processing for crawling
cacheManager.process(htCache);
if (profile != null) {
if ((initiator == null) || (initiator.length() == 0)) {
// enqueued for proxy writings
cacheManager.stackProcess(htCache);
} else {
// direct processing for crawling
cacheManager.process(htCache);
}
}
} catch (SocketException e) {
// this may happen if the client suddenly closes its connection

@@ -52,10 +52,12 @@ package de.anomic.plasma;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.Map;
import java.util.LinkedList;
import java.util.TreeMap;
@@ -206,9 +208,9 @@ public final class plasmaHTCache {
}
public httpHeader getCachedResponse(String urlHash) throws IOException {
httpHeader header = new httpHeader(null, responseHeaderDB.get(urlHash));
//System.out.println("DEBUG: getCachedResponse hash=" + urlHash + ", header=" + header.toString());
return header;
Map hdb = responseHeaderDB.get(urlHash);
if (hdb == null) return null;
return new httpHeader(null, hdb);
}
public boolean idle() {
@@ -245,76 +247,76 @@ public final class plasmaHTCache {
}
synchronized public void process(Entry entry) throws IOException {
if (entry == null) return;
// store response header
if ((entry.status == CACHE_FILL) ||
(entry.status == CACHE_STALE_RELOAD_GOOD) ||
(entry.status == CACHE_STALE_RELOAD_BAD)) {
responseHeaderDB.set(entry.nomalizedURLHash, entry.responseHeader);
}
// work off unwritten files and undone parsing
String storeError = null;
if (((entry.status == CACHE_FILL) || (entry.status == CACHE_STALE_RELOAD_GOOD)) &&
((storeError = entry.shallStoreCache()) == null)) {
// write file if not written yet
if (entry.cacheArray != null) try {
if (entry.cacheFile.exists()) {
currCacheSize -= entry.cacheFile.length();
entry.cacheFile.delete();
// store response header
if ((entry.status == CACHE_FILL) ||
(entry.status == CACHE_STALE_RELOAD_GOOD) ||
(entry.status == CACHE_STALE_RELOAD_BAD)) {
responseHeaderDB.set(entry.nomalizedURLHash, entry.responseHeader);
}
// work off unwritten files and undone parsing
String storeError = null;
if (((entry.status == CACHE_FILL) || (entry.status == CACHE_STALE_RELOAD_GOOD)) &&
((storeError = entry.shallStoreCache()) == null)) {
// write file if not written yet
if (entry.cacheArray != null) try {
if (entry.cacheFile.exists()) {
currCacheSize -= entry.cacheFile.length();
entry.cacheFile.delete();
}
entry.cacheFile.getParentFile().mkdirs();
log.logInfo("WRITE FILE (" + entry.cacheArray.length + " bytes) " + entry.cacheFile);
serverFileUtils.write(entry.cacheArray, entry.cacheFile);
log.logDebug("AFTER WRITE cacheArray = " + entry.cacheFile + ": " + ((entry.cacheArray == null) ? "empty" : "full"));
//entry.cacheArray = null;
} catch (FileNotFoundException e) {
// this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs
// In this case do nothing.
log.logError("File storage failed: " + e.getMessage());
}
entry.cacheFile.getParentFile().mkdirs();
log.logInfo("WRITE FILE (" + entry.cacheArray.length + " bytes) " + entry.cacheFile);
serverFileUtils.write(entry.cacheArray, entry.cacheFile);
log.logDebug("AFTER WRITE cacheArray = " + entry.cacheFile + ": " + ((entry.cacheArray == null) ? "empty" : "full"));
//entry.cacheArray = null;
} catch (FileNotFoundException e) {
// this is the case of a "(Not a directory)" error, which should be prohibited
// by the shallStoreCache() property. However, sometimes the error still occurs
// In this case do nothing.
log.logError("File storage failed: " + e.getMessage());
// update statistics
currCacheSize += entry.cacheFile.length();
cacheAge.put(ageString(entry.cacheFile.lastModified(), entry.cacheFile), entry.cacheFile);
// enqueue in switchboard
switchboard.enQueue(entry);
} else if (entry.status == CACHE_PASSING) {
// even if the file should not be stored in the cache, it can still be indexed
if (storeError != null) log.logDebug("NOT STORED " + entry.cacheFile + ":" + storeError);
// enqueue in switchboard
switchboard.enQueue(entry);
}
// write log
switch (entry.status) {
case CACHE_UNFILLED:
log.logInfo("CACHE UNFILLED: " + entry.cacheFile); break;
case CACHE_FILL:
log.logInfo("CACHE FILL: " + entry.cacheFile +
((entry.cacheArray == null) ? "" : " (cacheArray is filled)") +
((entry.scraper == null) ? "" : " (scraper is filled)"));
break;
case CACHE_HIT:
log.logInfo("CACHE HIT: " + entry.cacheFile); break;
case CACHE_STALE_NO_RELOAD:
log.logInfo("CACHE STALE, NO RELOAD: " + entry.cacheFile); break;
case CACHE_STALE_RELOAD_GOOD:
log.logInfo("CACHE STALE, NECESSARY RELOAD: " + entry.cacheFile); break;
case CACHE_STALE_RELOAD_BAD:
log.logInfo("CACHE STALE, SUPERFLUOUS RELOAD: " + entry.cacheFile); break;
case CACHE_PASSING:
log.logInfo("PASSING: " + entry.cacheFile); break;
default:
log.logInfo("CACHE STATE UNKNOWN: " + entry.cacheFile); break;
}
// update statistics
currCacheSize += entry.cacheFile.length();
cacheAge.put(ageString(entry.cacheFile.lastModified(), entry.cacheFile), entry.cacheFile);
// enqueue in switchboard
switchboard.enQueue(entry);
} else if (entry.status == CACHE_PASSING) {
// even if the file should not be stored in the cache, it can still be indexed
if (storeError != null) log.logDebug("NOT STORED " + entry.cacheFile + ":" + storeError);
// enqueue in switchboard
switchboard.enQueue(entry);
}
// write log
switch (entry.status) {
case CACHE_UNFILLED:
log.logInfo("CACHE UNFILLED: " + entry.cacheFile); break;
case CACHE_FILL:
log.logInfo("CACHE FILL: " + entry.cacheFile +
((entry.cacheArray == null) ? "" : " (cacheArray is filled)") +
((entry.scraper == null) ? "" : " (scraper is filled)"));
break;
case CACHE_HIT:
log.logInfo("CACHE HIT: " + entry.cacheFile); break;
case CACHE_STALE_NO_RELOAD:
log.logInfo("CACHE STALE, NO RELOAD: " + entry.cacheFile); break;
case CACHE_STALE_RELOAD_GOOD:
log.logInfo("CACHE STALE, NECESSARY RELOAD: " + entry.cacheFile); break;
case CACHE_STALE_RELOAD_BAD:
log.logInfo("CACHE STALE, SUPERFLUOUS RELOAD: " + entry.cacheFile); break;
case CACHE_PASSING:
log.logInfo("PASSING: " + entry.cacheFile); break;
default:
log.logInfo("CACHE STATE UNKNOWN: " + entry.cacheFile); break;
}
}
@@ -453,6 +455,32 @@ public final class plasmaHTCache {
return null;
}
public byte[] loadResource(URL url) {
// load the url as resource from the cache
File f = getCachePath(url);
if (f.exists()) try {
return serverFileUtils.read(f);
} catch (IOException e) {
return null;
} else {
return null;
}
}
/*
public void saveResource(URL url, byte[] resource) {
File f = getCachePath(url);
f.getParentFile().mkdirs();
FileOutputStream fos = null;
try {
fos = new FileOutputStream(f);
htCache.cacheArray = res.writeContent(fos); // writes in cacheArray and cache file
} finally {
if (fos!=null)try{fos.close();}catch(Exception e){}
}
}
*/
public static boolean isPOST(String urlString) {
return ((urlString.indexOf("?") >= 0) ||
(urlString.indexOf("&") >= 0));

@@ -143,8 +143,8 @@ public final class plasmaParser {
* @see #initMediaExt(String)
*/
static {
initMediaExt("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
"sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj");
initMediaExt(extString2extList("swf,wmv,jpg,jpeg,jpe,rm,mov,mpg,mpeg,mp3,asf,gif,png,avi,zip,rar," +
"sit,hqx,img,dmg,tar,gz,ps,xls,ppt,ram,bz2,arj"));
/* ===================================================
* initializing the parser object pool
@@ -200,25 +200,31 @@ public final class plasmaParser {
setEnabledParserList(mimeTypes);
}
public static void initMediaExt(String mediaExtString) {
public static List extString2extList(String extString) {
LinkedList extensions = new LinkedList();
if ((mediaExtString == null) || (mediaExtString.length() == 0)) {
if ((extString == null) || (extString.length() == 0)) {
return extensions;
} else {
String[] xs = mediaExtString.split(",");
String[] xs = extString.split(",");
for (int i = 0; i < xs.length; i++) extensions.add(xs[i].toLowerCase().trim());
}
initMediaExt(extensions);
return extensions;
}
public static void initMediaExt(List mediaExtList) {
synchronized (mediaExtSet) {
mediaExtSet.clear();
mediaExtSet.addAll(mediaExtList);
}
mediaExtSet.addAll(mediaExtList);
}
}
public static void initSupportedFileExt(List supportedFileExtList) {
synchronized (mediaExtSet) {
supportedFileExt.clear();
supportedFileExt.addAll(supportedFileExtList);
}
}
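The refactor separates parsing from installation: extString2extList turns a comma-separated config string into a lowercased, trimmed List, and initMediaExt/initSupportedFileExt each install such a list under synchronization. plasmaSwitchboard (further down) feeds both the mediaExt and the new parseableExt settings through the same helper. A usage sketch, assuming exactly the signatures shown in this hunk:

// Usage sketch (API as shown above): parse once, install per target set.
java.util.List media = plasmaParser.extString2extList("jpg,GIF, png ,zip");
// -> [jpg, gif, png, zip]: lowercased and trimmed
plasmaParser.initMediaExt(media);
plasmaParser.initSupportedFileExt(plasmaParser.extString2extList("html,htm,txt"));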
public static boolean realtimeParsableMimeTypesContains(String mimeType) {
mimeType = getRealMimeType(mimeType);
synchronized (realtimeParsableMimeTypes) {
@@ -238,6 +244,12 @@ public final class plasmaParser {
}
}
public static boolean supportedFileExtContains(String mediaExt) {
if (supportedFileExt == null) return false;
//System.out.println("supported ext: " + supportedFileExt.toString());
return (supportedFileExt.contains(mediaExt));
}
public static boolean mediaExtContains(String mediaExt) {
if (mediaExt == null) return false;
@@ -316,16 +328,16 @@ public final class plasmaParser {
}
synchronized (enabledParserList) {
enabledParserList.clear();
enabledParserList.putAll(newEnabledParsers);
}
//enabledParserList.clear();
enabledParserList.putAll(newEnabledParsers);
}
synchronized (supportedFileExt) {
supportedFileExt.clear();
//supportedFileExt.clear();
supportedFileExt.addAll(newSupportedFileExt);
}
}
return (String[])newEnabledParsers.keySet().toArray(new String[newEnabledParsers.size()]);
}

@@ -114,26 +114,38 @@ public class plasmaSnippetCache {
return (String) snippetsCache.get(key);
}
public String retrieve(java.net.URL url, boolean fetchOnline, Set query, boolean queryAreHashes) {
if (query.size() == 0) return null;
if (!(queryAreHashes)) query = plasmaSearch.words2hashes(query);
public String retrieve(java.net.URL url, boolean fetchOnline, Set queryhashes) {
if (queryhashes.size() == 0) {
//System.out.println("found no queryhashes for url retrieve " + url);
return null;
}
String urlhash = plasmaURL.urlHash(url);
// try to get snippet from snippetCache
String wordhashes = yacySearch.set2string(query);
String wordhashes = yacySearch.set2string(queryhashes);
String snippet = retrieve(wordhashes, urlhash);
if (snippet != null) return snippet;
if (snippet != null) {
//System.out.println("found snippet for url " + url + " in cache: " + snippet);
return snippet;
}
// if the snippet is not in the cache, we can try to get it from the htcache
plasmaParserDocument document = getDocument(url, fetchOnline);
if (document == null) return null;
if (document == null) {
//System.out.println("cannot load document for url " + url);
return null;
}
//System.out.println("loaded document for url " + url);
String[] sentences = document.getSentences();
//System.out.println("----" + url.toString()); for (int l = 0; l < sentences.length; l++) System.out.println(sentences[l]);
if ((sentences == null) || (sentences.length == 0)) return null;
if ((sentences == null) || (sentences.length == 0)) {
//System.out.println("found no sentences in url " + url);
return null;
}
// we have found a parseable non-empty file: use the lines
TreeMap sentencematrix = hashMatrix(sentences);
Iterator i = query.iterator();
Iterator i = queryhashes.iterator();
String hash;
kelondroMScoreCluster hitTable = new kelondroMScoreCluster();
Iterator j;
@@ -151,8 +163,9 @@ public class plasmaSnippetCache {
Integer maxLine = (Integer) hitTable.getMaxObject();
if (maxLine == null) return null;
snippet = sentences[maxLine.intValue()];
if (snippet.length() > 140) return null;
//System.out.println("loaded snippet for url " + url + ": " + snippet);
if (snippet.length() > 120) snippet = snippet.substring(0, 120);
// finally store this snippet in our own cache
store(wordhashes, urlhash, snippet);
return snippet;
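Taken together, the retrieve() changes settle the snippet pipeline: the caller now passes ready-made word hashes; the cache is consulted by (wordhashes, urlhash); on a miss the document is loaded from the HTCache (optionally fetched online), split into sentences, and each sentence is scored by how many query hashes it contains, with the top-scoring sentence becoming the snippet. One behavioral fix hides in the last hunk: a best sentence longer than 140 characters used to disqualify the snippet entirely (return null), whereas it is now clipped to 120 characters and cached. A standalone sketch of the scoring idea, with plain substring matching standing in for the hash matrix:

// Standalone sketch: score sentences by query-term hits and keep the argmax;
// kelondroMScoreCluster plays this role above, over word hashes rather than text.
class SnippetScoreSketch {
    public static void main(String[] args) {
        String[] sentences = { "the quick brown fox", "lazy dogs sleep", "quick dogs" };
        String[] queryWords = { "quick", "dogs" };
        int best = -1, bestScore = 0;
        for (int s = 0; s < sentences.length; s++) {
            int hits = 0;
            for (int q = 0; q < queryWords.length; q++)
                if (sentences[s].indexOf(queryWords[q]) >= 0) hits++;
            if (hits > bestScore) { bestScore = hits; best = s; }
        }
        String snippet = (best < 0) ? null : sentences[best];
        if ((snippet != null) && (snippet.length() > 120)) snippet = snippet.substring(0, 120);
        System.out.println(snippet); // quick dogs
    }
}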
@@ -175,10 +188,10 @@ public class plasmaSnippetCache {
// load the url as resource from the web
try {
//return httpc.singleGET(url, 5000, null, null, remoteProxyHost, remoteProxyPort);
byte[] resource = getResourceFromCache(url);
byte[] resource = cacheManager.loadResource(url);
if ((fetchOnline) && (resource == null)) {
loadResourceFromWeb(url, 5000);
resource = getResourceFromCache(url);
resource = cacheManager.loadResource(url);
}
return resource;
} catch (IOException e) {
@@ -186,20 +199,6 @@ public class plasmaSnippetCache {
}
}
private byte[] getResourceFromCache(URL url) {
// load the url as resource from the cache
String path = htmlFilterContentScraper.urlNormalform(url).substring(6);
File cache = cacheManager.cachePath;
File f = new File(cache, path);
if (f.exists()) try {
return serverFileUtils.read(f);
} catch (IOException e) {
return null;
} else {
return null;
}
}
private void loadResourceFromWeb(URL url, int socketTimeout) throws IOException {
plasmaCrawlWorker.load(
url,
@@ -221,14 +220,23 @@ public class plasmaSnippetCache {
httpHeader header = null;
try {
header = cacheManager.getCachedResponse(plasmaURL.urlHash(url));
} catch (IOException e) {
return null;
}
if (header == null) return null;
if (plasmaParser.supportedMimeTypesContains(header.mime())) {
return parser.parseSource(url, header.mime(), resource);
} catch (IOException e) {}
if (header == null) {
String filename = url.getFile();
int p = filename.lastIndexOf('.');
if ((p < 0) ||
((p >= 0) && (plasmaParser.supportedFileExtContains(filename.substring(p + 1))))) {
return parser.parseSource(url, "text/html", resource);
} else {
return null;
}
} else {
return null;
if (plasmaParser.supportedMimeTypesContains(header.mime())) {
return parser.parseSource(url, header.mime(), resource);
} else {
return null;
}
}
}
}

@@ -263,9 +263,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
this.parser = new plasmaParser();
// define an extension-blacklist
log.logSystem("Parser: Initializing Media Extensions");
plasmaParser.initMediaExt(getConfig("mediaExt",null));
log.logSystem("Parser: Initializing Extension Mappings for Media/Parser");
plasmaParser.initMediaExt(plasmaParser.extString2extList(getConfig("mediaExt","")));
plasmaParser.initSupportedFileExt(plasmaParser.extString2extList(getConfig("parseableExt","")));
// define a realtime parsable mimetype list
log.logSystem("Parser: Initializing Mime Types");
plasmaParser.initRealtimeParsableMimeTypes(getConfig("parseableRealtimeMimeTypes","application/xhtml+xml,text/html,text/plain"));
@@ -300,6 +301,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
cleanProfiles();
// init facility DB
/*
log.logSystem("Starting Facility Database");
File facilityDBpath = new File(getRootPath(), "DATA/SETTINGS/");
facilityDB = new kelondroTables(facilityDBpath);
@@ -312,7 +314,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
facilityDB.update("statistik", (new serverDate()).toShortString(false).substring(0, 11), new long[]{1,2,3,4,5,6});
long[] testresult = facilityDB.selectLong("statistik", "yyyyMMddHHm");
testresult = facilityDB.selectLong("statistik", (new serverDate()).toShortString(false).substring(0, 11));
*/
// generate snippets cache
log.logSystem("Initializing Snippet Cache");
@@ -322,17 +324,22 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// start yacy core
log.logSystem("Starting YaCy Protocol Core");
//try{Thread.currentThread().sleep(5000);} catch (InterruptedException e) {} // for profiler
yacyCore yc = new yacyCore(this);
//log.logSystem("Started YaCy Protocol Core");
//System.gc(); try{Thread.currentThread().sleep(5000);} catch (InterruptedException e) {} // for profiler
serverInstantThread.oneTimeJob(yc, "loadSeeds", yc.log, 3000);
// deploy threads
log.logSystem("Starting Threads");
System.gc(); // help for profiler
int indexing_cluster = Integer.parseInt(getConfig("80_indexing_cluster", "1"));
if (indexing_cluster < 1) indexing_cluster = 1;
deployThread("90_cleanup", "Cleanup", "simple cleaning process for monitoring information" ,
new serverInstantThread(this, "cleanupJob", "cleanupJobSize"), 10000); // all 5 Minutes
deployThread("80_indexing", "Parsing/Indexing", "thread that performes document parsing and indexing" ,
new serverInstantThread(this, "deQueue", "queueSize"), 10000);
for (int i = 1; i < indexing_cluster; i++) {
setConfig((i + 80) + "_indexing_idlesleep", getConfig("80_indexing_idlesleep", ""));
setConfig((i + 80) + "_indexing_busysleep", getConfig("80_indexing_busysleep", ""));
@@ -344,7 +351,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
deployThread("62_remotetriggeredcrawl", "Remote Crawl Job", "thread that performes a single crawl/indexing step triggered by a remote peer",
new serverInstantThread(this, "remoteTriggeredCrawlJob", "remoteTriggeredCrawlJobSize"), 30000);
deployThread("61_globalcrawltrigger", "Global Crawl Trigger", "thread that triggeres remote peers for crawling",
new serverInstantThread(this, "limitCrawlTriggerJob", "limitCrawlTriggerJobSize"), 30000);
new serverInstantThread(this, "limitCrawlTriggerJob", "limitCrawlTriggerJobSize"), 30000); // error here?
deployThread("50_localcrawl", "Local Crawl", "thread that performes a single crawl step from the local crawl queue",
new serverInstantThread(this, "coreCrawlJob", "coreCrawlJobSize"), 10000);
deployThread("40_peerseedcycle", "Seed-List Upload", "task that a principal peer performes to generate and upload a seed-list to a ftp account",
@@ -357,6 +364,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// init migratiion from 0.37 -> 0.38
classicCache = new plasmaWordIndexClassicCacheMigration(plasmaPath, wordIndex);
if (classicCache.size() > 0) {
setConfig("99_indexcachemigration_idlesleep" , 10000);
setConfig("99_indexcachemigration_busysleep" , 40);
@@ -451,7 +459,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
cacheLoader.close();
wikiDB.close();
messageDB.close();
facilityDB.close();
if (facilityDB != null) facilityDB.close();
urlPool.close();
profiles.close();
parser.close();
@@ -577,6 +585,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
}
public boolean coreCrawlJob() {
System.gc(); // debug
if (urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) == 0) {
//log.logDebug("CoreCrawl: queue is empty");
return false;
@@ -1128,35 +1137,43 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
char[] order;
String urlmask;
long time;
public presearch(Set queryhashes, char[] order, long time /*milliseconds*/, String urlmask) {
int fetchcount;
public presearch(Set queryhashes, char[] order, long time /*milliseconds*/, String urlmask, int fetchcount) {
this.queryhashes = queryhashes;
this.order = order;
this.urlmask = urlmask;
this.time = time;
this.fetchcount = fetchcount;
}
public void run() {
try {
// search the database locally
log.logDebug("presearch: started job");
plasmaWordIndexEntity idx = searchManager.searchHashes(queryhashes, time);
plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, time, 3);
log.logDebug("presearch: found " + idx.size() + " results");
plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, time, fetchcount);
if (acc == null) return;
log.logDebug("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch");
// take some elements and fetch the snippets
int i = 0;
plasmaCrawlLURL.entry urlentry;
String urlstring;
while ((acc.hasMoreElements()) && (i < 3)) {
String urlstring, snippet;
while ((acc.hasMoreElements()) && (i < fetchcount)) {
urlentry = acc.nextElement();
if (urlentry.url().getHost().endsWith(".yacyh")) continue;
urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());
if (urlstring.matches(urlmask)) { //.* is default
snippetCache.retrieve(urlentry.url(), true, queryhashes, true);
log.logDebug("presearch: fetching URL " + urlstring);
snippet = snippetCache.retrieve(urlentry.url(), true, queryhashes);
if (snippet != null) log.logDebug("found snippet for URL " + urlstring + ": '" + snippet + "'");
i++;
}
}
} catch (IOException e) {
e.printStackTrace();
}
log.logDebug("presearch: job terminated");
}
}
@@ -1169,7 +1186,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (order2.equals("quality")) order[1] = plasmaSearch.O_QUALITY; else order[1] = plasmaSearch.O_AGE;
// filter out words that appear in bluelist
Set queryhashes = plasmaSearch.words2hashes(querywords);
Iterator it = querywords.iterator();
String word, gs = "";
while (it.hasNext()) {
@@ -1177,13 +1193,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (blueList.contains(word)) it.remove(); else gs += "+" + word;
}
if (gs.length() > 0) gs = gs.substring(1);
Set queryhashes = plasmaSearch.words2hashes(querywords);
// log
log.logInfo("INIT WORD SEARCH: " + gs + " - " + count + " links, " + (time / 1000) + " seconds");
long timestamp = System.currentTimeMillis();
//Thread preselect = new presearch(querywords, order, time / 10, urlmask);
//preselect.start();
Thread preselect = new presearch(queryhashes, order, time / 10, urlmask, 5);
preselect.start();
// do global fetching
int globalresults = 0;
@@ -1266,7 +1283,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
prop.put("results_" + i + "_urlname", urlname);
prop.put("results_" + i + "_date", dateString(urlentry.moddate()));
prop.put("results_" + i + "_size", Long.toString(urlentry.size()));
snippet = snippetCache.retrieve(url, false, querywords, false);
snippet = snippetCache.retrieve(url, false, queryhashes);
if ((snippet == null) || (snippet.length() < 10)) {
prop.put("results_" + i + "_snippet", 0);
prop.put("results_" + i + "_snippet_text", "");
@@ -1343,7 +1360,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String snippet;
while ((acc.hasMoreElements()) && (i < count)) {
urlentry = acc.nextElement();
snippet = snippetCache.retrieve(urlentry.url(), false, hashes, true);
snippet = snippetCache.retrieve(urlentry.url(), false, hashes);
if ((snippet == null) || (snippet.length() < 10)) {
resource = urlentry.toString();
} else {

@@ -170,7 +170,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
if (System.currentTimeMillis() > messageTime) {
System.gc(); // for better statistic
wordsPerSecond = wordcount * 1000 / (1 + System.currentTimeMillis() - startTime);
log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / wordsPerSecond) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
log.logInfo("dumping status: " + wordcount + " words done, " + (cache.size() / (wordsPerSecond + 1)) + " seconds remaining, free mem = " + (Runtime.getRuntime().freeMemory() / 1024 / 1024) + "MB");
messageTime = System.currentTimeMillis() + 5000;
}
}
@@ -552,11 +552,9 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
flushThread.pause();
//serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem: cache.size=" + cache.size() + "; hashScore.size=" + hashScore.size());
while (cache.size() >= this.maxWords) flushFromMem();
if ((cache.size() > 10000) && (Runtime.getRuntime().freeMemory() < 11000000)) flushFromMem();
while ((cache.size() > 0) && (Runtime.getRuntime().freeMemory() < 1000000)) {
flushFromMem();
System.gc();
}
if ((cache.size() > 10000) && (Runtime.getRuntime().freeMemory() < 5000000)) flushFromMem();
if ((cache.size() > 0) && (Runtime.getRuntime().freeMemory() < 1000000)) flushFromMem();
//if (flushc > 0) serverLog.logDebug("PLASMA INDEXING", "addEntryToIndexMem - flushed " + flushc + " entries");
// put new words into cache

@@ -79,6 +79,7 @@ public class plasmaWordIndexEntity {
}
private kelondroTree indexFile(File databaseRoot, String wordHash) throws IOException {
if (wordHash.length() < 12) throw new IOException("word hash wrong: '" + wordHash + "'");
theLocation = wordHash2path(databaseRoot, wordHash);
File fp = theLocation.getParentFile();
if (fp != null) fp.mkdirs();
@@ -97,7 +98,8 @@ public class plasmaWordIndexEntity {
public static File wordHash2path(File databaseRoot, String hash) {
// creates a path that constructs hashing on a file system
return new File (databaseRoot, "WORDS/" +
return new File (databaseRoot, "WORDS/" +
hash.substring(0,1) + "/" + hash.substring(1,2) + "/" + hash.substring(2,4) + "/" +
hash.substring(4,6) + "/" + hash + ".db");
}
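The whitespace-only change above leaves the hashing scheme intact: characters 0, 1, 2-3 and 4-5 of the word hash become directory levels, spreading the per-word .db files across nested directories so no single directory collects millions of entries. Worked through for a hypothetical 12-character hash:

wordHash2path(databaseRoot, "abcdefghijkl")
  -> databaseRoot/WORDS/a/b/cd/ef/abcdefghijkl.db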

@@ -236,23 +236,26 @@ public final class serverByteBuffer extends OutputStream {
length = length - start;
return this;
}
private serverByteBuffer trim(int start, int end) {
if (end > length) throw new IndexOutOfBoundsException("trim: end > length");
trim(start);
// the end index is exclusive: it points one position past the wanted range
if (start > length) throw new IndexOutOfBoundsException("trim: start > length");
if (end > length) throw new IndexOutOfBoundsException("trim: end > length");
if (start > end) throw new IndexOutOfBoundsException("trim: start > end");
offset = offset + start;
length = end - start;
return this;
}
public serverByteBuffer trim() {
int l = 0; while ((l < length) && (buffer[l] <= 32)) l++;
int r = length; while ((r > 0) && (buffer[r - 1] <= 32)) r--;
if ((l <= r) && (l < length)) return trim(l, r);
return this;
int l = 0; while ((l < length) && (buffer[offset + l] <= 32)) l++;
int r = length; while ((r > 0) && (buffer[offset + r - 1] <= 32)) r--;
if (l > r) r = l;
return trim(l, r);
}
public String toString() {
return new String(getBytes(), offset, length);
return new String(buffer, offset, length);
}
public Properties propParser() {
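The trim fixes all address the same view bookkeeping: serverByteBuffer is a window (offset, length) into a larger array, and the old whitespace trim() scanned from buffer[0] instead of buffer[offset], i.e. against bytes outside the window once offset had advanced. trim(start, end) now also validates start and start <= end before moving the window, and toString() reads the raw buffer at offset directly instead of re-applying offset to the (presumably already offset-adjusted) copy from getBytes(). A standalone illustration of the off-by-offset scan:

// Standalone sketch: a view (offset, length) into a byte array. Scanning from
// buffer[0] instead of buffer[offset] trims against bytes outside the view.
class OffsetTrimSketch {
    public static void main(String[] args) {
        byte[] buffer = "XX  hi ".getBytes();
        int offset = 2, length = 5; // the view is "  hi "

        int l = 0;  // old scan: inspects 'X','X',... and finds nothing to trim
        while ((l < length) && (buffer[l] <= 32)) l++;

        int l2 = 0; // fixed scan: skips the two leading blanks of the view
        while ((l2 < length) && (buffer[offset + l2] <= 32)) l2++;

        System.out.println(l);  // 0
        System.out.println(l2); // 2
    }
}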

@@ -450,8 +450,8 @@ public final class serverCore extends serverAbstractThread implements serverThre
*/
public SessionPool(SessionFactory objFactory) {
super(objFactory);
this.setMaxIdle(75); // Maximum idle threads.
this.setMaxActive(150); // Maximum active threads.
this.setMaxIdle(50); // Maximum idle threads.
this.setMaxActive(100); // Maximum active threads.
this.setMinEvictableIdleTimeMillis(30000); //Evictor runs every 30 secs.
//this.setMaxWait(1000); // Wait 1 second till a thread is available
}

@@ -100,6 +100,7 @@ parseableMimeTypes=
# this is important for recognizing <a href> tags as non-HTML references
# These files will be excluded from indexing _(Please keep extensions in alphabetical order)_
mediaExt=ace,arj,asf,avi,bin,bz2,css,deb,doc,dmg,gif,gz,hqx,img,iso,jar,jpe,jpg,jpeg,mpeg,mov,mp3,mpg,ogg,png,pdf,ppt,ps,ram,rar,rm,rpm,sit,swf,sxc,sxd,sxi,sxw,tar,tgz,torrent,wmv,xcf,xls,zip
parseableExt=html,htm,txt
# Promotion Strings
# These strings appear in the Web Mask of the YACY search client
