From 19dbed7cc85bfbbeca4b288bc07f87f86cc5c054 Mon Sep 17 00:00:00 2001
From: orbiter <orbiter@6c8d7289-2bf4-0310-a012-ef5d649a1542>
Date: Tue, 12 Jul 2005 15:09:35 +0000
Subject: [PATCH] code clean-up

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@401 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/IndexControl_p.java                    |   1 +
 htroot/IndexCreateIndexingQueue_p.java        |   4 +-
 htroot/IndexCreateWWWGlobalQueue_p.java       |   4 +-
 htroot/IndexCreateWWWLocalQueue_p.java        |   4 +-
 htroot/yacy/crawlReceipt.java                 |   2 +-
 .../de/anomic/kelondro/kelondroMSetTools.java |  47 ++
 source/de/anomic/plasma/plasmaCrawlEURL.java  |  20 +-
 source/de/anomic/plasma/plasmaCrawlNURL.java  |  28 +-
 .../de/anomic/plasma/plasmaSnippetCache.java  |  37 ++
 .../de/anomic/plasma/plasmaSwitchboard.java   | 413 ++----------------
 source/de/anomic/plasma/plasmaURLPattern.java |   4 +-
 source/de/anomic/plasma/plasmaURLPool.java    |  17 +-
 source/de/anomic/plasma/plasmaWordIndex.java  |   4 +
 .../plasma/plasmaWordIndexDistribution.java   | 254 +++++++++++
 source/de/anomic/server/serverFileUtils.java  |  22 +-
 source/de/anomic/yacy/yacyClient.java         |   6 +-
 16 files changed, 439 insertions(+), 428 deletions(-)
 create mode 100644 source/de/anomic/plasma/plasmaWordIndexDistribution.java

diff --git a/htroot/IndexControl_p.java b/htroot/IndexControl_p.java
index 567c6a627..9ed16be34 100644
--- a/htroot/IndexControl_p.java
+++ b/htroot/IndexControl_p.java
@@ -108,6 +108,7 @@ public class IndexControl_p {
         if (post.containsKey("setIndexDistribute")) {
             boolean allowDistributeIndex = ((String) post.get("indexDistribute", "")).equals("on");
             switchboard.setConfig("allowDistributeIndex", (allowDistributeIndex) ? "true" : "false");
+            if (allowDistributeIndex) switchboard.indexDistribution.enable(); else switchboard.indexDistribution.disable(); 
         }
         
         if (post.containsKey("setIndexReceive")) {
diff --git a/htroot/IndexCreateIndexingQueue_p.java b/htroot/IndexCreateIndexingQueue_p.java
index accb2fda9..2c8dfc8e4 100644
--- a/htroot/IndexCreateIndexingQueue_p.java
+++ b/htroot/IndexCreateIndexingQueue_p.java
@@ -123,11 +123,11 @@ public class IndexCreateIndexingQueue_p {
             }
             dark = true;
             String url, initiatorHash, executorHash;
-            plasmaCrawlEURL.entry entry;
+            plasmaCrawlEURL.Entry entry;
             yacySeed initiatorSeed, executorSeed;
             int j=0;
             for (i = switchboard.urlPool.errorURL.stackSize() - 1; i >= (switchboard.urlPool.errorURL.stackSize() - showRejectedCount); i--) {
-                entry = (plasmaCrawlEURL.entry) switchboard.urlPool.errorURL.getStack(i);
+                entry = (plasmaCrawlEURL.Entry) switchboard.urlPool.errorURL.getStack(i);
                 initiatorHash = entry.initiator();
                 executorHash = entry.executor();
                 url = entry.url().toString();
diff --git a/htroot/IndexCreateWWWGlobalQueue_p.java b/htroot/IndexCreateWWWGlobalQueue_p.java
index d7dbf45f0..e174e3dd2 100644
--- a/htroot/IndexCreateWWWGlobalQueue_p.java
+++ b/htroot/IndexCreateWWWGlobalQueue_p.java
@@ -85,10 +85,10 @@ public class IndexCreateWWWGlobalQueue_p {
             prop.put("crawler-queue", 0);
         } else {
             prop.put("crawler-queue", 1);
-            plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_LIMIT, 100);
+            plasmaCrawlNURL.Entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_LIMIT, 100);
             prop.put("crawler-queue_num", stackSize);//num Entries
             prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent
-            plasmaCrawlNURL.entry urle;
+            plasmaCrawlNURL.Entry urle;
             boolean dark = true;
             yacySeed initiator;
             int i;
diff --git a/htroot/IndexCreateWWWLocalQueue_p.java b/htroot/IndexCreateWWWLocalQueue_p.java
index 9fb72806e..7605bb8b3 100644
--- a/htroot/IndexCreateWWWLocalQueue_p.java
+++ b/htroot/IndexCreateWWWLocalQueue_p.java
@@ -85,10 +85,10 @@ public class IndexCreateWWWLocalQueue_p {
             prop.put("crawler-queue", 0);
         } else {
             prop.put("crawler-queue", 1);
-            plasmaCrawlNURL.entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100);
+            plasmaCrawlNURL.Entry[] crawlerList = switchboard.urlPool.noticeURL.top(plasmaCrawlNURL.STACK_TYPE_CORE, 100);
             prop.put("crawler-queue_num", stackSize);//num Entries
             prop.put("crawler-queue_show-num", crawlerList.length); //showin sjow-num most recent
-            plasmaCrawlNURL.entry urle;
+            plasmaCrawlNURL.Entry urle;
             boolean dark = true;
             yacySeed initiator;
             int i;
diff --git a/htroot/yacy/crawlReceipt.java b/htroot/yacy/crawlReceipt.java
index 142c04f24..7b381f6af 100644
--- a/htroot/yacy/crawlReceipt.java
+++ b/htroot/yacy/crawlReceipt.java
@@ -125,7 +125,7 @@ public class crawlReceipt {
             // ready for more
             prop.put("delay", "10");
         } else {
-            plasmaCrawlNURL.entry en = switchboard.urlPool.noticeURL.getEntry(urlhash);
+            plasmaCrawlNURL.Entry en = switchboard.urlPool.noticeURL.getEntry(urlhash);
             if (en != null) {
                 switchboard.urlPool.errorURL.newEntry(en.url(), en.referrerHash(), en.initiator(), iam, en.name(), result + ":" + reason, new bitfield(plasmaURL.urlFlagLength), false);
                 switchboard.urlPool.noticeURL.remove(urlhash);
diff --git a/source/de/anomic/kelondro/kelondroMSetTools.java b/source/de/anomic/kelondro/kelondroMSetTools.java
index ff1bd17b6..21c0b7594 100644
--- a/source/de/anomic/kelondro/kelondroMSetTools.java
+++ b/source/de/anomic/kelondro/kelondroMSetTools.java
@@ -40,6 +40,11 @@
 
 package de.anomic.kelondro;
 
+import java.io.File;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.io.FileInputStream;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.TreeMap;
@@ -351,6 +356,48 @@ public class kelondroMSetTools {
     
     // ------------------------------------------------------------------------------------------------
 
+    // Reads "key<sep>value" lines from filename: keys are trimmed and lower-cased, values trimmed. Blank lines, '#' lines and
+    // lines where sep is absent or at index 0 are skipped. mapname is not used. An IOException silently ends reading (best effort).
+    public static TreeMap loadMap(String mapname, String filename, String sep) {
+        TreeMap result = new TreeMap();
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
+            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
+                row = row.trim();
+                int cut = ((row.length() == 0) || (row.startsWith("#"))) ? -1 : row.indexOf(sep);
+                if (cut > 0) result.put(row.substring(0, cut).trim().toLowerCase(), row.substring(cut + sep.length()).trim());
+            }
+        } catch (IOException e) { // deliberately ignored: whatever was read so far is returned
+        } finally {
+            if (reader != null) try { reader.close(); } catch (Exception e) {}
+        }
+        return result;
+    }
+    
+    // Reads one entry per line from file into a set ordered by kelondroMSetTools.fastStringComparator.
+    // Entries are trimmed and lower-cased; blank lines and lines starting with '#' are skipped.
+    // A missing file yields an empty set; an IOException silently ends reading (best effort).
+    public static TreeSet loadList(File file) {
+        TreeSet entries = new TreeSet(kelondroMSetTools.fastStringComparator);
+        if (!(file.exists())) return entries;
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
+            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
+                row = row.trim().toLowerCase();
+                if ((row.length() > 0) && (!(row.startsWith("#")))) entries.add(row);
+            }
+        } catch (IOException e) { // deliberately ignored: whatever was read so far is returned
+        } finally {
+            if (reader != null) try { reader.close(); } catch (Exception e) {}
+        }
+        return entries;
+    }
+    
+    // ------------------------------------------------------------------------------------------------
+
+    
     public static void main(String[] args) {
 	TreeMap m = new TreeMap();
 	TreeSet s = new TreeSet();
diff --git a/source/de/anomic/plasma/plasmaCrawlEURL.java b/source/de/anomic/plasma/plasmaCrawlEURL.java
index ff41132bd..0ebed064b 100644
--- a/source/de/anomic/plasma/plasmaCrawlEURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlEURL.java
@@ -84,7 +84,7 @@ public class plasmaCrawlEURL extends plasmaURL {
 	}
     }
 
-    public synchronized entry newEntry(URL url, String referrer, String initiator, String executor,
+    public synchronized Entry newEntry(URL url, String referrer, String initiator, String executor,
 				       String name, String failreason, bitfield flags, boolean retry) {
         if ((referrer == null) || (referrer.length() < urlHashLength)) referrer = dummyHash;
         if ((initiator == null) || (initiator.length() < urlHashLength)) initiator = dummyHash;
@@ -101,15 +101,15 @@ public class plasmaCrawlEURL extends plasmaURL {
         map.put("failreason", failreason);
         map.put("flags", flags);
         rejectedStack.add(map);
-        entry e =  new entry(url, referrer, initiator, executor, name, failreason, flags);
+        Entry e = new Entry(url, referrer, initiator, executor, name, failreason, flags);
         
         // put in table
         if (retry) e.store();
         return e;
     }
 
-    public synchronized entry getEntry(String hash) {
-	return new entry(hash);
+    public synchronized Entry getEntry(String hash) {
+	return new Entry(hash);
     }
 
     public void clearStack() {
@@ -120,13 +120,13 @@ public class plasmaCrawlEURL extends plasmaURL {
         return rejectedStack.size();
     }
     
-    public entry getStack(int pos) {
+    public Entry getStack(int pos) {
         HashMap m = (HashMap) rejectedStack.get(pos);
-        return new entry((URL) m.get("url"), (String) m.get("referrer"), (String) m.get("initiator"), (String) m.get("executor"),
+        return new Entry((URL) m.get("url"), (String) m.get("referrer"), (String) m.get("initiator"), (String) m.get("executor"),
 			 (String) m.get("name"), (String) m.get("failreason"), (bitfield) m.get("flags"));
     }
     
-    public class entry {
+    public class Entry {
 
 	private String   hash;       // the url's hash
         private String   referrer;   // the url's referrer hash
@@ -140,7 +140,7 @@ public class plasmaCrawlEURL extends plasmaURL {
         private String   failreason; // string describing reason for load fail
         private bitfield flags;      // extra space
 
-	public entry(URL url, String referrer, String initiator, String executor, String name, String failreason, bitfield flags) {
+	public Entry(URL url, String referrer, String initiator, String executor, String name, String failreason, bitfield flags) {
 	    // create new entry and store it into database
 	    this.hash       = urlHash(url);
 	    this.referrer   = (referrer == null) ? dummyHash : referrer;
@@ -156,7 +156,7 @@ public class plasmaCrawlEURL extends plasmaURL {
 	    
 	}
 
-	public entry(String hash) {
+	public Entry(String hash) {
 	    // generates an plasmaEURLEntry using the url hash
 	    // to speed up the access, the url-hashes are buffered
 	    // in the hash cache.
@@ -265,7 +265,7 @@ public class plasmaCrawlEURL extends plasmaURL {
             return i.hasNext();
         }
 	public Object nextElement() {
-            return new entry(new String(((byte[][]) i.next())[0]));
+            return new Entry(new String(((byte[][]) i.next())[0]));
         }
     }
     
diff --git a/source/de/anomic/plasma/plasmaCrawlNURL.java b/source/de/anomic/plasma/plasmaCrawlNURL.java
index 740e2b6c0..3ee22fbc2 100644
--- a/source/de/anomic/plasma/plasmaCrawlNURL.java
+++ b/source/de/anomic/plasma/plasmaCrawlNURL.java
@@ -187,9 +187,9 @@ public class plasmaCrawlNURL extends plasmaURL {
         return stackIndex.contains(urlhash);
     }
     
-    public synchronized entry newEntry(String initiator, URL url, Date loaddate, String referrer, String name,
+    public synchronized Entry newEntry(String initiator, URL url, Date loaddate, String referrer, String name,
                 String profile, int depth, int anchors, int forkfactor, int stackMode) {
-	entry e = new entry(initiator, url, referrer, name, loaddate, profile,
+	Entry e = new Entry(initiator, url, referrer, name, loaddate, profile,
                      depth, anchors, forkfactor);
         try {
             switch (stackMode) {
@@ -208,7 +208,7 @@ public class plasmaCrawlNURL extends plasmaURL {
         return e;
     }
 
-    public entry[] top(int stackType, int count) {
+    public Entry[] top(int stackType, int count) {
         switch (stackType) {
             case STACK_TYPE_CORE:     return top(coreStack, count);
             case STACK_TYPE_LIMIT:    return top(limitStack, count);
@@ -221,7 +221,7 @@ public class plasmaCrawlNURL extends plasmaURL {
         }
     }
     
-    public entry pop(int stackType) {
+    public Entry pop(int stackType) {
         switch (stackType) {
             case STACK_TYPE_CORE:     return pop(coreStack);
             case STACK_TYPE_LIMIT:    return pop(limitStack);
@@ -234,11 +234,11 @@ public class plasmaCrawlNURL extends plasmaURL {
         }
     }
     
-    private entry pop(kelondroStack stack) {
+    private Entry pop(kelondroStack stack) {
 	// this is a filo - pop
 	try {
 	    if (stack.size() > 0) {
-                entry e = new entry(new String(stack.pop()[0]));
+                Entry e = new Entry(new String(stack.pop()[0]));
                 stackIndex.remove(e.hash);
                 return e;
 	    } else {
@@ -249,13 +249,13 @@ public class plasmaCrawlNURL extends plasmaURL {
 	}
     }
 
-    private entry[] top(kelondroStack stack, int count) {
+    private Entry[] top(kelondroStack stack, int count) {
 	// this is a filo - top
         if (count > stack.size()) count = stack.size();
-        entry[] list = new entry[count];
+        Entry[] list = new Entry[count];
 	try {
             for (int i = 0; i < count; i++) {
-		list[i] = new entry(new String(stack.top(i)[0]));
+		list[i] = new Entry(new String(stack.top(i)[0]));
 	    }
             return list;
         } catch (IOException e) {
@@ -263,8 +263,8 @@ public class plasmaCrawlNURL extends plasmaURL {
 	}
     }
 
-    public synchronized entry getEntry(String hash) {
-	return new entry(hash);
+    public synchronized Entry getEntry(String hash) {
+	return new Entry(hash);
     }
 
     public synchronized void remove(String hash) {
@@ -273,7 +273,7 @@ public class plasmaCrawlNURL extends plasmaURL {
         } catch (IOException e) {}
     }
 
-    public class entry {
+    public class Entry {
 
         private String   initiator;     // the initiator hash, is NULL or "" if it is the own proxy;
                                         // if this is generated by a crawl, the own peer hash in entered
@@ -289,7 +289,7 @@ public class plasmaCrawlNURL extends plasmaURL {
         private bitfield flags;
         private int      handle;
         
-	public entry(String initiator, URL url, String referrer, String name, Date loaddate, String profileHandle,
+	public Entry(String initiator, URL url, String referrer, String name, Date loaddate, String profileHandle,
                      int depth, int anchors, int forkfactor) {
 	    // create new entry and store it into database
 	    this.hash          = urlHash(url);
@@ -307,7 +307,7 @@ public class plasmaCrawlNURL extends plasmaURL {
 	    store();
 	}
 
-	public entry(String hash) {
+	public Entry(String hash) {
 	    // generates an plasmaNURLEntry using the url hash
 	    // to speed up the access, the url-hashes are buffered
 	    // in the hash cache.
diff --git a/source/de/anomic/plasma/plasmaSnippetCache.java b/source/de/anomic/plasma/plasmaSnippetCache.java
index 9d35255e6..54d60a7f2 100644
--- a/source/de/anomic/plasma/plasmaSnippetCache.java
+++ b/source/de/anomic/plasma/plasmaSnippetCache.java
@@ -54,6 +54,7 @@ import de.anomic.http.httpHeader;
 import de.anomic.kelondro.kelondroMScoreCluster;
 import de.anomic.server.logging.serverLog;
 import de.anomic.yacy.yacySearch;
+import de.anomic.htmlFilter.htmlFilterContentScraper;
 
 public class plasmaSnippetCache {
 
@@ -368,4 +369,40 @@ public class plasmaSnippetCache {
             log);
     }
     
+    // Walks the search result acc and starts one Fetcher thread per entry whose url string, as produced by
+    // htmlFilterContentScraper.urlNormalform(), matches the regular expression urlmask and for which
+    // existsInCache() reports false. Stops when acc is exhausted or fetchcount threads have been started.
+    public void fetch(plasmaSearch.result acc, Set queryhashes, String urlmask, int fetchcount) {
+        int started = 0;
+        while ((acc.hasMoreElements()) && (started < fetchcount)) {
+            plasmaCrawlLURL.Entry candidate = acc.nextElement();
+            // entries on ".yacyh" hosts are skipped and do not count against fetchcount
+            if (candidate.url().getHost().endsWith(".yacyh")) continue;
+            String normalized = htmlFilterContentScraper.urlNormalform(candidate.url());
+            if (!(normalized.matches(urlmask))) continue;
+            if (existsInCache(candidate.url(), queryhashes)) continue;
+            // matching and not cached: load it in the background
+            new Fetcher(candidate.url(), queryhashes).start();
+            started++;
+        }
+    }
+        
+    public class Fetcher extends Thread { // loads the snippet for one url through retrieve() and logs the outcome
+        URL url;         // null when the url given to the constructor is on a ".yacyh" host
+        Set queryhashes; // handed to retrieve() unchanged
+        public Fetcher(URL url, Set queryhashes) {
+            this.url = (url.getHost().endsWith(".yacyh")) ? null : url;
+            this.queryhashes = queryhashes;
+        }
+        public void run() {
+            if (url == null) return; // rejected in the constructor: never call retrieve() with a null url
+            log.logDebug("snippetFetcher: try to get URL " + url);
+            plasmaSnippetCache.result snippet = retrieve(url, queryhashes, true, 260);
+            if (snippet.line == null)
+                log.logDebug("snippetFetcher: cannot get URL " + url + ". error(" + snippet.source + "): " + snippet.error);
+            else
+                log.logDebug("snippetFetcher: got URL " + url + ", the snippet is '" + snippet.line + "', source=" + snippet.source);
+        }
+    }
+    
 }
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 8989a0b35..d5c3a6413 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -157,28 +157,28 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     public static plasmaURLPattern urlBlacklist;
     
     // storage management
-    private File                   cachePath;
-    private File                   plasmaPath;
-    public  File                   listsPath;
-    public  plasmaURLPool          urlPool;
-    public  plasmaWordIndex        wordIndex;
-    public  plasmaSearch           searchManager;
-    public  plasmaHTCache          cacheManager;
-    public  plasmaSnippetCache     snippetCache;
-    public  plasmaCrawlLoader      cacheLoader;
-    public  plasmaSwitchboardQueue sbQueue;
-    public  messageBoard           messageDB;
-    public  wikiBoard              wikiDB;
-    public  String                 remoteProxyHost;
-    public  int                    remoteProxyPort;
-    public  boolean                remoteProxyUse;
-    public  plasmaCrawlProfile     profiles;
-    public  plasmaCrawlProfile.entry defaultProxyProfile;
-    public  plasmaCrawlProfile.entry defaultRemoteProfile;
-    public  distributeIndex        indexDistribution;
-    public  HashMap                outgoingCookies, incomingCookies;
-    public  kelondroTables         facilityDB;
-    public  plasmaParser           parser;
+    private File                        cachePath;
+    private File                        plasmaPath;
+    public  File                        listsPath;
+    public  plasmaURLPool               urlPool;
+    public  plasmaWordIndex             wordIndex;
+    public  plasmaSearch                searchManager;
+    public  plasmaHTCache               cacheManager;
+    public  plasmaSnippetCache          snippetCache;
+    public  plasmaCrawlLoader           cacheLoader;
+    public  plasmaSwitchboardQueue      sbQueue;
+    public  messageBoard                messageDB;
+    public  wikiBoard                   wikiDB;
+    public  String                      remoteProxyHost;
+    public  int                         remoteProxyPort;
+    public  boolean                     remoteProxyUse;
+    public  plasmaCrawlProfile          profiles;
+    public  plasmaCrawlProfile.entry    defaultProxyProfile;
+    public  plasmaCrawlProfile.entry    defaultRemoteProfile;
+    public  plasmaWordIndexDistribution indexDistribution;
+    public  HashMap                     outgoingCookies, incomingCookies;
+    public  kelondroTables              facilityDB;
+    public  plasmaParser                parser;
     public  plasmaWordIndexClassicCacheMigration classicCache;
     
     private serverSemaphore shutdownSync = new serverSemaphore(0);
@@ -217,7 +217,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 	if (blueList == null) {
 	    // read only once upon first instantiation of this class
 	    String f = getConfig("plasmaBlueList", null);
-	    if (f != null) blueList = loadList(new File(f)); else blueList= new TreeSet();
+	    if (f != null) blueList = kelondroMSetTools.loadList(new File(f)); else blueList= new TreeSet();
 	}
         
         // load the black-list / inspired by [AS]
@@ -231,7 +231,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             
         // load stopwords
         if (stopwords == null) {
-            stopwords = loadList(new File(rootPath, "yacy.stopwords"));
+            stopwords = kelondroMSetTools.loadList(new File(rootPath, "yacy.stopwords"));
         }
         
 	// read memory amount
@@ -376,7 +376,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         deployThread("30_peerping", "YaCy Core", "this is the p2p-control and peer-ping task",
                      peerPing = new serverInstantThread(yc, "peerPing", null), 2000);
         peerPing.setSyncObject(new Object());
-        indexDistribution = new distributeIndex(100 /*indexCount*/, 8000, 1 /*peerCount*/);
+        indexDistribution = new plasmaWordIndexDistribution(urlPool, wordIndex, log,
+                                                            getConfig("allowDistributeIndex", "false").equals("true"));
+        indexDistribution.setCounts(100 /*indexCount*/, 1 /*peerCount*/, 8000);
         deployThread("20_dhtdistribution", "DHT Distribution (currently by juniors only)", "selection, transfer and deletion of index entries that are not searched on your peer, but on others",
                      new serverInstantThread(indexDistribution, "job", null), 120000);
             
@@ -401,8 +403,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     
     }
     
-
-        
     private static String ppRamString(int bytes) {
         if (bytes < 1024) return bytes + " KByte";
         bytes = bytes / 1024;
@@ -457,8 +457,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     public plasmaHTCache getCacheManager() {
 	return cacheManager;
     }
-
-    
     
     synchronized public void htEntryStoreEnqueued(plasmaHTCache.Entry entry) throws IOException {
 	if (cacheManager.full())
@@ -497,33 +495,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                 entry.name()
         ));
         
-        // write log
-
-        /*
-        switch (entry.status) {
-            case plasmaHTCache.CACHE_UNFILLED:
-                log.logInfo("CACHE UNFILLED: " + entry.cacheFile); break;
-            case plasmaHTCache.CACHE_FILL:
-                log.logInfo("CACHE FILL: " + entry.cacheFile + ((entry.cacheArray == null) ? "" : " (cacheArray is filled)"));
-                break;
-            case plasmaHTCache.CACHE_HIT:
-                log.logInfo("CACHE HIT: " + entry.cacheFile); break;
-            case plasmaHTCache.CACHE_STALE_NO_RELOAD:
-                log.logInfo("CACHE STALE, NO RELOAD: " + entry.cacheFile); break;
-            case plasmaHTCache.CACHE_STALE_RELOAD_GOOD:
-                log.logInfo("CACHE STALE, NECESSARY RELOAD: " + entry.cacheFile); break;
-            case plasmaHTCache.CACHE_STALE_RELOAD_BAD:
-                log.logInfo("CACHE STALE, SUPERFLUOUS RELOAD: " + entry.cacheFile); break;
-            case plasmaHTCache.CACHE_PASSING:
-                log.logInfo("PASSING: " + entry.cacheFile); break;
-            default:
-                log.logInfo("CACHE STATE UNKNOWN: " + entry.cacheFile); break;
-        }
-         */
         return true;
     }
     
-
     public boolean htEntryStoreJob() {
         if (cacheManager.empty()) return false;
         try {
@@ -536,26 +510,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     public int htEntrySize() {
         return cacheManager.size();
     }
-    
-    private static TreeSet loadList(File file) {
-        TreeSet list = new TreeSet(kelondroMSetTools.fastStringComparator);
-        if (!(file.exists())) return list;
-        
-        BufferedReader br = null;
-        try {
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
-            String line;
-            while ((line = br.readLine()) != null) {
-                line = line.trim();
-                if ((line.length() > 0) && (!(line.startsWith("#")))) list.add(line.trim().toLowerCase());
-            }
-            br.close();
-        } catch (IOException e) {            
-        } finally {
-            if (br != null) try{br.close();}catch(Exception e){}
-        }
-        return list;
-    }
 
     public void close() {
         log.logSystem("SWITCHBOARD SHUTDOWN STEP 1: sending termination signal to managed threads:");
@@ -726,7 +680,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }           
         
         // do a local crawl
-        plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
+        plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_CORE);
         String stats = "LOCALCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
@@ -770,7 +724,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }           
         
         // start a global crawl, if possible
-        plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT);
+        plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_LIMIT);
         String stats = "REMOTECRAWLTRIGGER[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
@@ -855,7 +809,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }           
         
         // we don't want to crawl a global URL globally, since WE are the global part. (from this point of view)
-        plasmaCrawlNURL.entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE);
+        plasmaCrawlNURL.Entry urlEntry = urlPool.noticeURL.pop(plasmaCrawlNURL.STACK_TYPE_REMOTE);
         String stats = "REMOTETRIGGEREDCRAWL[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_LIMIT) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_OVERHANG) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]";
         if (urlEntry.url() == null) {
             log.logError(stats + ": urlEntry.url() == null");
@@ -878,7 +832,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
     }
 
     private void processResourceStack(plasmaSwitchboardQueue.Entry entry) {
-        // work off one stack entry with a fresh resource (scraped web page)
+        // work off one stack entry with a fresh resource
         try {    
             // we must distinguish the following cases: resource-load was initiated by
             // 1) global crawling: the index is extern, not here (not possible here)
@@ -1104,7 +1058,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
 
         String nexturlhash = plasmaURL.urlHash(nexturl);
         String dbocc = "";
-        if ((dbocc = urlPool.testHash(nexturlhash)) != null) {
+        if ((dbocc = urlPool.exists(nexturlhash)) != null) {
             // DISTIGUISH OLD/RE-SEARCH CASES HERE!
             reason = "double_(registered_in_" + dbocc + ")";
             urlPool.errorURL.newEntry(nexturl, referrerHash, initiatorHash, yacyCore.seedDB.mySeed.hash,
@@ -1137,23 +1091,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         return null;
     }
     
-    private URL hash2url(String urlhash) {
-        if (urlhash.equals(plasmaURL.dummyHash)) return null;
-        plasmaCrawlNURL.entry ne = urlPool.noticeURL.getEntry(urlhash);
-        if (ne != null) return ne.url();
-        plasmaCrawlLURL.Entry le = urlPool.loadedURL.getEntry(urlhash);
-        if (le != null) return le.url();
-        plasmaCrawlEURL.entry ee = urlPool.errorURL.getEntry(urlhash);
-        if (ee != null) return ee.url();
-        return null;
-    }
-    
-    private String hash2urlstring(String urlhash) {
-        URL u = hash2url(urlhash);
-        if (u == null) return plasmaURL.dummyHash; else return u.toString();
-    }
-    
-    private void processLocalCrawling(plasmaCrawlNURL.entry urlEntry, plasmaCrawlProfile.entry profile, String stats) {
+    private void processLocalCrawling(plasmaCrawlNURL.Entry urlEntry, plasmaCrawlProfile.entry profile, String stats) {
         // work off one Crawl stack entry
         if ((urlEntry == null) && (urlEntry.url() == null)) {
             log.logInfo(stats + ": urlEntry=null");
@@ -1164,7 +1102,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         return;
     }
     
-    private boolean processRemoteCrawlTrigger(plasmaCrawlNURL.entry urlEntry) {
+    private boolean processRemoteCrawlTrigger(plasmaCrawlNURL.Entry urlEntry) {
         // return true iff another peer has/will index(ed) the url
         if (urlEntry == null) {
             log.logInfo("REMOTECRAWLTRIGGER[" + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_CORE) + ", " + urlPool.noticeURL.stackSize(plasmaCrawlNURL.STACK_TYPE_REMOTE) + "]: urlEntry=null");
@@ -1183,7 +1121,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             log.logDebug("ERROR: plasmaSwitchboard.processRemoteCrawlTrigger - url is null. name=" + urlEntry.name());
             return true;
         }
-        String nexturlString = urlEntry.url().toString();
         String urlhash = plasmaURL.urlHash(urlEntry.url());
         
         // check remote crawl
@@ -1195,7 +1132,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }
         
         // do the request
-        HashMap page = yacyClient.crawlOrder(remoteSeed, nexturlString, hash2urlstring(urlEntry.referrerHash()), 0);
+        HashMap page = yacyClient.crawlOrder(remoteSeed, urlEntry.url(), urlPool.getURL(urlEntry.referrerHash()), 0);
 
         
         // check success
@@ -1216,17 +1153,17 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                          the resource is also returned in lurl
         */
         if ((page == null) || (page.get("delay") == null)) {
-            log.logInfo("CRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. CAUSE: unknown (URL=" + nexturlString + ")");
+            log.logInfo("CRAWL: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " FAILED. CAUSE: unknown (URL=" + urlEntry.url().toString() + ")");
             if (remoteSeed != null) yacyCore.peerActions.peerDeparture(remoteSeed);
             return false;
         } else try {
-            log.logDebug("plasmaSwitchboard.processRemoteCrawlTrigger: remoteSeed=" + remoteSeed.getName() + ", url=" + nexturlString + ", response=" + page.toString()); // DEBUG
+            log.logDebug("plasmaSwitchboard.processRemoteCrawlTrigger: remoteSeed=" + remoteSeed.getName() + ", url=" + urlEntry.url().toString() + ", response=" + page.toString()); // DEBUG
         
             int newdelay = Integer.parseInt((String) page.get("delay"));
             yacyCore.dhtAgent.setCrawlDelay(remoteSeed.hash, newdelay);
             String response = (String) page.get("response");
             if (response.equals("stacked")) {
-                log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " PLACED URL=" + nexturlString + "; NEW DELAY=" + newdelay);
+                log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " PLACED URL=" + urlEntry.url().toString() + "; NEW DELAY=" + newdelay);
                 return true;
             } else if (response.equals("double")) {
                 String lurl = (String) page.get("lurl");
@@ -1236,14 +1173,14 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                         urlPool.loadedURL.newEntry(propStr, true),
                         yacyCore.seedDB.mySeed.hash, remoteSeed.hash, 1);
                         urlPool.noticeURL.remove(entry.hash());
-                    log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + "). URL IS CONSIDERED AS 'LOADED!'");
+                    log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " SUPERFLUOUS. CAUSE: " + page.get("reason") + " (URL=" + urlEntry.url().toString() + "). URL IS CONSIDERED AS 'LOADED!'");
                     return true;
                 } else {
-                    log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " REJECTED. CAUSE: " + page.get("reason") + " (URL=" + nexturlString + ")");
+                    log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " REJECTED. CAUSE: " + page.get("reason") + " (URL=" + urlEntry.url().toString() + ")");
                     return false;
                 }
             } else {
-                log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " DENIED. RESPONSE=" + response + ", CAUSE=" + page.get("reason") + ", URL=" + nexturlString);
+                log.logInfo("REMOTECRAWLTRIGGER: REMOTE CRAWL TO PEER " + remoteSeed.getName() + " DENIED. RESPONSE=" + response + ", CAUSE=" + page.get("reason") + ", URL=" + urlEntry.url().toString());
                 return false;
             }
         } catch (Exception e) {
@@ -1253,7 +1190,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             return false;
         }
     }
-
     
     private static SimpleDateFormat DateFormatter = new SimpleDateFormat("EEE, dd MMM yyyy");
     public static String dateString(Date date) {
@@ -1285,7 +1221,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
                 log.logDebug("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch");
                 
                 // take some elements and fetch the snippets
-                fetchSnippets(acc, queryhashes, urlmask, fetchcount);
+                snippetCache.fetch(acc, queryhashes, urlmask, fetchcount);
             } catch (IOException e) {
                 e.printStackTrace();
             }
@@ -1293,42 +1229,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }
     }
     
-    public void fetchSnippets(plasmaSearch.result acc, Set queryhashes, String urlmask, int fetchcount) {
-        // fetch the snippets
-        int i = 0;
-        plasmaCrawlLURL.Entry urlentry;
-        String urlstring;
-        plasmaSnippetCache.result snippet;
-        while ((acc.hasMoreElements()) && (i < fetchcount)) {
-            urlentry = acc.nextElement();
-            if (urlentry.url().getHost().endsWith(".yacyh")) continue;
-            urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());
-            if ((urlstring.matches(urlmask)) &&
-                (!(snippetCache.existsInCache(urlentry.url(), queryhashes)))) {
-                new snippetFetcher(urlentry.url(), queryhashes).start();
-                i++;
-            }
-        }
-    }
-        
-    public class snippetFetcher extends Thread {
-        URL url;
-        Set queryhashes;
-        public snippetFetcher(URL url, Set queryhashes) {
-            if (url.getHost().endsWith(".yacyh")) return;
-            this.url = url;
-            this.queryhashes = queryhashes;
-        }
-        public void run() {
-            log.logDebug("snippetFetcher: try to get URL " + url);
-            plasmaSnippetCache.result snippet = snippetCache.retrieve(url, queryhashes, true, 260);
-            if (snippet.line == null)
-                log.logDebug("snippetFetcher: cannot get URL " + url + ". error(" + snippet.source + "): " + snippet.error);
-            else
-                log.logDebug("snippetFetcher: got URL " + url + ", the snippet is '" + snippet.line + "', source=" + snippet.source);
-        }
-    }
-    
     public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) {
         
         serverObjects prop = new serverObjects();
@@ -1380,7 +1280,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
             if (remainingTime < 500) remainingTime = 500;
             if (remainingTime > 3000) remainingTime = 3000;
             plasmaSearch.result acc = searchManager.order(idx, queryhashes, stopwords, order, remainingTime, 10);
-            if (!(global)) fetchSnippets(acc.cloneSmart(), queryhashes, urlmask, 10);
+            if (!(global)) snippetCache.fetch(acc.cloneSmart(), queryhashes, urlmask, 10);
             log.logDebug("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
             
             // result is a List of urlEntry elements: prepare answer
@@ -1625,233 +1525,6 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
         }
         return count;
     }
-
-    public class distributeIndex {
-        // distributes parts of the index to other peers
-        // stops as soon as an error occurrs
- 
-        int indexCount;
-        int peerCount;
-        long pause;
-        long maxTime;
-        
-	public distributeIndex(int indexCount, long maxTimePerTransfer, int peerCount) {
-           this.indexCount = indexCount;
-            this.peerCount = peerCount;
-            this.maxTime = maxTimePerTransfer;
-	}
-
-	public boolean job() {
-            if ((yacyCore.seedDB == null) ||
-                (yacyCore.seedDB.mySeed == null) ||
-                (yacyCore.seedDB.mySeed.isVirgin()) ||
-                (urlPool.loadedURL.size() < 10) ||
-                (wordIndex.size() < 100) ||
-                (!(yacyCore.seedDB.mySeed.isJunior()))) return false;
-
-            int transferred;
-            long starttime = System.currentTimeMillis();
-            try {
-                if (
-                (sbQueue.size() == 0) &&
-                (cacheLoader.size() == 0) &&
-                (urlPool.noticeURL.stackSize() == 0) &&
-                (getConfig("allowDistributeIndex", "false").equals("true")) &&
-                ((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) {
-                    indexCount = transferred;
-                    if ((System.currentTimeMillis() - starttime) > (maxTime * peerCount)) indexCount--; else indexCount++;
-                    if (indexCount < 30) indexCount = 30;
-                    return true;
-                } else {
-                    // make a long pause
-                    return false;
-                }
-            } catch (IllegalArgumentException ee) {
-                // this is a bug that occurres if a not-fixeable data-inconsistency in the table structure was detected
-                // make a long pause
-                log.logError("very bad data inconsistency: " + ee.getMessage());
-                //ee.printStackTrace();
-                return false;
-            }
-	}
-
-        public void setCounts(int indexCount, int peerCount, long pause) {
-            this.indexCount = indexCount;
-            if (indexCount < 30) indexCount = 30;
-            this.peerCount = peerCount;
-            this.pause = pause;
-        }
-        
-    }
-
-    public int performTransferIndex(int indexCount, int peerCount, boolean delete) {
-	if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
-
-        // collect index
-        //String startPointHash = yacyCore.seedCache.mySeed.hash;
-        String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
-        plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
-        if ((indexEntities == null) || (indexEntities.length == 0)) {
-            log.logDebug("No Index available for Index Transfer, hash start-point " + startPointHash);
-            return -1;
-        }
-        // count the indexes again, can be smaller as expected
-        indexCount = 0; for (int i = 0; i < indexEntities.length; i++) indexCount += indexEntities[i].size();
-        
-        // find start point for DHT-selection
-        String keyhash = indexEntities[indexEntities.length - 1].wordHash();
-        
-        // iterate over DHT-peers and send away the indexes
-        yacySeed seed;
-        int hc = 0;
-        Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash);
-        String error;
-        String peerNames = "";
-        while ((e.hasMoreElements()) && (hc < peerCount)) {
-            seed = (yacySeed) e.nextElement();
-            if (seed != null) {
-                error = yacyClient.transferIndex(seed, indexEntities, urlPool.loadedURL);
-                if (error == null) {
-                    log.logInfo("Index Transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
-                    peerNames += ", " + seed.getName();
-                    hc++;
-                } else {
-                    log.logWarning("Index Transfer to peer " + seed.getName() + ":" + seed.hash + " failed:'" + error + "', disconnecting peer");
-                    yacyCore.peerActions.peerDeparture(seed);
-                }
-            }
-        }
-        if (peerNames.length() > 0) peerNames = peerNames.substring(2); // remove comma
-        
-        // clean up and finish with deletion of indexes
-        if (hc >= peerCount) {
-            // success
-            if (delete) {
-                try {
-                    if (deleteTransferIndexes(indexEntities)) {
-                        log.logDebug("Deleted all transferred whole-word indexes locally");
-                        return indexCount;
-                    } else {
-                        log.logError("Deleted not all transferred whole-word indexes");
-                        return -1;
-                    }
-                } catch (IOException ee) {
-                    log.logError("Deletion of Indexes not possible:" + ee.getMessage());
-                    ee.printStackTrace();
-                    return -1;
-                }
-            } else {
-		// simply close the indexEntities
-		for (int i = 0; i < indexEntities.length; i++) try {
-		    indexEntities[i].close();
-		} catch (IOException ee) {}
-	    }
-            return indexCount;
-        } else {
-            log.logError("Index distribution failed. Too less peers (" + hc + ") received the index, not deleted locally.");
-            return -1;
-        }
-    }
-
-    private plasmaWordIndexEntity[] selectTransferIndexes(String hash, int count) {
-        Vector tmpEntities = new Vector();
-        String nexthash = "";
-        try {
-            Iterator wordHashIterator = wordIndex.wordHashes(hash, true, true);
-            plasmaWordIndexEntity indexEntity, tmpEntity;
-            Enumeration urlEnum;
-            plasmaWordIndexEntry indexEntry;
-            while ((count > 0) && (wordHashIterator.hasNext()) &&
-                   ((nexthash = (String) wordHashIterator.next()) != null) && (nexthash.trim().length() > 0)) {
-                indexEntity = wordIndex.getEntity(nexthash, true);
-                if (indexEntity.size() == 0) {
-                    indexEntity.deleteComplete();
-                } else if (indexEntity.size() <= count) {
-                    // take the whole entity
-                    tmpEntities.add(indexEntity);
-                    log.logDebug("Selected Whole Index (" + indexEntity.size() + " urls) for word " + indexEntity.wordHash());
-                    count -= indexEntity.size();
-                } else {
-                    // make an on-the-fly entity and insert values
-                    tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
-                    urlEnum = indexEntity.elements(true);
-                    while ((urlEnum.hasMoreElements()) && (count > 0)) {
-                        indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
-                        tmpEntity.addEntry(indexEntry);
-                        count--;
-                    }
-                    urlEnum = null;
-                    log.logDebug("Selected Partial Index (" + tmpEntity.size() + " from " + indexEntity.size() +" urls) for word " + tmpEntity.wordHash());
-                    tmpEntities.add(tmpEntity);
-                    indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards
-                    indexEntity = null;
-                }
-                
-            }
-            // transfer to array
-            plasmaWordIndexEntity[] indexEntities = new plasmaWordIndexEntity[tmpEntities.size()];
-            for (int i = 0; i < tmpEntities.size(); i++) indexEntities[i] = (plasmaWordIndexEntity) tmpEntities.elementAt(i);
-            return indexEntities;
-        } catch (IOException e) {
-            log.logError("selectTransferIndexes IO-Error (hash=" + nexthash + "): " + e.getMessage());
-            e.printStackTrace();
-            return new plasmaWordIndexEntity[0];
-        } catch (kelondroException e) {
-            log.logError("selectTransferIndexes database corrupted: " + e.getMessage());
-            e.printStackTrace();
-            return new plasmaWordIndexEntity[0];
-        }
-    }
-    
-    private boolean deleteTransferIndexes(plasmaWordIndexEntity[] indexEntities) throws IOException {
-        String wordhash;
-        Enumeration urlEnum;
-        plasmaWordIndexEntry indexEntry;
-        plasmaWordIndexEntity indexEntity;
-        String[] urlHashes;
-        int sz;
-        boolean success = true;
-        for (int i = 0; i < indexEntities.length; i++) {
-            if (indexEntities[i].isTMPEntity()) {
-                // delete entries separately
-                int c = 0;
-                urlHashes = new String[indexEntities[i].size()];
-                urlEnum = indexEntities[i].elements(true);
-                while (urlEnum.hasMoreElements()) {
-                    indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
-                    urlHashes[c++] = indexEntry.getUrlHash();
-                }
-                wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true);
-                indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true);
-                sz = indexEntity.size();
-                indexEntity.close();
-                log.logDebug("Deleted Partinal Index (" + c + " urls) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left");
-                // DEBUG: now try to delete the remaining index. If this works, this routine is fine
-                /*
-                if (wordIndex.getEntity(indexEntities[i].wordHash()).deleteComplete())
-                    System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " SUCCESSFULL");
-                else 
-                    System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " FAILED");
-                 */
-                // end debug
-                indexEntities[i].close();
-            } else {
-                // delete complete file
-                if (indexEntities[i].deleteComplete()) {
-                    indexEntities[i].close();
-		} else {
-                    indexEntities[i].close();
-                    // have another try...
-                    if (!(plasmaWordIndexEntity.wordHash2path(plasmaPath, indexEntities[i].wordHash()).delete())) {
-                        success = false;
-                        log.logError("Could not delete whole Index for word " + indexEntities[i].wordHash());
-                    }
-                }
-            }
-	    indexEntities[i] = null;
-        }
-        return success;
-    }
     
     public int adminAuthenticated(httpHeader header) {
         String adminAccountBase64MD5 = getConfig("adminAccountBase64MD5", "");
diff --git a/source/de/anomic/plasma/plasmaURLPattern.java b/source/de/anomic/plasma/plasmaURLPattern.java
index 3fd3b2c9e..16cd37774 100644
--- a/source/de/anomic/plasma/plasmaURLPattern.java
+++ b/source/de/anomic/plasma/plasmaURLPattern.java
@@ -45,7 +45,7 @@ import java.lang.String;
 import java.util.HashMap;
 import java.io.File;
 
-import de.anomic.server.serverFileUtils;
+import de.anomic.kelondro.kelondroMSetTools;
 
 public class plasmaURLPattern {
     
@@ -71,7 +71,7 @@ public class plasmaURLPattern {
         
         if(filenamesarray.length >0)
             for(int i = 0; i < filenamesarray.length; i++)
-                hostpaths.putAll(serverFileUtils.loadMap(mapname, (new File(rootPath, filenamesarray[i])).toString(), sep));
+                hostpaths.putAll(kelondroMSetTools.loadMap(mapname, (new File(rootPath, filenamesarray[i])).toString(), sep));
     }
     
     public void remove(String host) {
diff --git a/source/de/anomic/plasma/plasmaURLPool.java b/source/de/anomic/plasma/plasmaURLPool.java
index 93e3318e0..7726919b2 100644
--- a/source/de/anomic/plasma/plasmaURLPool.java
+++ b/source/de/anomic/plasma/plasmaURLPool.java
@@ -44,8 +44,11 @@
 
 package de.anomic.plasma;
 
+import java.net.URL;
 import java.io.File;
 import java.io.IOException;
+import java.util.Set;
+import java.util.Iterator;
 
 public class plasmaURLPool {
     
@@ -60,12 +63,24 @@ public class plasmaURLPool {
         errorURL = new plasmaCrawlEURL(new File(plasmaPath, "urlErr0.db"), ramEURL);
     }
     
-    public String testHash(String hash) {
+    public String exists(String hash) {
         // tests if hash occurrs in any database
         // if it exists, the name of the database is returned,
         // if it not exists, null is returned
         if (loadedURL.exists(hash)) return "loaded";
         if (noticeURL.existsInStack(hash)) return "crawler";
+        if (errorURL.exists(hash)) return "errors";
+        return null;
+    }
+    
+    public URL getURL(String urlhash) {
+        if (urlhash.equals(plasmaURL.dummyHash)) return null;
+        plasmaCrawlNURL.Entry ne = noticeURL.getEntry(urlhash);
+        if (ne != null) return ne.url();
+        plasmaCrawlLURL.Entry le = loadedURL.getEntry(urlhash);
+        if (le != null) return le.url();
+        plasmaCrawlEURL.Entry ee = errorURL.getEntry(urlhash);
+        if (ee != null) return ee.url();
         return null;
     }
     
diff --git a/source/de/anomic/plasma/plasmaWordIndex.java b/source/de/anomic/plasma/plasmaWordIndex.java
index 222160d52..1da846107 100644
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@@ -67,6 +67,10 @@ public final class plasmaWordIndex {
         this.ramCache = new plasmaWordIndexCache(databaseRoot, fileDB, bufferkb, log);
     }
     
+    public File getRoot() {
+        return databaseRoot;
+    }
+    
     public int maxURLinWordCache() {
         return ramCache.maxURLinWordCache();
     }
diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
new file mode 100644
index 000000000..31202c413
--- /dev/null
+++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
@@ -0,0 +1,254 @@
+
+
+package de.anomic.plasma;
+
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.Vector;
+import java.util.Iterator;
+
+import de.anomic.yacy.yacyCore;
+import de.anomic.yacy.yacySeed;
+import de.anomic.yacy.yacySeedDB;
+import de.anomic.yacy.yacyClient;
+import de.anomic.server.serverCodings;
+import de.anomic.server.logging.serverLog;
+import de.anomic.kelondro.kelondroException;
+
+public class plasmaWordIndexDistribution {
+    
+        // distributes parts of the word index to other peers
+        // stops as soon as an error occurs
+ 
+        private int indexCount;
+        private int peerCount;
+        private long maxTime;
+        
+        private plasmaURLPool urlPool;
+        private plasmaWordIndex wordIndex;
+        private serverLog log;
+        private boolean enabled;
+        
+	public plasmaWordIndexDistribution(plasmaURLPool urlPool, plasmaWordIndex wordIndex, serverLog log, boolean enable) {
+            // creates the distributor; enable is the initial on/off state, log receives all messages of this class
+            this.urlPool = urlPool; this.wordIndex = wordIndex;
+            this.log = log; this.enabled = enable; // both were dropped before: log stayed null, enabled stayed false
+            setCounts(100 /*indexCount*/,  1 /*peerCount*/, 8000);
+	}
+
+        public void enable() {
+            enabled = true;
+        }
+        
+        public void disable() {
+            enabled = false;
+        }
+        
+	public boolean job() {
+            if ((yacyCore.seedDB == null) ||
+                (yacyCore.seedDB.mySeed == null) ||
+                (yacyCore.seedDB.mySeed.isVirgin()) ||
+                (urlPool.loadedURL.size() < 10) ||
+                (wordIndex.size() < 100) ||
+                (!(yacyCore.seedDB.mySeed.isJunior()))) return false;
+
+            int transferred;
+            long starttime = System.currentTimeMillis();
+            try {
+                if (
+                (urlPool.noticeURL.stackSize() == 0) &&
+                (enabled) &&
+                ((transferred = performTransferIndex(indexCount, peerCount, true)) > 0)) {
+                    indexCount = transferred;
+                    if ((System.currentTimeMillis() - starttime) > (maxTime * peerCount)) indexCount--; else indexCount++;
+                    if (indexCount < 30) indexCount = 30;
+                    return true;
+                } else {
+                    // make a long pause
+                    return false;
+                }
+            } catch (IllegalArgumentException ee) {
+                // this is a bug that occurres if a not-fixeable data-inconsistency in the table structure was detected
+                // make a long pause
+                log.logError("very bad data inconsistency: " + ee.getMessage());
+                //ee.printStackTrace();
+                return false;
+            }
+	}
+
+        public void setCounts(int indexCount, int peerCount, long maxTimePerTransfer) {
+            // sets the transfer parameters; indexCount is raised to the same floor of 30 that job() enforces
+            this.maxTime = maxTimePerTransfer;
+            this.indexCount = (indexCount < 30) ? 30 : indexCount; // old check changed only the parameter, after the copy
+            this.peerCount = peerCount;
+        }
+        
+        public int performTransferIndex(int indexCount, int peerCount, boolean delete) {
+	if ((yacyCore.seedDB == null) || (yacyCore.seedDB.sizeConnected() == 0)) return -1;
+
+        // collect index
+        //String startPointHash = yacyCore.seedCache.mySeed.hash;
+        String startPointHash = serverCodings.encodeMD5B64("" + System.currentTimeMillis(), true).substring(0, yacySeedDB.commonHashLength);
+        plasmaWordIndexEntity[] indexEntities = selectTransferIndexes(startPointHash, indexCount);
+        if ((indexEntities == null) || (indexEntities.length == 0)) {
+            log.logDebug("No Index available for Index Transfer, hash start-point " + startPointHash);
+            return -1;
+        }
+        // count the indexes again, can be smaller as expected
+        indexCount = 0; for (int i = 0; i < indexEntities.length; i++) indexCount += indexEntities[i].size();
+        
+        // find start point for DHT-selection
+        String keyhash = indexEntities[indexEntities.length - 1].wordHash();
+        
+        // iterate over DHT-peers and send away the indexes
+        yacySeed seed;
+        int hc = 0;
+        Enumeration e = yacyCore.dhtAgent.getAcceptRemoteIndexSeeds(keyhash);
+        String error;
+        String peerNames = "";
+        while ((e.hasMoreElements()) && (hc < peerCount)) {
+            seed = (yacySeed) e.nextElement();
+            if (seed != null) {
+                error = yacyClient.transferIndex(seed, indexEntities, urlPool.loadedURL);
+                if (error == null) {
+                    log.logInfo("Index Transfer of " + indexCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "] to peer " + seed.getName() + ":" + seed.hash + " successfull");
+                    peerNames += ", " + seed.getName();
+                    hc++;
+                } else {
+                    log.logWarning("Index Transfer to peer " + seed.getName() + ":" + seed.hash + " failed:'" + error + "', disconnecting peer");
+                    yacyCore.peerActions.peerDeparture(seed);
+                }
+            }
+        }
+        if (peerNames.length() > 0) peerNames = peerNames.substring(2); // remove comma
+        
+        // clean up and finish with deletion of indexes
+        if (hc >= peerCount) {
+            // success
+            if (delete) {
+                try {
+                    if (deleteTransferIndexes(indexEntities)) {
+                        log.logDebug("Deleted all transferred whole-word indexes locally");
+                        return indexCount;
+                    } else {
+                        log.logError("Deleted not all transferred whole-word indexes");
+                        return -1;
+                    }
+                } catch (IOException ee) {
+                    log.logError("Deletion of Indexes not possible:" + ee.getMessage());
+                    ee.printStackTrace();
+                    return -1;
+                }
+            } else {
+		// simply close the indexEntities
+		for (int i = 0; i < indexEntities.length; i++) try {
+		    indexEntities[i].close();
+		} catch (IOException ee) {}
+	    }
+            return indexCount;
+        } else {
+            log.logError("Index distribution failed. Too less peers (" + hc + ") received the index, not deleted locally.");
+            return -1;
+        }
+    }
+
+    private plasmaWordIndexEntity[] selectTransferIndexes(String hash, int count) {
+        Vector tmpEntities = new Vector();
+        String nexthash = "";
+        try {
+            Iterator wordHashIterator = wordIndex.wordHashes(hash, true, true);
+            plasmaWordIndexEntity indexEntity, tmpEntity;
+            Enumeration urlEnum;
+            plasmaWordIndexEntry indexEntry;
+            while ((count > 0) && (wordHashIterator.hasNext()) &&
+                   ((nexthash = (String) wordHashIterator.next()) != null) && (nexthash.trim().length() > 0)) {
+                indexEntity = wordIndex.getEntity(nexthash, true);
+                if (indexEntity.size() == 0) {
+                    indexEntity.deleteComplete();
+                } else if (indexEntity.size() <= count) {
+                    // take the whole entity
+                    tmpEntities.add(indexEntity);
+                    log.logDebug("Selected Whole Index (" + indexEntity.size() + " urls) for word " + indexEntity.wordHash());
+                    count -= indexEntity.size();
+                } else {
+                    // make an on-the-fly entity and insert values
+                    tmpEntity = new plasmaWordIndexEntity(indexEntity.wordHash());
+                    urlEnum = indexEntity.elements(true);
+                    while ((urlEnum.hasMoreElements()) && (count > 0)) {
+                        indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
+                        tmpEntity.addEntry(indexEntry);
+                        count--;
+                    }
+                    urlEnum = null;
+                    log.logDebug("Selected Partial Index (" + tmpEntity.size() + " from " + indexEntity.size() +" urls) for word " + tmpEntity.wordHash());
+                    tmpEntities.add(tmpEntity);
+                    indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards
+                    indexEntity = null;
+                }
+                
+            }
+            // transfer to array
+            plasmaWordIndexEntity[] indexEntities = new plasmaWordIndexEntity[tmpEntities.size()];
+            for (int i = 0; i < tmpEntities.size(); i++) indexEntities[i] = (plasmaWordIndexEntity) tmpEntities.elementAt(i);
+            return indexEntities;
+        } catch (IOException e) {
+            log.logError("selectTransferIndexes IO-Error (hash=" + nexthash + "): " + e.getMessage());
+            e.printStackTrace();
+            return new plasmaWordIndexEntity[0];
+        } catch (kelondroException e) {
+            log.logError("selectTransferIndexes database corrupted: " + e.getMessage());
+            e.printStackTrace();
+            return new plasmaWordIndexEntity[0];
+        }
+    }
+    
+    private boolean deleteTransferIndexes(plasmaWordIndexEntity[] indexEntities) throws IOException {
+        String wordhash;
+        Enumeration urlEnum;
+        plasmaWordIndexEntry indexEntry;
+        plasmaWordIndexEntity indexEntity;
+        String[] urlHashes;
+        int sz;
+        boolean success = true;
+        for (int i = 0; i < indexEntities.length; i++) {
+            if (indexEntities[i].isTMPEntity()) {
+                // delete entries separately
+                int c = 0;
+                urlHashes = new String[indexEntities[i].size()];
+                urlEnum = indexEntities[i].elements(true);
+                while (urlEnum.hasMoreElements()) {
+                    indexEntry = (plasmaWordIndexEntry) urlEnum.nextElement();
+                    urlHashes[c++] = indexEntry.getUrlHash();
+                }
+                wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true);
+                indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true);
+                sz = indexEntity.size();
+                indexEntity.close();
+                log.logDebug("Deleted Partinal Index (" + c + " urls) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left");
+                // DEBUG: now try to delete the remaining index. If this works, this routine is fine
+                /*
+                if (wordIndex.getEntity(indexEntities[i].wordHash()).deleteComplete())
+                    System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " SUCCESSFULL");
+                else 
+                    System.out.println("DEBUG: trial delete of partial word index " + indexEntities[i].wordHash() + " FAILED");
+                 */
+                // end debug
+                indexEntities[i].close();
+            } else {
+                // delete complete file
+                if (indexEntities[i].deleteComplete()) {
+                    indexEntities[i].close();
+		} else {
+                    indexEntities[i].close();
+                    // have another try...
+                    if (!(plasmaWordIndexEntity.wordHash2path(wordIndex.getRoot() /*PLASMADB*/, indexEntities[i].wordHash()).delete())) {
+                        success = false;
+                        log.logError("Could not delete whole Index for word " + indexEntities[i].wordHash());
+                    }
+                }
+            }
+	    indexEntities[i] = null;
+        }
+        return success;
+    }
+}
diff --git a/source/de/anomic/server/serverFileUtils.java b/source/de/anomic/server/serverFileUtils.java
index 85cb50f13..84d84ea66 100644
--- a/source/de/anomic/server/serverFileUtils.java
+++ b/source/de/anomic/server/serverFileUtils.java
@@ -51,7 +51,6 @@ import java.io.OutputStream;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.util.zip.GZIPOutputStream;
-import java.util.TreeMap;
 import java.util.HashSet;
 
 public final class serverFileUtils {
@@ -163,24 +162,5 @@ public final class serverFileUtils {
         }
         return set;
     }
-    
-    public static TreeMap loadMap(String mapname, String filename, String sep) {
-        TreeMap map = new TreeMap();
-        BufferedReader br = null;
-        try {
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
-            String line;
-            int pos;
-            while ((line = br.readLine()) != null) {
-                line = line.trim();
-                if ((line.length() > 0) && (!(line.startsWith("#"))) && ((pos = line.indexOf(sep)) > 0))
-                    map.put(line.substring(0, pos).trim().toLowerCase(), line.substring(pos + sep.length()).trim());
-            }
-        } catch (IOException e) {            
-        } finally {
-            if (br != null) try { br.close(); } catch (Exception e) {}
-        }
-        return map;
-    }
-    
+
 }
diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java
index 17fd52a10..dfc8e29ff 100644
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@@ -450,7 +450,7 @@ public class yacyClient {
         }
     }
     
-    public static HashMap crawlOrder(yacySeed targetSeed, String url, String referrer, int depth) {
+    public static HashMap crawlOrder(yacySeed targetSeed, URL url, URL referrer, int depth) {
         // this post a message to the remote message board
         if (targetSeed == null) return null;
         if (yacyCore.seedDB.mySeed == null) return null;
@@ -467,8 +467,8 @@ public class yacyClient {
             "&process=crawl" +
             "&youare=" + targetSeed.hash +
             "&iam=" + yacyCore.seedDB.mySeed.hash +
-            "&url=" + crypt.simpleEncode(url) +
-            "&referrer=" + crypt.simpleEncode(referrer) +
+            "&url=" + crypt.simpleEncode(url.toString()) +
+            "&referrer=" + crypt.simpleEncode((referrer == null) ? "" : referrer.toString()) +
             "&depth=" + depth +
             "&ttl=0"
             ),