From 7535fd7447b93ce2cd5a253fd15cfd672844761c Mon Sep 17 00:00:00 2001
From: orbiter
Date: Wed, 17 Dec 2008 22:53:06 +0000
Subject: [PATCH] - refactoring of CrawlEntry and CrawlStacker
 - introduced blocking queues in CrawlStacker to make it ready for concurrency
 - added a second busy thread for the CrawlStacker

The CrawlStacker is multithreaded. It shall be transformed into a
BlockingThread in another step. The concurrency of the stacker will
hopefully solve some problems with cases where DNS blocks.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5395 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 defaults/yacy.init                             |  11 +-
 htroot/IndexControlRWIs_p.java                 |   6 +-
 htroot/PeerLoadPicture.java                    |   3 +-
 htroot/QuickCrawlLink_p.java                   |  22 ++-
 htroot/WatchCrawler_p.java                     |  28 ++-
 htroot/rct_p.java                              |  14 +-
 source/de/anomic/crawler/CrawlEntry.java       |  42 ++--
 source/de/anomic/crawler/CrawlQueues.java      |  25 ++-
 source/de/anomic/crawler/CrawlStacker.java     | 182 ++++--------------
 source/de/anomic/crawler/ProtocolLoader.java   |   8 +-
 source/de/anomic/data/SitemapParser.java       |  51 ++---
 source/de/anomic/data/bookmarksDB.java         |  75 +++-----
 source/de/anomic/index/indexURLReference.java  |   1 +
 .../de/anomic/plasma/plasmaSwitchboard.java    |  21 +-
 .../plasma/plasmaSwitchboardConstants.java     |   3 +-
 source/de/anomic/server/serverDomains.java     |   3 +-
 .../server/serverInstantBlockingThread.java    |   2 +-
 .../de/anomic/server/serverProcessorJob.java   |   2 +-
 .../anomic/urlRedirector/urlRedirectord.java   |  12 +-
 19 files changed, 219 insertions(+), 292 deletions(-)

diff --git a/defaults/yacy.init b/defaults/yacy.init
index 972604e05..0a42d7836 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -568,9 +568,12 @@ performanceSpeed=100
 80_indexing_idlesleep=1000
 80_indexing_busysleep=10
 80_indexing_memprereq=6291456
-82_crawlstack_idlesleep=5000
+82_crawlstack_idlesleep=1000
 82_crawlstack_busysleep=0
 82_crawlstack_memprereq=1048576
+83_crawlstack_idlesleep=1200
+83_crawlstack_busysleep=0
+83_crawlstack_memprereq=1048576
 90_cleanup_idlesleep=300000
 90_cleanup_busysleep=300000
 90_cleanup_memprereq=0
@@ -818,12 +821,6 @@ svnRevision=0
 currentSkin=default
 
-# temporary flag for new database structure. set only true for testing
-# ALL DATA THAT IS CREATED WITH THIS FLAG ON WILL BE VOID IN A FINAL VERSION
-# table-types: RAM = 0, TREE = 1, FLEX = 2;
-# if you set this to a non-RAM value, you should increase the stacker.slots value
-tableTypeForPreNURL=0
-
 # flag to show if pages shall be usable for non-admin users
 # this can be applied to the Surftips.html and yacysearch.html page
 publicSurftips = true
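A note on the new 83_crawlstack settings above: a YaCy busy thread calls a job
method in a loop and, by the assumed semantics of these keys, sleeps busysleep
milliseconds after a round that found work and idlesleep milliseconds after an
idle round. The following schematic loop only illustrates those assumed
semantics; BusyLoop is an invented name, not YaCy's serverBusyThread code:

    // Schematic busy-thread loop (illustrative; semantics assumed from the
    // idlesleep/busysleep keys above, e.g. 1200 / 0 for 83_crawlstack).
    public final class BusyLoop extends Thread {
        private final long idlesleep, busysleep;
        private final java.util.concurrent.Callable<Boolean> job;
        private volatile boolean running = true;

        public BusyLoop(final long idlesleep, final long busysleep,
                        final java.util.concurrent.Callable<Boolean> job) {
            this.idlesleep = idlesleep;
            this.busysleep = busysleep;
            this.job = job;
        }

        public void run() {
            try {
                while (running) {
                    final boolean didWork = job.call(); // e.g. CrawlStacker.job()
                    Thread.sleep(didWork ? busysleep : idlesleep);
                }
            } catch (final Exception e) {
                e.printStackTrace();
            }
        }

        public void terminate() { running = false; }
    }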
diff --git a/htroot/IndexControlRWIs_p.java b/htroot/IndexControlRWIs_p.java
index afe797d24..00372ed19 100644
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@@ -109,11 +109,7 @@ public class IndexControlRWIs_p {
         if (post.containsKey("deletecomplete") && post.containsKey("confirmDelete")) {
             sb.webIndex.clear();
             sb.crawlQueues.clear();
-            try {
-                sb.crawlStacker.clear();
-            } catch (final IOException e) {
-                e.printStackTrace();
-            }
+            sb.crawlStacker.clear();
             try {
                 sb.robots.clear();
             } catch (final IOException e) {
diff --git a/htroot/PeerLoadPicture.java b/htroot/PeerLoadPicture.java
index 6eb25c0d0..30a5d2797 100644
--- a/htroot/PeerLoadPicture.java
+++ b/htroot/PeerLoadPicture.java
@@ -29,7 +29,8 @@ public class PeerLoadPicture {
         final CircleThreadPiece misc = new CircleThreadPiece("Misc.", new Color(190, 50, 180));
         final HashMap<String, CircleThreadPiece> pieces = new HashMap<String, CircleThreadPiece>();
         pieces.put(null, idle);
-        pieces.put(plasmaSwitchboardConstants.CRAWLSTACK, new CircleThreadPiece("Stacking", new Color(115, 200, 210)));
+        pieces.put(plasmaSwitchboardConstants.CRAWLSTACK0, new CircleThreadPiece("Stacking0", new Color(115, 200, 210)));
+        pieces.put(plasmaSwitchboardConstants.CRAWLSTACK1, new CircleThreadPiece("Stacking1", new Color(115, 200, 210)));
         pieces.put(plasmaSwitchboardConstants.INDEXER, new CircleThreadPiece("Parsing/Indexing", new Color(255, 130, 0)));
         pieces.put(plasmaSwitchboardConstants.INDEX_DIST, new CircleThreadPiece("DHT-Distribution", new Color(119, 136, 153)));
         pieces.put(plasmaSwitchboardConstants.PEER_PING, new CircleThreadPiece("YaCy Core", new Color(255, 230, 160)));
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index 5af704396..c7b94aa17 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -34,6 +34,7 @@ import java.net.MalformedURLException;
 import java.net.URLDecoder;
 import java.util.Date;
 
+import de.anomic.crawler.CrawlEntry;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.plasma.plasmaSwitchboard;
@@ -158,15 +159,18 @@ public class QuickCrawlLink_p {
 
         // stack URL
         String reasonString = null;
-        reasonString = sb.crawlStacker.stackCrawl(
-                crawlingStartURL,
-                null,
-                sb.webIndex.seedDB.mySeed().hash,
-                (title==null)?"CRAWLING-ROOT":title,
-                new Date(),
-                0,
-                pe
-        );
+        reasonString = sb.crawlStacker.stackCrawl(new CrawlEntry(
+                sb.webIndex.seedDB.mySeed().hash,
+                crawlingStartURL,
+                null,
+                (title==null)?"CRAWLING-ROOT":title,
+                new Date(),
+                null,
+                pe.handle(),
+                0,
+                0,
+                0
+                ));
 
         // validate rejection reason
         if (reasonString == null) {
diff --git a/htroot/WatchCrawler_p.java b/htroot/WatchCrawler_p.java
index 831a5a0de..804b9de0c 100644
--- a/htroot/WatchCrawler_p.java
+++ b/htroot/WatchCrawler_p.java
@@ -211,7 +211,18 @@
                         crawlingQ,
                         indexText, indexMedia,
                         storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw);
-                final String reasonString = sb.crawlStacker.stackCrawl(url, null, sb.webIndex.seedDB.mySeed().hash, "CRAWLING-ROOT", new Date(), 0, pe);
+                final String reasonString = sb.crawlStacker.stackCrawl(new CrawlEntry(
+                        sb.webIndex.seedDB.mySeed().hash,
+                        url,
+                        null,
+                        "CRAWLING-ROOT",
+                        new Date(),
+                        null,
+                        pe.handle(),
+                        0,
+                        0,
+                        0
+                        ));
                 
                 if (reasonString == null) {
                     // create a bookmark from crawl start url
@@ -260,6 +271,7 @@ public class WatchCrawler_p {
                         "",
                         "",
                         new Date(),
+                        null,
                         pe.handle(),
                         0,
                         0,
@@ -338,14 +350,18 @@ public class WatchCrawler_p {
                     if (nexturl == null) continue;
                     
                     // enqueuing the url for crawling
-                    sb.crawlStacker.enqueueEntry(
+                    sb.crawlStacker.enqueueEntry(new CrawlEntry(
+                        sb.webIndex.seedDB.mySeed().hash,
                         nexturl,
                         "",
-                        sb.webIndex.seedDB.mySeed().hash,
                         e.getValue(),
-                        new Date(),
-                        0,
-                        profile);
+                        new Date(),
+                        null,
+                        profile.handle(),
+                        0,
+                        0,
+                        0
+                        ));
                 }
             } catch (final PatternSyntaxException e) {
diff --git a/htroot/rct_p.java b/htroot/rct_p.java
index cb2e2942d..b5b47babb 100644
--- a/htroot/rct_p.java
+++ b/htroot/rct_p.java
@@ -30,6 +30,7 @@ import java.text.ParseException;
 import java.util.Date;
 import java.util.Iterator;
 
+import de.anomic.crawler.CrawlEntry;
 import de.anomic.http.httpRequestHeader;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.serverDate;
@@ -76,7 +77,18 @@ public class rct_p {
                     if (urlRejectReason == null) {
                         // stack url
                         if (sb.getLog().isFinest()) sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
-                        sb.crawlStacker.enqueueEntry(url, (referrer == null) ? null : referrer.hash(), peerhash, "REMOTE-CRAWLING", loaddate, 0, sb.webIndex.defaultRemoteProfile);
+                        sb.crawlStacker.enqueueEntry(new CrawlEntry(
+                            peerhash,
+                            url,
+                            (referrer == null) ? null : referrer.hash(),
+                            "REMOTE-CRAWLING",
+                            null,
+                            loaddate,
+                            sb.webIndex.defaultRemoteProfile.handle(),
+                            0,
+                            0,
+                            0
+                        ));
                     } else {
                         env.getLog().logWarning("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
                     }
diff --git a/source/de/anomic/crawler/CrawlEntry.java b/source/de/anomic/crawler/CrawlEntry.java
index 24a58c057..08ded65a2 100755
--- a/source/de/anomic/crawler/CrawlEntry.java
+++ b/source/de/anomic/crawler/CrawlEntry.java
@@ -36,10 +36,11 @@ import de.anomic.kelondro.kelondroBitfield;
 import de.anomic.kelondro.kelondroNaturalOrder;
 import de.anomic.kelondro.kelondroRow;
 import de.anomic.plasma.plasmaSwitchboard;
+import de.anomic.server.serverProcessorJob;
 import de.anomic.yacy.yacySeedDB;
 import de.anomic.yacy.yacyURL;
 
-public class CrawlEntry {
+public class CrawlEntry extends serverProcessorJob {
     
     // row definition for balancer-related NURL-entries
     public final static kelondroRow rowdef = new kelondroRow(
@@ -80,7 +81,7 @@ public class CrawlEntry {
     private int forkfactor; // sum of anchors of all ancestors
     private kelondroBitfield flags;
     private int handle;
-    private String status;
+    private String statusMessage;
     private int initialHash; // to provide a object hash that does not change even if the url changes because of redirection
     
     public static class domaccess {
@@ -116,38 +117,38 @@ public class CrawlEntry {
      * @param forkfactor sum of anchors of all ancestors
      */
     public CrawlEntry(
-                    final String initiator,
-                    final yacyURL url,
-                    final String referrerhash,
-                    final String name,
-                    final Date appdate,
-                    final String profileHandle,
-                    final int depth,
-                    final int anchors,
-                    final int forkfactor
+            final String initiator,
+            final yacyURL url,
+            final String referrerhash,
+            final String name,
+            final Date appdate,
+            final Date loaddate,
+            final String profileHandle,
+            final int depth,
+            final int anchors,
+            final int forkfactor
     ) {
         // create new entry and store it into database
-        assert appdate != null;
         assert url != null;
         assert initiator != null;
-        assert referrerhash != null;
         assert profileHandle.length() == yacySeedDB.commonHashLength : profileHandle + " != " + yacySeedDB.commonHashLength;
         this.initiator = initiator;
         this.url = url;
-        this.refhash = referrerhash;
+        this.refhash = (referrerhash == null) ? "" : referrerhash;
         this.name = (name == null) ? "" : name;
         this.appdate = (appdate == null) ? 0 : appdate.getTime();
+        this.loaddate = (loaddate == null) ? 0 : loaddate.getTime();
         this.profileHandle = profileHandle; // must not be null
         this.depth = depth;
         this.anchors = anchors;
         this.forkfactor = forkfactor;
         this.flags = new kelondroBitfield(rowdef.width(10));
         this.handle = 0;
-        this.loaddate = 0;
         this.serverdate = 0;
         this.imsdate = 0;
-        this.status = "loaded(args)";
+        this.statusMessage = "loaded(args)";
         this.initialHash = url.hashCode();
+        this.status = serverProcessorJob.STATUS_INITIATED;
     }
     
     public CrawlEntry(final kelondroRow.Entry entry) throws IOException {
@@ -172,7 +173,7 @@ public class CrawlEntry {
         this.loaddate = entry.getColLong(12);
         this.serverdate = entry.getColLong(13);
         this.imsdate = entry.getColLong(14);
-        this.status = "loaded(kelondroRow.Entry)";
+        this.statusMessage = "loaded(kelondroRow.Entry)";
         this.initialHash = url.hashCode();
         return;
     }
@@ -182,12 +183,13 @@ public class CrawlEntry {
         return this.initialHash;
     }
     
-    public void setStatus(final String s) {
-        this.status = s;
+    public void setStatus(final String s, int code) {
+        this.statusMessage = s;
+        this.status = code;
     }
     
     public String getStatus() {
-        return this.status;
+        return this.statusMessage;
     }
     
     private static String normalizeHandle(final int h) {
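Every call site in this patch switches to the ten-argument constructor shown
above; the new loaddate parameter sits in sixth position, between appdate and
the profile handle. An annotated call for reference, where exampleEntry is a
hypothetical helper and its parameters stand in for whatever a call site
supplies (the per-argument comments follow the constructor's null handling and
the comments from the deleted CrawlStacker code below):

    // Annotated use of the new ten-argument constructor (illustrative helper):
    CrawlEntry exampleEntry(final String initiatorHash, final yacyURL url, final String profileHandle) {
        return new CrawlEntry(
                initiatorHash,   // initiator: peer hash, needed for p2p-feedback
                url,             // url: the yacyURL to be crawled
                null,            // referrerhash: may now be null, stored as ""
                "CRAWLING-ROOT", // name: anchor text or entry name
                new Date(),      // appdate: may now be null, stored as 0
                null,            // loaddate: the new parameter, may be null, stored as 0
                profileHandle,   // crawl profile handle, must not be null
                0,               // depth so far
                0,               // anchors, default value
                0);              // forkfactor, default value
    }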
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index 9e2e0621e..62a298c8b 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -42,6 +42,7 @@ import de.anomic.plasma.plasmaParser;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaSwitchboardConstants;
 import de.anomic.server.serverDate;
+import de.anomic.server.serverProcessorJob;
 import de.anomic.server.logging.serverLog;
 import de.anomic.xml.RSSFeed;
 import de.anomic.xml.RSSMessage;
@@ -397,7 +398,18 @@ public class CrawlQueues {
                 if (urlRejectReason == null) {
                     // stack url
                     if (sb.getLog().isFinest()) sb.getLog().logFinest("crawlOrder: stack: url='" + url + "'");
-                    sb.crawlStacker.enqueueEntry(url, (referrer == null) ? null : referrer.hash(), hash, item.getDescription(), loaddate, 0, sb.webIndex.defaultRemoteProfile);
+                    sb.crawlStacker.enqueueEntry(new CrawlEntry(
+                        hash,
+                        url,
+                        (referrer == null) ? null : referrer.hash(),
+                        item.getDescription(),
+                        null,
+                        loaddate,
+                        sb.webIndex.defaultRemoteProfile.handle(),
+                        0,
+                        0,
+                        0
+                    ));
                 } else {
                     log.logWarning("crawlOrder: Rejected URL '" + urlToString(url) + "': " + urlRejectReason);
                 }
@@ -474,6 +486,7 @@ public class CrawlQueues {
             "",
             "",
             new Date(),
+            new Date(),
             (forText) ?
                 ((global) ?
                     sb.webIndex.defaultTextSnippetGlobalProfile.handle() :
@@ -500,7 +513,7 @@ public class CrawlQueues {
         
         public crawlWorker(final CrawlEntry entry) {
             this.entry = entry;
-            this.entry.setStatus("worker-initialized");
+            this.entry.setStatus("worker-initialized", serverProcessorJob.STATUS_INITIATED);
             this.code = Integer.valueOf(entry.hashCode());
             if (!workers.containsKey(code)) {
                 workers.put(code, this);
@@ -511,7 +524,7 @@ public class CrawlQueues {
         public void run() {
             try {
                 // checking robots.txt for http(s) resources
-                this.entry.setStatus("worker-checkingrobots");
+                this.entry.setStatus("worker-checkingrobots", serverProcessorJob.STATUS_STARTED);
                 if ((entry.url().getProtocol().equals("http") || entry.url().getProtocol().equals("https")) && sb.robots.isDisallowed(entry.url())) {
                     if (log.isFine()) log.logFine("Crawling of URL '" + entry.url().toString() + "' disallowed by robots.txt.");
                     final ZURL.Entry eentry = errorURL.newEntry(
@@ -524,7 +537,7 @@ public class CrawlQueues {
                     errorURL.push(eentry);
                 } else {
                     // starting a load from the internet
-                    this.entry.setStatus("worker-loading");
+                    this.entry.setStatus("worker-loading", serverProcessorJob.STATUS_RUNNING);
                     final String result = loader.process(this.entry, plasmaParser.PARSER_MODE_CRAWLER);
                     if (result != null) {
                         final ZURL.Entry eentry = errorURL.newEntry(
@@ -536,7 +549,7 @@ public class CrawlQueues {
                         eentry.store();
                         errorURL.push(eentry);
                     } else {
-                        this.entry.setStatus("worker-processed");
+                        this.entry.setStatus("worker-processed", serverProcessorJob.STATUS_FINISHED);
                     }
                 }
             } catch (final Exception e) {
@@ -551,7 +564,7 @@ public class CrawlQueues {
                 e.printStackTrace();
             } finally {
                 workers.remove(code);
-                this.entry.setStatus("worker-finalized", serverProcessorJob.STATUS_FINISHED);
             }
         }
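The crawlWorker now keeps the old human-readable message and, in parallel, the
numeric job state inherited from serverProcessorJob: initialized maps to
STATUS_INITIATED, the robots.txt check to STATUS_STARTED, loading to
STATUS_RUNNING, and processed/finalized to STATUS_FINISHED. A reduced sketch of
the paired fields; StatusCarrier is an invented name, and the 0..3 numbering is
an assumption extrapolated from the STATUS_FINISHED = 3 shown further below:

    // Sketch of the paired status fields after this change (simplified model):
    class StatusCarrier {
        static final int STATUS_INITIATED = 0, STATUS_STARTED = 1,
                         STATUS_RUNNING = 2, STATUS_FINISHED = 3; // numbering assumed
        int status = STATUS_INITIATED;       // machine-readable, from serverProcessorJob
        String statusMessage = "initiated";  // human-readable, was the old String 'status'

        void setStatus(final String message, final int code) {
            this.statusMessage = message;    // e.g. "worker-loading"
            this.status = code;              // e.g. STATUS_RUNNING
        }
    }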
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 4271b4241..82a13da11 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -28,17 +28,15 @@ package de.anomic.crawler;
 
-import java.io.IOException;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Date;
-import java.util.LinkedList;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
 
 import de.anomic.index.indexReferenceBlacklist;
 import de.anomic.index.indexURLReference;
-import de.anomic.kelondro.kelondroIndex;
-import de.anomic.kelondro.kelondroRow;
-import de.anomic.kelondro.kelondroRowSet;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaWordIndex;
 import de.anomic.server.serverDomains;
@@ -49,13 +47,11 @@ public final class CrawlStacker {
     
     final serverLog log = new serverLog("STACKCRAWL");
     
-    private final LinkedList<String> urlEntryHashCache; // the order how this queue is processed; entries with known DNS entries go first
-    private kelondroIndex urlEntryCache; // the entries in the queue
-    private long dnsHit, dnsMiss;
-    private int alternateCount;
-    private CrawlQueues nextQueue;
-    private plasmaWordIndex wordIndex;
-    private boolean acceptLocalURLs, acceptGlobalURLs;
+    private BlockingQueue<CrawlEntry> fastQueue, slowQueue;
+    private long dnsHit, dnsMiss;
+    private CrawlQueues nextQueue;
+    private plasmaWordIndex wordIndex;
+    private boolean acceptLocalURLs, acceptGlobalURLs;
     
     // objects for the prefetch task
     private final ArrayList<String> dnsfetchHosts = new ArrayList<String>();
@@ -68,26 +64,21 @@ public final class CrawlStacker {
         this.wordIndex = wordIndex;
         this.dnsHit = 0;
         this.dnsMiss = 0;
-        this.alternateCount = 0;
         this.acceptLocalURLs = acceptLocalURLs;
         this.acceptGlobalURLs = acceptGlobalURLs;
         
-        // init the message list
-        this.urlEntryHashCache = new LinkedList<String>();
-        
-        this.urlEntryCache = new kelondroRowSet(CrawlEntry.rowdef, 0);
+        this.fastQueue = new LinkedBlockingQueue<CrawlEntry>();
+        this.slowQueue = new ArrayBlockingQueue<CrawlEntry>(1000);
         
         this.log.logInfo("STACKCRAWL thread initialized.");
     }
     
     public int size() {
-        synchronized (this.urlEntryHashCache) {
-            return this.urlEntryHashCache.size();
-        }
+        return this.fastQueue.size() + this.slowQueue.size();
     }
     
-    public void clear() throws IOException {
-        this.urlEntryHashCache.clear();
-        this.urlEntryCache.clear();
+    public void clear() {
+        this.fastQueue.clear();
+        this.slowQueue.clear();
     }
     
     public void close() {
@@ -98,11 +89,7 @@ public final class CrawlStacker {
         
         this.log.logInfo("Shutdown. Closing stackCrawl queue.");
         
-        // closing the db
-        this.urlEntryCache.close();
-        
-        // clearing the hash list
-        this.urlEntryHashCache.clear();
+        clear();
     }
     
     private boolean prefetchHost(final String host) {
@@ -121,41 +108,17 @@ public final class CrawlStacker {
     }
     
     public boolean job() {
+        if (this.fastQueue.size() > 0 && job(this.fastQueue)) return true;
+        if (this.slowQueue.size() == 0) return false;
+        return job(this.slowQueue);
+    }
+    
+    private boolean job(BlockingQueue<CrawlEntry> queue) {
         // this is the method that is called by the busy thread from outside
-        if (this.urlEntryHashCache.size() == 0) return false;
+        if (queue.size() == 0) return false;
         
         // get the next entry from the queue
-        String urlHash = null;
-        kelondroRow.Entry ec = null;
-        synchronized (this.urlEntryHashCache) {
-            urlHash = this.urlEntryHashCache.removeFirst();
-            if (urlHash == null) {
-                urlEntryHashCache.clear();
-                try {
-                    urlEntryCache.clear();
-                } catch (IOException e) {
-                    e.printStackTrace();
-                }
-                return false;
-            }
-            try {
-                ec = this.urlEntryCache.remove(urlHash.getBytes());
-            } catch (IOException e) {
-                e.printStackTrace();
-                return false;
-            }
-        }
-        if (urlHash == null || ec == null) return false;
-        
-        // make a crawl Entry out of it
-        CrawlEntry entry = null;
-        try {
-            entry = new CrawlEntry(ec);
-        } catch (IOException e1) {
-            e1.printStackTrace();
-            return false;
-        }
-        
+        CrawlEntry entry = queue.poll();
         if (entry == null) return false;
         
         try {
@@ -173,95 +136,30 @@ public final class CrawlStacker {
         }
         return true;
     }
-    
-    public String stackCrawl(
-            final yacyURL url,
-            final String referrerhash,
-            final String initiatorHash,
-            final String name,
-            final Date loadDate,
-            final int currentdepth,
-            final CrawlProfile.entry profile) {
-        // stacks a crawl item. The position can also be remote
-        // returns null if successful, a reason string if not successful
-        //this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
-        
-        // add the url into the crawling queue
-        final CrawlEntry entry = new CrawlEntry(
-            initiatorHash, // initiator, needed for p2p-feedback
-            url,           // url clear text string
-            (referrerhash == null) ? "" : referrerhash, // last url in crawling queue
-            name,          // load date
-            loadDate,      // the anchor name
-            (profile == null) ? null : profile.handle(), // profile must not be null!
-            currentdepth,  // depth so far
-            0,             // anchors, default value
-            0              // forkfactor, default value
-        );
-        return stackCrawl(entry);
-    }
-    
-    public void enqueueEntry(
-            final yacyURL nexturl,
-            final String referrerhash,
-            final String initiatorHash,
-            final String name,
-            final Date loadDate,
-            final int currentdepth,
-            final CrawlProfile.entry profile) {
-        if (profile == null) return;
-        
+    
+    public void enqueueEntry(final CrawlEntry entry) {
         // DEBUG
-        if (log.isFinest()) log.logFinest("ENQUEUE "+ nexturl +", referer="+referrerhash +", initiator="+initiatorHash +", name="+name +", load="+loadDate +", depth="+currentdepth);
-        
-        // check first before we create a big object
-        if (this.urlEntryCache.has(nexturl.hash().getBytes())) return;
+        if (log.isFinest()) log.logFinest("ENQUEUE "+ entry.url() +", referer="+entry.referrerhash() +", initiator="+entry.initiator() +", name="+entry.name() +", load="+entry.loaddate() +", depth="+entry.depth());
         
-        // now create the big object before we enter the synchronized block
-        final CrawlEntry newEntry = new CrawlEntry(
-            initiatorHash,
-            nexturl,
-            referrerhash,
-            name,
-            loadDate,
-            profile.handle(),
-            currentdepth,
-            0,
-            0
-        );
-        if (newEntry == null) return;
-        final kelondroRow.Entry newEntryRow = newEntry.toRow();
-        
-        synchronized(this.urlEntryHashCache) {
-            kelondroRow.Entry oldValue;
+        if (prefetchHost(entry.url().getHost())) {
             try {
-                oldValue = this.urlEntryCache.put(newEntryRow);
-            } catch (final IOException e) {
-                oldValue = null;
-            }
-            if (oldValue == null) {
-                //System.out.println("*** debug crawlStacker dnsHit=" + this.dnsHit + ", dnsMiss=" + this.dnsMiss + ", alternateCount=" + this.alternateCount + ((this.dnsMiss > 0) ? (", Q=" + (this.dnsHit / this.dnsMiss)) : ""));
-                if (prefetchHost(nexturl.getHost())) {
-                    this.alternateCount++;
-                    this.urlEntryHashCache.addFirst(newEntry.url().hash());
-                    this.dnsHit++;
-                } else {
-                    if ((this.dnsMiss > 0) && (this.alternateCount > 2 * this.dnsHit / this.dnsMiss)) {
-                        this.urlEntryHashCache.addFirst(newEntry.url().hash());
-                        this.alternateCount = 0;
-                        //System.out.println("*** debug crawlStacker alternate switch, dnsHit=" + this.dnsHit + ", dnsMiss=" + this.dnsMiss + ", alternateCount=" + this.alternateCount + ", Q=" + (this.dnsHit / this.dnsMiss));
-                    } else {
-                        this.urlEntryHashCache.addLast(newEntry.url().hash());
-                    }
-                    this.dnsMiss++;
-                }
+                this.fastQueue.put(entry);
+                this.dnsHit++;
+            } catch (InterruptedException e) {
+                e.printStackTrace();
+            }
+        } else {
+            try {
+                this.slowQueue.put(entry);
+                this.dnsMiss++;
+            } catch (InterruptedException e) {
+                e.printStackTrace();
            }
         }
     }
-    
-    
-    private String stackCrawl(final CrawlEntry entry) {
+    public String stackCrawl(final CrawlEntry entry) {
         // stacks a crawl item. The position can also be remote
         // returns null if successful, a reason string if not successful
         //this.log.logFinest("stackCrawl: nexturlString='" + nexturlString + "'");
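The effect of the rewrite above: stacking no longer funnels through one
kelondro-backed cache but through two in-memory queues. enqueueEntry routes an
entry by whether its host already has a known DNS entry (prefetchHost), and
job() always drains the fast queue before touching the slow one, so a blocking
DNS lookup can only ever stall the slow lane. A condensed, self-contained
model of that routing follows; TwoLaneStacker and knownHosts are invented
stand-ins, with CrawlEntry reduced to a host string and the DNS cache to a set:

    import java.util.Set;
    import java.util.concurrent.ArrayBlockingQueue;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.LinkedBlockingQueue;

    // Condensed model of the two-lane stacker (illustrative, not the YaCy class).
    final class TwoLaneStacker {
        private final BlockingQueue<String> fastQueue = new LinkedBlockingQueue<String>();
        private final BlockingQueue<String> slowQueue = new ArrayBlockingQueue<String>(1000);
        private final Set<String> knownHosts = ConcurrentHashMap.newKeySet(); // stand-in for the DNS cache
        private long dnsHit, dnsMiss;

        public void enqueueEntry(final String host) throws InterruptedException {
            if (knownHosts.contains(host)) { // stand-in for prefetchHost(host)
                fastQueue.put(host);         // DNS already known: cheap to process
                dnsHit++;
            } else {
                slowQueue.put(host);         // DNS unknown: may block on resolution
                dnsMiss++;
            }
        }

        // called by each busy thread; false means: nothing to do, go idle
        public boolean job() {
            if (!fastQueue.isEmpty() && process(fastQueue)) return true;
            if (slowQueue.isEmpty()) return false;
            return process(slowQueue);
        }

        private boolean process(final BlockingQueue<String> queue) {
            final String host = queue.poll();
            if (host == null) return false;
            knownHosts.add(host); // once processed, the host's DNS entry is cached
            return true;
        }
    }

With two busy threads calling job() concurrently (the 82_ and 83_ threads from
yacy.init), entries with resolved hosts keep flowing even while a slow-queue
poll is stuck behind a DNS lookup.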
diff --git a/source/de/anomic/crawler/ProtocolLoader.java b/source/de/anomic/crawler/ProtocolLoader.java
index ff142ab1d..2dd643d87 100644
--- a/source/de/anomic/crawler/ProtocolLoader.java
+++ b/source/de/anomic/crawler/ProtocolLoader.java
@@ -36,6 +36,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import de.anomic.index.indexDocumentMetadata;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.serverCore;
+import de.anomic.server.serverProcessorJob;
 import de.anomic.server.logging.serverLog;
 
 public final class ProtocolLoader {
@@ -111,14 +112,15 @@ public final class ProtocolLoader {
         // returns null if everything went fine, a fail reason string if a problem occurred
         indexDocumentMetadata h;
         try {
+            entry.setStatus("loading", serverProcessorJob.STATUS_RUNNING);
             h = load(entry, parserMode);
             assert h != null;
-            entry.setStatus("loaded");
+            entry.setStatus("loaded", serverProcessorJob.STATUS_RUNNING);
             final boolean stored = sb.htEntryStoreProcess(h);
-            entry.setStatus("stored-" + ((stored) ? "ok" : "fail"));
+            entry.setStatus("stored-" + ((stored) ? "ok" : "fail"), serverProcessorJob.STATUS_FINISHED);
             return (stored) ? null : "not stored";
         } catch (IOException e) {
-            entry.setStatus("error");
+            entry.setStatus("error", serverProcessorJob.STATUS_FINISHED);
             log.logWarning("problem loading " + entry.url().toString());
             return "load error - " + e.getMessage();
         }
diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java
index ab91f2ec7..250e49ac0 100644
--- a/source/de/anomic/data/SitemapParser.java
+++ b/source/de/anomic/data/SitemapParser.java
@@ -41,7 +41,6 @@ import org.xml.sax.helpers.DefaultHandler;
 import de.anomic.crawler.CrawlEntry;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.crawler.HTTPLoader;
-import de.anomic.crawler.ZURL;
 import de.anomic.http.JakartaCommonsHttpClient;
 import de.anomic.http.JakartaCommonsHttpResponse;
 import de.anomic.http.httpRequestHeader;
@@ -272,42 +271,20 @@ public class SitemapParser extends DefaultHandler {
         }
 
         // URL needs to crawled
-        String error = null;
-        error = this.sb.crawlStacker.stackCrawl(url,
-                null, // this.siteMapURL.toString(),
-                this.sb.webIndex.seedDB.mySeed().hash, this.nextURL, new Date(),
-                0, this.crawlingProfile);
-
-        if (error != null) {
-            try {
-                this.logger.logInfo("The URL '" + this.nextURL + "' can not be crawled. Reason: " + error);
-
-                // insert URL into the error DB
-                final ZURL.Entry ee = this.sb.crawlQueues.errorURL.newEntry(
-                        new CrawlEntry(
-                                sb.webIndex.seedDB.mySeed().hash,
-                                new yacyURL(this.nextURL, null),
-                                "",
-                                "",
-                                new Date(),
-                                null,
-                                0,
-                                0,
-                                0),
-                        this.sb.webIndex.seedDB.mySeed().hash,
-                        new Date(),
-                        1,
-                        error);
-                ee.store();
-                this.sb.crawlQueues.errorURL.push(ee);
-            } catch (final MalformedURLException e) {/* ignore this */
-            }
-        } else {
-            this.logger.logInfo("New URL '" + this.nextURL + "' added for crawling.");
-
-            // count successfully added URLs
-            this.urlCounter++;
-        }
+        this.sb.crawlStacker.enqueueEntry(new CrawlEntry(
+                this.sb.webIndex.seedDB.mySeed().hash,
+                url,
+                null, // this.siteMapURL.toString(),
+                this.nextURL,
+                new Date(),
+                null,
+                this.crawlingProfile.handle(),
+                0,
+                0,
+                0
+                ));
+        this.logger.logInfo("New URL '" + this.nextURL + "' added for crawling.");
+        this.urlCounter++;
     }
 }
m.remove("indexMedia"); + m.remove("remoteIndexing"); + m.remove("xsstopw"); + m.remove("xpstopw"); + m.remove("xdstopw"); + m.remove("storeTXCache"); + m.remove("storeHTCache"); + m.remove("generalFilter"); + m.remove("specificFilter"); + m.put("intention", "Automatic ReCrawl!"); + sb.webIndex.newsPool.publishMyNews(yacyNewsRecord.newRecord(sb.webIndex.seedDB.mySeed(), yacyNewsPool.CATEGORY_CRAWL_START, m)); + } } catch (MalformedURLException e1) {} } // if } // while(bit.hasNext()) diff --git a/source/de/anomic/index/indexURLReference.java b/source/de/anomic/index/indexURLReference.java index 7e1c742d4..f767637c0 100644 --- a/source/de/anomic/index/indexURLReference.java +++ b/source/de/anomic/index/indexURLReference.java @@ -462,6 +462,7 @@ public class indexURLReference { comp().url(), referrerHash(), comp().dc_title(), + null, loaddate(), null, 0, diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 4fea492a4..d77218ac9 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -578,8 +578,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitchpublic static final String CRAWLSTACK = "82_crawlstack"

*

Name of the crawl stacker thread, performing several checks on new URLs to crawl, i.e. double-check

      */
-    public static final String CRAWLSTACK = "82_crawlstack";
+    public static final String CRAWLSTACK0 = "82_crawlstack";
+    public static final String CRAWLSTACK1 = "83_crawlstack";
     public static final String CRAWLSTACK_METHOD_START = "job";
     public static final String CRAWLSTACK_METHOD_JOBCOUNT = "size";
     public static final String CRAWLSTACK_METHOD_FREEMEM = null;
diff --git a/source/de/anomic/server/serverDomains.java b/source/de/anomic/server/serverDomains.java
index 1bdea4a77..3e3ee8f80 100644
--- a/source/de/anomic/server/serverDomains.java
+++ b/source/de/anomic/server/serverDomains.java
@@ -389,6 +389,7 @@ public class serverDomains {
     public static final int TLD_NorthAmericaOceania_ID = 4; // english-speaking countries
     public static final int TLD_Africa_ID = 5; // africa
     public static final int TLD_Generic_ID = 6; // anything else, also raw ip numbers
+    public static final int TLD_Local_ID = 7; // a local address
     
     static {
         // assign TLD-ids and names
@@ -552,7 +553,7 @@ public class serverDomains {
         }
         final Integer i = TLDID.get(tld);
         if (i == null) {
-            return (isLocal(host)) ? 7 : TLD_Generic_ID;
+            return (isLocal(host)) ? TLD_Local_ID : TLD_Generic_ID;
         }
         return i.intValue();
     }
diff --git a/source/de/anomic/server/serverInstantBlockingThread.java b/source/de/anomic/server/serverInstantBlockingThread.java
index c55506af8..a143012e1 100644
--- a/source/de/anomic/server/serverInstantBlockingThread.java
+++ b/source/de/anomic/server/serverInstantBlockingThread.java
@@ -76,7 +76,7 @@ public class serverInstantBlockingThread<J extends serverProcessorJob> extends serverAbstractBlockingThread<J>
     
     @SuppressWarnings("unchecked")
     public J job(final J next) throws Exception {
-        if (next == null) return null; // poison pill: shutdown
+        if (next == null || next == serverProcessorJob.poisonPill) return null; // poison pill: shutdown
         instantThreadCounter++;
         //System.out.println("started job " + this.handle + ": " + this.getName());
         jobs.put(this.handle, this.getName());
diff --git a/source/de/anomic/server/serverProcessorJob.java b/source/de/anomic/server/serverProcessorJob.java
index b8bd9ae0f..c91a88033 100644
--- a/source/de/anomic/server/serverProcessorJob.java
+++ b/source/de/anomic/server/serverProcessorJob.java
@@ -32,7 +32,7 @@ public class serverProcessorJob {
     public final static int STATUS_FINISHED = 3;
     public final static int STATUS_POISON = 99;
     
-    public int status = 0;
+    public int status = STATUS_INITIATED;
     
     public serverProcessorJob() {
         this.status = STATUS_INITIATED;
diff --git a/source/de/anomic/urlRedirector/urlRedirectord.java b/source/de/anomic/urlRedirector/urlRedirectord.java
index ce781ae14..8e40d7778 100644
--- a/source/de/anomic/urlRedirector/urlRedirectord.java
+++ b/source/de/anomic/urlRedirector/urlRedirectord.java
@@ -7,6 +7,7 @@ import java.io.PrintWriter;
 import java.net.MalformedURLException;
 import java.util.Date;
 
+import de.anomic.crawler.CrawlEntry;
 import de.anomic.crawler.CrawlProfile;
 import de.anomic.data.userDB;
 import de.anomic.http.HttpClient;
@@ -195,15 +196,18 @@ public class urlRedirectord implements serverHandler, Cloneable {
                     sb.crawlQueues.errorURL.remove(urlhash);
                     
                     // enqueuing URL for crawling
-                    sb.crawlStacker.enqueueEntry(
+                    sb.crawlStacker.enqueueEntry(new CrawlEntry(
+                        sb.webIndex.seedDB.mySeed().hash,
                         reqURL,
                         null,
-                        sb.webIndex.seedDB.mySeed().hash,
                         "URL Redirector",
                        new Date(),
+                        null,
+                        profile.handle(),
                         0,
-                        profile
-                    );
+                        0,
+                        0
+                    ));
                 } else {
                     reasonString = "Unsupported file extension";
                 }
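Closing note on the poisonPill comparison in serverInstantBlockingThread:
terminating a consumer that blocks on a queue is conventionally done by
enqueuing a dedicated sentinel object and letting the worker exit when it takes
it, which is what the identity check against serverProcessorJob.poisonPill
enables. A minimal, runnable sketch of the pattern; PoisonPillDemo, Job and
POISON_PILL are generic names, not YaCy's classes:

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    // Minimal poison-pill shutdown: a sentinel instance wakes the worker out
    // of take() and tells it to stop; regular jobs are processed as usual.
    public final class PoisonPillDemo {
        static final class Job {
            final String payload;
            Job(final String payload) { this.payload = payload; }
        }
        static final Job POISON_PILL = new Job(null); // compared by identity

        public static void main(final String[] args) throws InterruptedException {
            final BlockingQueue<Job> queue = new LinkedBlockingQueue<Job>();
            final Thread worker = new Thread(new Runnable() {
                public void run() {
                    try {
                        while (true) {
                            final Job next = queue.take();   // blocks until work arrives
                            if (next == POISON_PILL) return; // shutdown signal
                            System.out.println("processed " + next.payload);
                        }
                    } catch (final InterruptedException e) {
                        Thread.currentThread().interrupt();
                    }
                }
            });
            worker.start();
            queue.put(new Job("a"));
            queue.put(new Job("b"));
            queue.put(POISON_PILL); // ask the worker to finish after pending jobs
            worker.join();
        }
    }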