From 70dd26ec95e3a7d83958547b0f481f2211350bef Mon Sep 17 00:00:00 2001
From: orbiter
Date: Thu, 19 Aug 2010 23:52:38 +0000
Subject: [PATCH] added the new crawl scheduling function to the crawl start menu:

- the scheduler extends the option for re-crawl timing. Many people
  misunderstood the re-crawl timing feature because it was just a criterion
  for the URL double-check and not a scheduler. Now the scheduler setting is
  combined with the re-crawl setting, and users can choose between no
  re-crawl, the re-crawl that was possible so far, and a scheduled re-crawl.
  The 'classic' re-crawl time is set automatically when the scheduling
  function is selected (a short sketch of this mapping is appended after the
  diff)
- removed the bookmark-based scheduler. That scheduler could not transport
  all attributes of a crawl start and therefore did not support special
  crawl starts, e.g. for forums and wikis
- since the old scheduler was not able to crawl special forums and wikis,
  the must-not-match filter was statically fixed to a list of known bad
  pages for these special use cases. Since the new scheduler can handle
  these filters, the default settings for the filters can be removed
- removed the busy thread that was used to trigger the bookmark-based
  scheduler
- removed the crontab for the bookmark-based scheduler

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7051 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 defaults/autoReCrawl.conf                   |   8 -
 defaults/yacy.init                          |   5 -
 htroot/CrawlStart_p.html                    | 126 +++++++-----
 htroot/Crawler_p.java                       |  38 +++-
 htroot/PerformanceQueues_p.java             |  13 +-
 htroot/QuickCrawlLink_p.java                |   2 +-
 htroot/Table_API_p.java                     |   2 +-
 source/de/anomic/crawler/CrawlProfile.java  |   1 -
 .../de/anomic/crawler/CrawlSwitchboard.java |  14 +-
 source/de/anomic/data/SitemapParser.java    |   2 +-
 source/de/anomic/data/WorkTables.java       |  96 ++++++---
 source/de/anomic/data/bookmarksDB.java      | 188 +-----------------
 source/de/anomic/search/Switchboard.java    |   5 +-
 source/net/yacy/kelondro/blob/Tables.java   |  11 +
 14 files changed, 200 insertions(+), 311 deletions(-)
 delete mode 100644 defaults/autoReCrawl.conf

diff --git a/defaults/autoReCrawl.conf b/defaults/autoReCrawl.conf
deleted file mode 100644
index 217f2b1ba..000000000
--- a/defaults/autoReCrawl.conf
+++ /dev/null
@@ -1,8 +0,0 @@
-# YaCy autoReCrawl configuration for bookmark folders
-#
-# schedule|folder|filter|crawlingdepth|crawlingIfOlder|DomFilterDepth|DomMaxPages|crawlingQ|indexText|indexMedia|crawlOrder|xsstopw|storeHTCache
-3600000 /autoReCrawl/hourly .* 1 59 -1 -1 true true true true false false
-86400000 /autoReCrawl/daily .* 3 1439 -1 -1 true true true true false false
-604800000 /autoReCrawl/weekly .* 3 10079 -1 -1 true true true true false false
-2678400000 /autoReCrawl/monthly .* 4 44639 -1 -1 true true true true false false
-# eof
diff --git a/defaults/yacy.init b/defaults/yacy.init
index 54b6f5af9..671341eae 100644
--- a/defaults/yacy.init
+++ b/defaults/yacy.init
@@ -579,11 +579,6 @@ filterOutStopwordsFromTopwords=true
 90_cleanup_busysleep=300000
 90_cleanup_memprereq=0
 
-# autoReCrawl Options
-autoReCrawl_idlesleep = 3600000
-autoReCrawl_busysleep = 3600000
-autoReCrawl_memprereq = -1
-
 # additional attributes:
 # performanceIO is a percent-value.
a value of 10 means, that 10% of the busysleep time # is used to flush the RAM cache, which is the major part of the IO in YaCy diff --git a/htroot/CrawlStart_p.html b/htroot/CrawlStart_p.html index 56e036261..0ee51c9ae 100644 --- a/htroot/CrawlStart_p.html +++ b/htroot/CrawlStart_p.html @@ -54,7 +54,7 @@ : - + @@ -70,42 +70,65 @@ Other already visited URLs are sorted out as "double", if they are not allowed using the re-crawl option. - - Create Bookmark - - : - -    (works with "Starting Point: From URL" only) -

- :    -

- : - -
  - - - This option lets you create a bookmark from your crawl start URL. For automatic re-crawling you can use the following default folders:
- - Attention: recrawl settings depend on the folder. They can be adjusted in /DATA/SETTINGS/autoReCrawl.conf. - - : - This defines how often the Crawler will follow links embedded in websites.
- A minimum of 0 is recommended and means that the page you enter under "Starting Point" will be added - to the index, but no linked content is indexed. 2-4 is good for normal indexing. - Be careful with the depth. Consider a branching factor of average 20; - A prefetch-depth of 8 would index 25.600.000.000 pages, maybe this is the whole WWW. + This defines how often the Crawler will follow links (of links..) embedded in websites. + 0 means that only the page you enter under "Starting Point" will be added + to the index. 2-4 is good for normal indexing. Values over 8 are not useful, since a depth-8 crawl will + index approximately 25.600.000.000 pages, maybe this is the whole WWW. + Scheduled re-crawl + +
+
no doubles
+
run this crawl once and never load any page that is already known; only the start URL may be loaded again.
+
re-load
+
run this crawl once, but treat urls that are known since
+ + not as double and load them again. No scheduled re-crawl. +
+
scheduled
+
after starting this crawl, repeat the crawl every
+ + automatically. +
+
+ + + A web crawl performs a double-check on all links found in the internet against the internal database. If the same url is found again, + then the url is treated as double when you check the 'no doubles' option. A url may be loaded again when it has reached a specific age, + to use that check the 'once' option. When you want that this web crawl is repeated automatically, then check the 'scheduled' option. + In this case the crawl is repeated after the given time and no url from the previous crawl is omitted as double. + + + : Use filter   @@ -132,26 +155,6 @@ - Re-crawl known URLs: - - : -    - : - - - - - If you use this option, web pages that are already existent in your database are crawled and indexed again. - It depends on the age of the last crawl if this is done or not: if the last crawl is older than the given - date, the page is crawled again, otherwise it is treated as 'double' and not loaded or indexed again. - - - Auto-Dom-Filter: : @@ -167,7 +170,7 @@ The default value 0 gives no restrictions. - + Maximum Pages per Domain: : @@ -181,7 +184,7 @@ the given depth. Domains outside the given depth are then sorted-out anyway. - + : @@ -189,7 +192,7 @@ is accessed with URLs containing question marks. If you are unsure, do not check this to avoid crawl loops. - + : @@ -275,6 +278,23 @@ --> + + Create Bookmark + + : + +    (works with "Starting Point: From URL" only) +

+ :    +

+ : + +
  + + + This option lets you create a bookmark from your crawl start URL. + + diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 6d3d0e090..f918f354e 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -140,9 +140,6 @@ public class Crawler_p { try {crawlingStartURL = new DigestURI(crawlingStart, null);} catch (final MalformedURLException e1) {} crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true); - // store this call as api call - sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart); - // set new properties final boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start final boolean subPath = post.get("range", "wide").equals("subpath"); // special property in simple crawl start @@ -167,12 +164,37 @@ public class Crawler_p { env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8; - final boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on"); - final int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1")); - final String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year"); - final long crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit); - env.setConfig("crawlingIfOlder", crawlingIfOlder); + // recrawl + final String recrawl = post.get("recrawl", "nodoubles"); // nodoubles, reload, scheduler + boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on"); + int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1")); + String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year"); // year, month, day, hour + int repeat_time = Integer.parseInt(post.get("repeat_time", "-1")); + final String repeat_unit = post.get("repeat_unit", "seldays"); // selminutes, selhours, seldays + if (recrawl.equals("scheduler")) { + // set crawlingIfOlder attributes that are appropriate for scheduled crawling + crawlingIfOlderCheck = true; + crawlingIfOlderNumber = repeat_unit.equals("selminutes") ? 1 : repeat_unit.equals("selhours") ? repeat_time / 2 : repeat_time * 12; + crawlingIfOlderUnit = "hour"; + } else if (recrawl.equals("reload")) { + repeat_time = -1; + crawlingIfOlderCheck = true; + } else if (recrawl.equals("nodoubles")) { + repeat_time = -1; + crawlingIfOlderCheck = false; + } + long crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit); + env.setConfig("crawlingIfOlder", crawlingIfOlder); + + // store this call as api call + if (repeat_time > 0) { + // store as scheduled api call + sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart, repeat_time, repeat_unit.substring(3)); + } else { + // store just a protocol + sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart); + } final boolean crawlingDomFilterCheck = post.get("crawlingDomFilterCheck", "off").equals("on"); final int crawlingDomFilterDepth = (crawlingDomFilterCheck) ? 
Integer.parseInt(post.get("crawlingDomFilterDepth", "-1")) : -1; env.setConfig("crawlingDomFilterDepth", Integer.toString(crawlingDomFilterDepth)); diff --git a/htroot/PerformanceQueues_p.java b/htroot/PerformanceQueues_p.java index 8ae423edc..8a5efe424 100644 --- a/htroot/PerformanceQueues_p.java +++ b/htroot/PerformanceQueues_p.java @@ -207,13 +207,12 @@ public class PerformanceQueues_p { busysleep = sb.getConfigLong(threadName + "_busysleep", busysleep); } if (setProfile) { - if (threadName.equals(SwitchboardConstants.PEER_PING) - || threadName.equals(SwitchboardConstants.SEED_UPLOAD) - || threadName.equals(SwitchboardConstants.CLEANUP) - || threadName.equals("autoReCrawl") - ) { /* do not change any values */ } - else if (threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER) - || threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { + if (threadName.equals(SwitchboardConstants.PEER_PING) || + threadName.equals(SwitchboardConstants.SEED_UPLOAD) || + threadName.equals(SwitchboardConstants.CLEANUP)) { + /* do not change any values */ + } else if (threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER) || + threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) { sb.setRemotecrawlPPM(Math.max(1, (int) (sb.getConfigLong("network.unit.remotecrawl.speed", 60) / multiplier))); } else { diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index bf8de8c21..6d01dae62 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -109,7 +109,7 @@ public class QuickCrawlLink_p { // get other parameters if set final String crawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL); - final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_BAD_URL); + final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER); final int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0")); final boolean crawlDynamic = post.get("crawlingQ", "").equals("on"); final boolean indexText = post.get("indexText", "on").equals("on"); diff --git a/htroot/Table_API_p.java b/htroot/Table_API_p.java index 21d45b733..b8a6d3675 100644 --- a/htroot/Table_API_p.java +++ b/htroot/Table_API_p.java @@ -62,7 +62,7 @@ public class Table_API_p { if (action.equals("on")) { Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes()); if (row != null) { - row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); + row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7); row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); WorkTables.calculateAPIScheduler(row, false); sb.tables.update(WorkTables.TABLE_API_NAME, row); diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java index 6d3b84174..4fb53ccc5 100644 --- a/source/de/anomic/crawler/CrawlProfile.java +++ b/source/de/anomic/crawler/CrawlProfile.java @@ -46,7 +46,6 @@ public class CrawlProfile { public static final String MATCH_ALL = ".*"; public static final String MATCH_NEVER = ""; - public static final String MATCH_BAD_URL = ".*memberlist.*|.*previous.*|.*next.*|.*p=.*"; static ConcurrentHashMap> domsCache = new ConcurrentHashMap>(); diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java index 4be91cdc3..313d56980 100644 --- a/source/de/anomic/crawler/CrawlSwitchboard.java +++ b/source/de/anomic/crawler/CrawlSwitchboard.java @@ -163,7 +163,7 @@ public final class CrawlSwitchboard { if 
(this.defaultProxyProfile == null) { // generate new default entry for proxy crawling - this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, + this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, -1, false, true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/, @@ -174,33 +174,33 @@ public final class CrawlSwitchboard { } if (this.defaultRemoteProfile == null) { // generate new default entry for remote crawling - defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0, -1, -1, -1, true, true, true, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH); } if (this.defaultTextSnippetLocalProfile == null) { // generate new default entry for snippet fetch and optional crawling - defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH); } if (this.defaultTextSnippetGlobalProfile == null) { // generate new default entry for snippet fetch and optional crawling - defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST); } this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST); if (this.defaultMediaSnippetLocalProfile == null) { // generate new default entry for snippet fetch and optional crawling - defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, false, true, false, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST); } if (this.defaultMediaSnippetGlobalProfile == null) { // generate new default entry for snippet fetch and optional crawling - defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, 
CrawlProfile.MATCH_NEVER, 0, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST); } if (this.defaultSurrogateProfile == null) { // generate new default entry for surrogate parsing - defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0, + defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0, this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, -1, true, true, false, false, false, false, true, true, false, CrawlProfile.CacheStrategy.NOCACHE); } } diff --git a/source/de/anomic/data/SitemapParser.java b/source/de/anomic/data/SitemapParser.java index eb331a688..01877bbdb 100644 --- a/source/de/anomic/data/SitemapParser.java +++ b/source/de/anomic/data/SitemapParser.java @@ -315,7 +315,7 @@ public class SitemapParser extends DefaultHandler { return this.sb.crawler.profilesActiveCrawls.newEntry( domainName, sitemapURL, // crawling Filter - CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, + CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, // Depth 0, // force recrawling diff --git a/source/de/anomic/data/WorkTables.java b/source/de/anomic/data/WorkTables.java index c1a6df2fd..e1d8c0f74 100644 --- a/source/de/anomic/data/WorkTables.java +++ b/source/de/anomic/data/WorkTables.java @@ -77,13 +77,6 @@ public class WorkTables extends Tables { public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) { // remove the apicall attributes from the post object String pk = post.remove(TABLE_API_COL_APICALL_PK); - String count = post.remove(TABLE_API_COL_APICALL_COUNT); - if (count == null) count = "1"; - String time = post.remove(TABLE_API_COL_APICALL_SCHEDULE_TIME); - String unit = post.remove(TABLE_API_COL_APICALL_SCHEDULE_UNIT); - if (time == null || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) { - time = ""; unit = ""; - } // generate the apicall url - without the apicall attributes final String apiurl = /*"http://localhost:" + getConfig("port", "8080") +*/ "/" + servletName + "?" 
+ post.toString(); @@ -100,20 +93,7 @@ public class WorkTables extends Tables { // insert or update entry try { - if (row != null) { - // modify and update existing entry - - // modify date attributes and patch old values - row.put(TABLE_API_COL_DATE_LAST_EXEC, DateFormatter.formatShortMilliSecond(new Date()).getBytes()); - if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE)); - row.remove(TABLE_API_COL_DATE); - - // insert APICALL attributes - row.put(TABLE_API_COL_APICALL_COUNT, count.getBytes()); - row.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes()); - row.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes()); - super.update(TABLE_API_NAME, row); - } else { + if (row == null) { // create and insert new entry Data data = new Data(); data.put(TABLE_API_COL_TYPE, type.getBytes()); @@ -124,10 +104,19 @@ public class WorkTables extends Tables { data.put(TABLE_API_COL_URL, apiurl.getBytes()); // insert APICALL attributes - data.put(TABLE_API_COL_APICALL_COUNT, count.getBytes()); - data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes()); - data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes()); + data.put(TABLE_API_COL_APICALL_COUNT, "1"); super.insert(TABLE_API_NAME, data); + } else { + // modify and update existing entry + + // modify date attributes and patch old values + row.put(TABLE_API_COL_DATE_LAST_EXEC, DateFormatter.formatShortMilliSecond(new Date()).getBytes()); + if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE)); + row.remove(TABLE_API_COL_DATE); + + // insert APICALL attributes + row.put(TABLE_API_COL_APICALL_COUNT, row.get(TABLE_API_COL_APICALL_COUNT, 1) + 1); + super.update(TABLE_API_NAME, row); } } catch (IOException e) { Log.logException(e); @@ -137,6 +126,56 @@ public class WorkTables extends Tables { Log.logInfo("APICALL", apiurl); } + /** + * store a API call and set attributes to schedule a re-call of that API call according to a given frequence + * This is the same as the previous method but it also computes a re-call time and stores that additionally + * @param post the post arguments of the api call + * @param servletName the name of the servlet + * @param type name of the servlet category + * @param comment visual description of the process + * @param time the time until next scheduled execution of this api call + * @param unit the time unit for the scheduled call + */ + public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment, int time, String unit) { + if (post.containsKey(TABLE_API_COL_APICALL_PK)) { + // this api call has already been stored somewhere. + recordAPICall(post, servletName, type, comment); + return; + } + if (time < 0 || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) { + time = 0; unit = ""; + } else { + if (unit.equals("minutes") && time < 10) time = 10; + } + + // generate the apicall url - without the apicall attributes + final String apiurl = /*"http://localhost:" + getConfig("port", "8080") +*/ "/" + servletName + "?" 
+ post.toString(); + + // insert entry + try { + // create and insert new entry + Data data = new Data(); + data.put(TABLE_API_COL_TYPE, type.getBytes()); + data.put(TABLE_API_COL_COMMENT, comment.getBytes()); + byte[] date = DateFormatter.formatShortMilliSecond(new Date()).getBytes(); + data.put(TABLE_API_COL_DATE_RECORDING, date); + data.put(TABLE_API_COL_DATE_LAST_EXEC, date); + data.put(TABLE_API_COL_URL, apiurl.getBytes()); + + // insert APICALL attributes + data.put(TABLE_API_COL_APICALL_COUNT, "1".getBytes()); + data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.toString(time).getBytes()); + data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes()); + calculateAPIScheduler(data, false); // set next execution time + super.insert(TABLE_API_NAME, data); + } catch (IOException e) { + Log.logException(e); + } catch (RowSpaceExceededException e) { + Log.logException(e); + } + Log.logInfo("APICALL", apiurl); + } + /** * execute an API call using a api table row which contains all essentials * to access the server also the host, port and the authentication realm must be given @@ -164,9 +203,6 @@ public class WorkTables extends Tables { if (row == null) continue; String url = "http://" + host + ":" + port + new String(row.get(WorkTables.TABLE_API_COL_URL)); url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + new String(row.getPK()); - url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1); - url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, ""); - url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, ""); try { client.GETbytes(url); l.put(url, client.getStatusCode()); @@ -197,8 +233,9 @@ public class WorkTables extends Tables { /** * calculate the execution time in a api call table based on given scheduling time and last execution time * @param row the database row in the api table + * @param update if true then the next execution time is based on the latest computed execution time; othervise it is based on the last execution time */ - public static void calculateAPIScheduler(Tables.Row row, boolean update) { + public static void calculateAPIScheduler(Tables.Data row, boolean update) { Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, new Date()) : null; date = update ? 
row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date); int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1); @@ -208,10 +245,11 @@ public class WorkTables extends Tables { } String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days"); long d = date.getTime(); - if (unit.equals("minutes")) d += 60000L * time; + if (unit.equals("minutes")) d += 60000L * Math.max(10, time); if (unit.equals("hours")) d += 60000L * 60L * time; if (unit.equals("days")) d += 60000L * 60L * 24L * time; if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; + d -= d % 60000; // remove seconds row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d)); } diff --git a/source/de/anomic/data/bookmarksDB.java b/source/de/anomic/data/bookmarksDB.java index 54668dd00..385dfa927 100644 --- a/source/de/anomic/data/bookmarksDB.java +++ b/source/de/anomic/data/bookmarksDB.java @@ -23,18 +23,11 @@ package de.anomic.data; -import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.FileWriter; import java.io.IOException; -import java.io.InputStreamReader; import java.io.Serializable; import java.net.MalformedURLException; import java.util.Comparator; -import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -42,24 +35,15 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Pattern; import net.yacy.kelondro.blob.MapHeap; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.order.NaturalOrder; -import net.yacy.kelondro.util.DateFormatter; -import net.yacy.kelondro.workflow.BusyThread; -import net.yacy.kelondro.workflow.InstantBusyThread; - -import de.anomic.crawler.CrawlProfile; -import de.anomic.crawler.retrieval.Request; -import de.anomic.search.Segments; -import de.anomic.search.Switchboard; -import de.anomic.yacy.yacyNewsPool; public class bookmarksDB { + // ------------------------------------ // Declaration of Class-Attributes // ------------------------------------ @@ -67,7 +51,6 @@ public class bookmarksDB { //final static int SORT_ALPHA = 1; private final static int SORT_SIZE = 2; private final static int SHOW_ALL = -1; - private final static String SLEEP_TIME = "3600000"; // default sleepTime: check for recrawls every hour // bookmarks private MapHeap bookmarks; @@ -75,9 +58,6 @@ public class bookmarksDB { // tags private ConcurrentHashMap tags; - // autoReCrawl - private final BusyThread autoReCrawl; - private BookmarkDate dates; // ------------------------------------ @@ -120,15 +100,6 @@ public class bookmarksDB { //this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_'); this.dates = new BookmarkDate(datesFile); if (!datesExisted) this.dates.init(new bookmarkIterator(true)); - - // autoReCrawl - final Switchboard sb = Switchboard.getSwitchboard(); - this.autoReCrawl = new InstantBusyThread(this, "autoReCrawl", null, null, Long.MIN_VALUE, Long.MAX_VALUE, Long.MIN_VALUE, Long.MAX_VALUE); - final long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME)); - sb.deployThread("autoReCrawl", "autoReCrawl Scheduler", "simple scheduler for automatic 
re-crawls of bookmarked urls", null, autoReCrawl, 120000, - sleepTime, sleepTime, Long.parseLong(sb.getConfig("autoReCrawl_memprereq" , "-1")) - ); - Log.logInfo("BOOKMARKS", "autoReCrawl - serverBusyThread initialized checking every "+(sleepTime/1000/60)+" minutes for recrawls"); } // ----------------------------------------------------- @@ -141,163 +112,6 @@ public class bookmarksDB { dates.close(); } - // ----------------------------------------------------- - // bookmarksDB's functions for autoReCrawl - // ----------------------------------------------------- - - public boolean autoReCrawl() { - - // read crontab - final File file = new File (Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf"); - String s; - try { - final BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file))); - Log.logInfo("BOOKMARKS", "autoReCrawl - reading schedules from " + file); - while( null != (s = in.readLine()) ) { - if (s.length() > 0 && s.charAt(0) != '#') { - final String parser[] = s.split("\t"); - if (parser.length == 13) { - folderReCrawl(Long.parseLong(parser[0]), parser[1], parser[2], Integer.parseInt(parser[3]), Long.parseLong(parser[4]), - Integer.parseInt(parser[5]), Integer.parseInt(parser[6]), Boolean.parseBoolean(parser[7]), - Boolean.parseBoolean(parser[8]), Boolean.parseBoolean(parser[9]), - Boolean.parseBoolean(parser[10]), Boolean.parseBoolean(parser[11]), - Boolean.parseBoolean(parser[12]), CrawlProfile.CacheStrategy.IFFRESH - ); - } - if (parser.length == 14) { - folderReCrawl(Long.parseLong(parser[0]), parser[1], parser[2], Integer.parseInt(parser[3]), Long.parseLong(parser[4]), - Integer.parseInt(parser[5]), Integer.parseInt(parser[6]), Boolean.parseBoolean(parser[7]), - Boolean.parseBoolean(parser[8]), Boolean.parseBoolean(parser[9]), - Boolean.parseBoolean(parser[10]), Boolean.parseBoolean(parser[11]), - Boolean.parseBoolean(parser[12]), CrawlProfile.CacheStrategy.decode(Integer.parseInt(parser[13])) - ); - } - } - } - in.close(); - } catch( FileNotFoundException ex ) { - try { - Log.logInfo("BOOKMARKS", "autoReCrawl - creating new autoReCrawl.conf"); - final File inputFile = new File(Switchboard.getSwitchboard().getRootPath(),"defaults/autoReCrawl.conf"); - final File outputFile = new File(Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf"); - final FileReader i = new FileReader(inputFile); - final FileWriter o = new FileWriter(outputFile); - int c; - while ((c = i.read()) != -1) { - o.write(c); - } - i.close(); - o.close(); - autoReCrawl(); - return true; - } catch( FileNotFoundException e ) { - Log.logSevere("BOOKMARKS", "autoReCrawl - file not found error: defaults/autoReCrawl.conf", e); - return false; - } catch (IOException e) { - Log.logSevere("BOOKMARKS", "autoReCrawl - IOException: defaults/autoReCrawl.conf", e); - return false; - } - } catch( Exception ex ) { - Log.logSevere("BOOKMARKS", "autoReCrawl - error reading " + file, ex); - return false; - } - return true; - } - - public void folderReCrawl(long schedule, String folder, String crawlingfilter, int newcrawlingdepth, long crawlingIfOlder, - int crawlingDomFilterDepth, int crawlingDomMaxPages, boolean crawlingQ, boolean indexText, boolean indexMedia, - boolean crawlOrder, boolean xsstopw, boolean storeHTCache, CrawlProfile.CacheStrategy cacheStrategy) { - - final Switchboard sb = Switchboard.getSwitchboard(); - final Iterator bit = getBookmarksIterator(folder, true); - Log.logInfo("BOOKMARKS", "autoReCrawl - processing: "+folder); - - 
final boolean xdstopw = xsstopw; - final boolean xpstopw = xsstopw; - - while(bit.hasNext()) { - - final Bookmark bm = getBookmark(bit.next()); - final long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME)); - final long interTime = (System.currentTimeMillis()-bm.getTimeStamp())%schedule; - - final Date date = new Date(bm.getTimeStamp()); - Log.logInfo("BOOKMARKS", "autoReCrawl - checking schedule for: "+"["+DateFormatter.formatISO8601(date)+"] "+bm.getUrl()); - - if (interTime >= 0 && interTime < sleepTime) { - try { - int pos = 0; - // set crawlingStart to BookmarkUrl - final String crawlingStart = bm.getUrl(); - String newcrawlingMustMatch = crawlingfilter; - - final DigestURI crawlingStartURL = new DigestURI(crawlingStart, null); - - // set the crawling filter - if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = ".*"; // avoid that all urls are filtered out if bad value was submitted - - if (crawlingStartURL!= null && newcrawlingMustMatch.equals("dom")) { - newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*"; - } - if (crawlingStart!= null && newcrawlingMustMatch.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) { - newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*"; - } - - // check if the crawl filter works correctly - Pattern.compile(newcrawlingMustMatch); - - final byte[] urlhash = crawlingStartURL.hash(); - - sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).remove(urlhash); - sb.crawlQueues.noticeURL.removeByURLHash(urlhash); - sb.crawlQueues.errorURL.remove(urlhash); - - // stack url - sb.crawler.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it - final CrawlProfile.entry pe = sb.crawler.profilesActiveCrawls.newEntry( - folder+"/"+crawlingStartURL, crawlingStartURL, - newcrawlingMustMatch, - CrawlProfile.MATCH_BAD_URL, - newcrawlingdepth, - sb.crawler.profilesActiveCrawls.getRecrawlDate(crawlingIfOlder), crawlingDomFilterDepth, crawlingDomMaxPages, - crawlingQ, - indexText, indexMedia, - storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw, cacheStrategy); - sb.crawlStacker.enqueueEntry(new Request( - sb.peers.mySeed().hash.getBytes(), - crawlingStartURL, - null, - "CRAWLING-ROOT", - new Date(), - pe.handle(), - 0, - 0, - 0 - )); - Log.logInfo("BOOKMARKS", "autoReCrawl - adding crawl profile for: " + crawlingStart); - // serverLog.logInfo("BOOKMARKS", "autoReCrawl - crawl filter is set to: " + newcrawlingfilter); - // generate a YaCyNews if the global flag was set - if (crawlOrder) { - Map m = new HashMap(pe.map()); // must be cloned - m.remove("specificDepth"); - m.remove("indexText"); - m.remove("indexMedia"); - m.remove("remoteIndexing"); - m.remove("xsstopw"); - m.remove("xpstopw"); - m.remove("xdstopw"); - m.remove("storeTXCache"); - m.remove("storeHTCache"); - m.remove("generalFilter"); - m.remove("specificFilter"); - m.put("intention", "Automatic ReCrawl!"); - sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), yacyNewsPool.CATEGORY_CRAWL_START, m); - } - } catch (MalformedURLException e1) {} - } // if - } // while(bit.hasNext()) - } // } autoReCrawl() - // ----------------------------------------------------------- // bookmarksDB's functions for bookmarksTable / bookmarkCache // ----------------------------------------------------------- diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index da84b7ba5..7cf670836 100644 --- a/source/de/anomic/search/Switchboard.java +++ 
b/source/de/anomic/search/Switchboard.java @@ -137,7 +137,6 @@ import de.anomic.data.userDB; import de.anomic.data.wiki.wikiBoard; import de.anomic.data.wiki.wikiCode; import de.anomic.data.wiki.wikiParser; -//import de.anomic.http.client.Client; import de.anomic.http.client.Cache; import de.anomic.http.server.HTTPDemon; import de.anomic.http.server.HeaderFramework; @@ -211,7 +210,7 @@ public final class Switchboard extends serverSwitch { public boolean rankingOn; public CRDistribution rankingOwnDistribution; public CRDistribution rankingOtherDistribution; - public Map outgoingCookies, incomingCookies; + public Map outgoingCookies, incomingCookies; public volatile long proxyLastAccess, localSearchLastAccess, remoteSearchLastAccess; public yacyCore yc; public ResourceObserver observer; @@ -608,7 +607,7 @@ public final class Switchboard extends serverSwitch { SwitchboardConstants.CLEANUP_METHOD_JOBCOUNT, SwitchboardConstants.CLEANUP_METHOD_FREEMEM, 60000, Long.MAX_VALUE, 10000, Long.MAX_VALUE), - 600000); // all 5 Minutes, wait 10 minutes until first run + 60000); // all 5 Minutes, wait 1 minute until first run deployThread(SwitchboardConstants.SURROGATES, "Surrogates", "A thread that polls the SURROGATES path and puts all Documents in one surroagte file into the indexing queue.", null, new InstantBusyThread( this, diff --git a/source/net/yacy/kelondro/blob/Tables.java b/source/net/yacy/kelondro/blob/Tables.java index 821e89113..b3d243b2d 100644 --- a/source/net/yacy/kelondro/blob/Tables.java +++ b/source/net/yacy/kelondro/blob/Tables.java @@ -433,6 +433,17 @@ public class Tables { return dflt; } } + + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append('{'); + for (Map.Entry entry: this.entrySet()) { + sb.append(entry.getKey()).append('=').append(new String(entry.getValue())).append(", "); + } + if (sb.length() > 1) sb.setLength(sb.length() - 2); + sb.append('}'); + return sb.toString(); + } } public class Row extends Data {
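Illustration (not part of the patch): the commit message states that the 'classic' re-crawl time is set automatically when the scheduled re-crawl is selected. The minimal Java sketch below mirrors the mapping added in the Crawler_p.java hunk above; the class and method names are invented for this example, while the form unit values ("selminutes", "selhours", "seldays") and the arithmetic are taken from the patch.

public class ScheduledRecrawlSketch {

    // Mirrors the "scheduler" branch: the re-crawl age threshold is roughly half
    // of the repeat interval, expressed in hours, so that a repeated crawl does
    // not drop every already-known URL as a double.
    static int ifOlderHours(int repeatTime, String repeatUnit) {
        if ("selminutes".equals(repeatUnit)) return 1;
        if ("selhours".equals(repeatUnit))   return repeatTime / 2;
        return repeatTime * 12; // "seldays": repeat every N days -> re-load URLs older than N*12 hours
    }

    public static void main(String[] args) {
        System.out.println(ifOlderHours(2, "seldays"));     // 24: two-day schedule, 24-hour threshold
        System.out.println(ifOlderHours(6, "selhours"));    // 3
        System.out.println(ifOlderHours(30, "selminutes")); // 1
    }
}

For example, a crawl repeated every 2 days gets a crawlingIfOlder threshold of 24 hours, so the next scheduled run re-loads the known pages instead of treating them all as doubles.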
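The scheduling itself is driven by WorkTables.calculateAPIScheduler (patched above), which derives the next execution date from the recorded schedule time and unit. The sketch below reproduces that computation as a stand-alone method under the assumptions visible in the hunk: minute schedules are clamped to at least 10 minutes, an overdue date is pushed 10 minutes into the future, and seconds are truncated so scheduled calls align on full minutes. The class and method names are again invented for illustration.

import java.util.Date;

public class ApiSchedulerSketch {

    // base is the last execution date (or, on update, the previously planned one).
    static Date nextExecution(Date base, int time, String unit) {
        long d = base.getTime();
        if ("minutes".equals(unit)) d += 60000L * Math.max(10, time);
        if ("hours".equals(unit))   d += 60000L * 60L * time;
        if ("days".equals(unit))    d += 60000L * 60L * 24L * time;
        if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; // overdue: retry in 10 minutes
        d -= d % 60000L; // strip the seconds
        return new Date(d);
    }

    public static void main(String[] args) {
        // a crawl start recorded with repeat_time=7 and unit "days" is planned one week ahead
        System.out.println(nextExecution(new Date(), 7, "days"));
    }
}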