added the new crawl scheduling function to the crawl start menu:

- the scheduler extends the re-crawl timing option. Many people misunderstood the re-crawl timing feature because it was only a criterion for the URL double-check, not a scheduler. The scheduler setting is now combined with the re-crawl setting, giving a choice between no re-crawl, the re-crawl that was possible so far, and a scheduled re-crawl. The 'classic' re-crawl time is set automatically when the scheduling option is selected (see the sketch below)
- removed the bookmark-based scheduler. That scheduler could not transport all attributes of a crawl start and therefore did not support special crawl starts, e.g. for forums and wikis
- since the old scheduler was not able to crawl special forums and wikis, the must-not-match filter was statically fixed to exclude the typical bad pages of these special use cases. Because the new scheduler can handle these filters, the default settings for the filters can now be removed
- removed the busy thread that was used to trigger the bookmark-based scheduler
- removed the crontab for the bookmark-based scheduler

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7051 6c8d7289-2bf4-0310-a012-ef5d649a1542
parent 5a994c9796
commit 70dd26ec95
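As a reading aid before the diff: a minimal, self-contained sketch of the recrawl-mode mapping described above. It restates the logic of the Crawler_p.java hunk further down in isolation; the class and method names and the plain parameters are illustrative stand-ins for the servlet's serverObjects handling, not YaCy code.

// Illustrative sketch (not part of the commit): how the scheduled re-crawl choice
// is mapped onto the classic re-crawl age, reduced to plain parameters.
public class RecrawlMappingSketch {

    /** Classic re-crawl age in hours derived from the scheduler settings. */
    static int scheduledRecrawlAgeHours(final int repeatTime, final String repeatUnit) {
        // the scheduled mode sets the re-crawl age to roughly half the repeat period
        if (repeatUnit.equals("selminutes")) return 1;          // floor of one hour
        if (repeatUnit.equals("selhours"))   return repeatTime / 2;
        return repeatTime * 12;                                 // "seldays": half a day-period, in hours
    }

    public static void main(final String[] args) {
        // form default: repeat the crawl every 7 days -> re-crawl age of 84 hours (3.5 days)
        System.out.println(scheduledRecrawlAgeHours(7, "seldays") + " hours");
    }
}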

@ -1,8 +0,0 @@
# YaCy autoReCrawl configuration for bookmark folders
#
# schedule|folder|filter|crawlingdepth|crawlingIfOlder|DomFilterDepth|DomMaxPages|crawlingQ|indexText|indexMedia|crawlOrder|xsstopw|storeHTCache
3600000 /autoReCrawl/hourly .* 1 59 -1 -1 true true true true false false
86400000 /autoReCrawl/daily .* 3 1439 -1 -1 true true true true false false
604800000 /autoReCrawl/weekly .* 3 10079 -1 -1 true true true true false false
2678400000 /autoReCrawl/monthly .* 4 44639 -1 -1 true true true true false false
# eof

@ -579,11 +579,6 @@ filterOutStopwordsFromTopwords=true
90_cleanup_busysleep=300000
90_cleanup_memprereq=0
# autoReCrawl Options
autoReCrawl_idlesleep = 3600000
autoReCrawl_busysleep = 3600000
autoReCrawl_memprereq = -1
# additional attributes:
# performanceIO is a percent-value. a value of 10 means, that 10% of the busysleep time
# is used to flush the RAM cache, which is the major part of the IO in YaCy

@ -54,7 +54,7 @@
<tr>
<td><label for="file"><span class="nobr">From File</span></label>:</td>
<td><input type="radio" name="crawlingMode" id="file" value="file" /></td>
<td><input type="file" name="crawlingFile" size="28" onfocus="check('file')" /></td>
<td><input type="file" name="crawlingFile" size="18" onfocus="check('file')" /></td>
</tr>
<tr>
<td colspan="3" class="commit">
@ -70,42 +70,65 @@
Other already visited URLs are sorted out as "double", if they are not allowed using the re-crawl option.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Create Bookmark</td>
<td>
<label for="createBookmark">Use</label>:
<input type="checkbox" name="createBookmark" id="createBookmark" />
&nbsp;&nbsp;&nbsp;(works with "Starting Point: From URL" only)
<br /><br />
<label for="bookmarkTitle"> Title</label>:&nbsp;&nbsp;&nbsp;
<input name="bookmarkTitle" id="bookmarkTitle" type="text" size="50" maxlength="100" /><br /><br />
<label for="bookmarkFolder"> Folder</label>:
<input name="bookmarkFolder" id="bookmarkFolder" type="text" size="50" maxlength="100" value="/crawlStart" />
<br />&nbsp;
</td>
<td>
This option lets you create a bookmark from your crawl start URL. For automatic re-crawling you can use the following default folders:<br/>
<ul>
<li>/autoReCrawl/hourly</li>
<li>/autoReCrawl/daily</li>
<li>/autoReCrawl/weekly</li>
<li>/autoReCrawl/monthly</li>
</ul>
Attention: recrawl settings depend on the folder. They can be adjusted in /DATA/SETTINGS/autoReCrawl.conf.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td><label for="crawlingDepth">Crawling Depth</label>:</td>
<td><input name="crawlingDepth" id="crawlingDepth" type="text" size="2" maxlength="2" value="#[crawlingDepth]#" /></td>
<td>
This defines how often the Crawler will follow links embedded in websites.<br />
A minimum of 0 is recommended and means that the page you enter under "Starting Point" will be added
to the index, but no linked content is indexed. 2-4 is good for normal indexing.
Be careful with the depth. Consider a branching factor of average 20;
A prefetch-depth of 8 would index 25.600.000.000 pages, maybe this is the whole WWW.
This defines how often the Crawler will follow links (of links..) embedded in websites.
0 means that only the page you enter under "Starting Point" will be added
to the index. 2-4 is good for normal indexing. Values over 8 are not useful, since a depth-8 crawl will
index approximately 25.600.000.000 pages, maybe this is the whole WWW.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Scheduled re-crawl</td>
<td>
<dl>
<dt>no&nbsp;doubles<input type="radio" name="recrawl" value="nodoubles" #(crawlingIfOlderCheck)#checked="checked"::#(/crawlingIfOlderCheck)#/></dt>
<dd>run this crawl once and never load any page that is already known, only the start-url may be loaded again.</dd>
<dt>re-load<input type="radio" name="recrawl" value="reload"/ #(crawlingIfOlderCheck)#::checked="checked"#(/crawlingIfOlderCheck)#></dt>
<dd>run this crawl once, but treat urls that are known since<br/>
<select name="crawlingIfOlderNumber" id="crawlingIfOlderNumber">
<option value="1">1</option><option value="2">2</option><option value="3">3</option>
<option value="4">4</option><option value="5">5</option><option value="6">6</option>
<option value="7" selected="selected">7</option>
<option value="8">8</option><option value="9">9</option><option value="10">10</option>
<option value="12">12</option><option value="14">14</option><option value="21">21</option>
<option value="28">28</option><option value="30">30</option>
</select>
<select name="crawlingIfOlderUnit">
<option value="year" #(crawlingIfOlderUnitYearCheck)#::selected="selected"#(/crawlingIfOlderUnitYearCheck)#>years</option>
<option value="month" #(crawlingIfOlderUnitMonthCheck)#::selected="selected"#(/crawlingIfOlderUnitMonthCheck)#>months</option>
<option value="day" #(crawlingIfOlderUnitDayCheck)#::selected="selected"#(/crawlingIfOlderUnitDayCheck)#>days</option>
<option value="hour" #(crawlingIfOlderUnitHourCheck)#::selected="selected"#(/crawlingIfOlderUnitHourCheck)#>hours</option>
</select> not as double and load them again. No scheduled re-crawl.
</dd>
<dt>scheduled<input type="radio" name="recrawl" value="scheduler"/></dt>
<dd>after starting this crawl, repeat the crawl every<br/>
<select name="repeat_time">
<option value="1">1</option><option value="2">2</option><option value="3">3</option>
<option value="4">4</option><option value="5">5</option><option value="6">6</option>
<option value="7" selected="selected">7</option>
<option value="8">8</option><option value="9">9</option><option value="10">10</option>
<option value="12">12</option><option value="14">14</option><option value="21">21</option>
<option value="28">28</option><option value="30">30</option>
</select>
<select name="repeat_unit">
<option value="selminutes">minutes</option>
<option value="selhours">hours</option>
<option value="seldays" selected="selected">days</option>
</select> automatically.
</dd>
</dl>
</td>
<td>
A web crawl performs a double-check on all links found in the internet against the internal database. If the same url is found again,
then the url is treated as double when you check the 'no doubles' option. A url may be loaded again when it has reached a specific age,
to use that check the 'once' option. When you want that this web crawl is repeated automatically, then check the 'scheduled' option.
In this case the crawl is repeated after the given time and no url from the previous crawl is omitted as double.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td><label for="mustmatch">Must-Match Filter</label>:</td>
<td>
<input type="radio" name="range" value="wide" checked="checked" />Use filter&nbsp;&nbsp;
@ -132,26 +155,6 @@
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td>Re-crawl known URLs:</td>
<td>
<label for="crawlingIfOlderChecked">Use</label>:
<input type="checkbox" name="crawlingIfOlderCheck" id="crawlingIfOlderChecked" #(crawlingIfOlderCheck)#::checked="checked"#(/crawlingIfOlderCheck)# />&nbsp;&nbsp;
<label for="crawlingIfOlderNumber">If older than</label>:
<input name="crawlingIfOlderNumber" id="crawlingIfOlderNumber" type="text" size="7" maxlength="7" value="#[crawlingIfOlderNumber]#" />
<select name="crawlingIfOlderUnit">
<option value="year" #(crawlingIfOlderUnitYearCheck)#::selected="selected"#(/crawlingIfOlderUnitYearCheck)#>Year(s)</option>
<option value="month" #(crawlingIfOlderUnitMonthCheck)#::selected="selected"#(/crawlingIfOlderUnitMonthCheck)#>Month(s)</option>
<option value="day" #(crawlingIfOlderUnitDayCheck)#::selected="selected"#(/crawlingIfOlderUnitDayCheck)#>Day(s)</option>
<option value="hour" #(crawlingIfOlderUnitHourCheck)#::selected="selected"#(/crawlingIfOlderUnitHourCheck)#>Hour(s)</option>
</select>
</td>
<td>
If you use this option, web pages that are already existent in your database are crawled and indexed again.
It depends on the age of the last crawl if this is done or not: if the last crawl is older than the given
date, the page is crawled again, otherwise it is treated as 'double' and not loaded or indexed again.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<td>Auto-Dom-Filter:</td>
<td>
<label for="crawlingDomFilterCheck">Use</label>:
@ -167,7 +170,7 @@
The default value 0 gives no restrictions.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<tr valign="top" class="TableCellDark">
<td>Maximum Pages per Domain:</td>
<td>
<label for="crawlingDomMaxCheck">Use</label>:
@ -181,7 +184,7 @@
the given depth. Domains outside the given depth are then sorted-out anyway.
</td>
</tr>
<tr valign="top" class="TableCellDark">
<tr valign="top" class="TableCellLight">
<td><label for="crawlingQ">Accept URLs with '?' / dynamic URLs</label>:</td>
<td><input type="checkbox" name="crawlingQ" id="crawlingQ" #(crawlingQChecked)#::checked="checked"#(/crawlingQChecked)# /></td>
<td>
@ -189,7 +192,7 @@
is accessed with URLs containing question marks. If you are unsure, do not check this to avoid crawl loops.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<tr valign="top" class="TableCellDark">
<td><label for="storeHTCache">Store to Web Cache</label>:</td>
<td><input type="checkbox" name="storeHTCache" id="storeHTCache" #(storeHTCacheChecked)#::checked="checked"#(/storeHTCacheChecked)# /></td>
<td>
@ -275,6 +278,23 @@
</td>
</tr>
-->
<tr valign="top" class="TableCellLight">
<td>Create Bookmark</td>
<td>
<label for="createBookmark">Use</label>:
<input type="checkbox" name="createBookmark" id="createBookmark" />
&nbsp;&nbsp;&nbsp;(works with "Starting Point: From URL" only)
<br /><br />
<label for="bookmarkTitle"> Title</label>:&nbsp;&nbsp;&nbsp;
<input name="bookmarkTitle" id="bookmarkTitle" type="text" size="50" maxlength="100" /><br /><br />
<label for="bookmarkFolder"> Folder</label>:
<input name="bookmarkFolder" id="bookmarkFolder" type="text" size="50" maxlength="100" value="/crawlStart" />
<br />&nbsp;
</td>
<td>
This option lets you create a bookmark from your crawl start URL.
</td>
</tr>
<tr valign="top" class="TableCellLight">
<td colspan="5"><input type="submit" name="crawlingstart" value="Start New Crawl" /></td>
</tr>
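Not part of the diff: for reference, the scheduling-related parameter names this form section posts to Crawler_p.html, written out as query strings. All other crawl-start fields are omitted; the values are the form defaults.

// Illustrative only: scheduling parameters posted by the form above (other fields omitted).
public class CrawlStartParamsSketch {
    public static void main(final String[] args) {
        // "scheduled" option, repeating the crawl every 7 days (the form default)
        final String scheduled = "recrawl=scheduler&repeat_time=7&repeat_unit=seldays";
        // "re-load" option, treating URLs known for more than 7 days as new again
        final String reload = "recrawl=reload&crawlingIfOlderNumber=7&crawlingIfOlderUnit=day";
        System.out.println(scheduled);
        System.out.println(reload);
    }
}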

@ -140,9 +140,6 @@ public class Crawler_p {
try {crawlingStartURL = new DigestURI(crawlingStart, null);} catch (final MalformedURLException e1) {}
crawlingStart = (crawlingStartURL == null) ? null : crawlingStartURL.toNormalform(true, true);
// store this call as api call
sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart);
// set new properties
final boolean fullDomain = post.get("range", "wide").equals("domain"); // special property in simple crawl start
final boolean subPath = post.get("range", "wide").equals("subpath"); // special property in simple crawl start
@ -167,12 +164,37 @@ public class Crawler_p {
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8;
final boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
final int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
final String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year");
final long crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
env.setConfig("crawlingIfOlder", crawlingIfOlder);
// recrawl
final String recrawl = post.get("recrawl", "nodoubles"); // nodoubles, reload, scheduler
boolean crawlingIfOlderCheck = post.get("crawlingIfOlderCheck", "off").equals("on");
int crawlingIfOlderNumber = Integer.parseInt(post.get("crawlingIfOlderNumber", "-1"));
String crawlingIfOlderUnit = post.get("crawlingIfOlderUnit","year"); // year, month, day, hour
int repeat_time = Integer.parseInt(post.get("repeat_time", "-1"));
final String repeat_unit = post.get("repeat_unit", "seldays"); // selminutes, selhours, seldays
if (recrawl.equals("scheduler")) {
// set crawlingIfOlder attributes that are appropriate for scheduled crawling
crawlingIfOlderCheck = true;
crawlingIfOlderNumber = repeat_unit.equals("selminutes") ? 1 : repeat_unit.equals("selhours") ? repeat_time / 2 : repeat_time * 12;
crawlingIfOlderUnit = "hour";
} else if (recrawl.equals("reload")) {
repeat_time = -1;
crawlingIfOlderCheck = true;
} else if (recrawl.equals("nodoubles")) {
repeat_time = -1;
crawlingIfOlderCheck = false;
}
long crawlingIfOlder = recrawlIfOlderC(crawlingIfOlderCheck, crawlingIfOlderNumber, crawlingIfOlderUnit);
env.setConfig("crawlingIfOlder", crawlingIfOlder);
// store this call as api call
if (repeat_time > 0) {
// store as scheduled api call
sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart, repeat_time, repeat_unit.substring(3));
} else {
// store just a protocol
sb.tables.recordAPICall(post, "Crawler_p.html", WorkTables.TABLE_API_TYPE_CRAWLER, "crawl start for " + crawlingStart);
}
final boolean crawlingDomFilterCheck = post.get("crawlingDomFilterCheck", "off").equals("on");
final int crawlingDomFilterDepth = (crawlingDomFilterCheck) ? Integer.parseInt(post.get("crawlingDomFilterDepth", "-1")) : -1;
env.setConfig("crawlingDomFilterDepth", Integer.toString(crawlingDomFilterDepth));

@ -207,13 +207,12 @@ public class PerformanceQueues_p {
busysleep = sb.getConfigLong(threadName + "_busysleep", busysleep);
}
if (setProfile) {
if (threadName.equals(SwitchboardConstants.PEER_PING)
|| threadName.equals(SwitchboardConstants.SEED_UPLOAD)
|| threadName.equals(SwitchboardConstants.CLEANUP)
|| threadName.equals("autoReCrawl")
) { /* do not change any values */ }
else if (threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER)
|| threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
if (threadName.equals(SwitchboardConstants.PEER_PING) ||
threadName.equals(SwitchboardConstants.SEED_UPLOAD) ||
threadName.equals(SwitchboardConstants.CLEANUP)) {
/* do not change any values */
} else if (threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_CRAWL_LOADER) ||
threadName.equals(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL)) {
sb.setRemotecrawlPPM(Math.max(1, (int) (sb.getConfigLong("network.unit.remotecrawl.speed", 60) / multiplier)));
}
else {

@ -109,7 +109,7 @@ public class QuickCrawlLink_p {
// get other parameters if set
final String crawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_BAD_URL);
final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
final int CrawlingDepth = Integer.parseInt(post.get("crawlingDepth", "0"));
final boolean crawlDynamic = post.get("crawlingQ", "").equals("on");
final boolean indexText = post.get("indexText", "on").equals("on");

@ -62,7 +62,7 @@ public class Table_API_p {
if (action.equals("on")) {
Tables.Row row = sb.tables.select(WorkTables.TABLE_API_NAME, pk.getBytes());
if (row != null) {
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 7);
row.put(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
WorkTables.calculateAPIScheduler(row, false);
sb.tables.update(WorkTables.TABLE_API_NAME, row);

@ -46,7 +46,6 @@ public class CrawlProfile {
public static final String MATCH_ALL = ".*";
public static final String MATCH_NEVER = "";
public static final String MATCH_BAD_URL = ".*memberlist.*|.*previous.*|.*next.*|.*p=.*";
static ConcurrentHashMap<String, Map<String, DomProfile>> domsCache = new ConcurrentHashMap<String, Map<String, DomProfile>>();
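Not part of the diff: a small stand-alone illustration of what replacing the fixed "bad pages" default with MATCH_NEVER means in practice. The patterns are copied from the CrawlProfile hunk above; the URL and class name are made up, and the check uses plain java.util.regex rather than YaCy's filter code.

import java.util.regex.Pattern;

// Illustrative only: old fixed "bad pages" filter vs. the new empty default.
public class MustNotMatchSketch {
    static final String MATCH_NEVER   = "";
    static final String MATCH_BAD_URL = ".*memberlist.*|.*previous.*|.*next.*|.*p=.*";

    public static void main(final String[] args) {
        final String url = "http://forum.example.net/memberlist.php";
        // the old default excluded typical forum navigation pages from the crawl ...
        System.out.println(Pattern.matches(MATCH_BAD_URL, url)); // true  -> filtered out
        // ... while the empty default matches no non-empty URL, so nothing is excluded
        System.out.println(Pattern.matches(MATCH_NEVER, url));   // false -> kept
    }
}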

@ -163,7 +163,7 @@ public final class CrawlSwitchboard {
if (this.defaultProxyProfile == null) {
// generate new default entry for proxy crawling
this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL,
this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, -1, false,
true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
@ -174,33 +174,33 @@ public final class CrawlSwitchboard {
}
if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
-1, -1, -1, true, true, true, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
}
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, false, true, false, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
}
if (this.defaultMediaSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
}
if (this.defaultSurrogateProfile == null) {
// generate new default entry for surrogate parsing
defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, -1, true, true, false, false, false, false, true, true, false, CrawlProfile.CacheStrategy.NOCACHE);
}
}

@ -315,7 +315,7 @@ public class SitemapParser extends DefaultHandler {
return this.sb.crawler.profilesActiveCrawls.newEntry(
domainName, sitemapURL,
// crawling Filter
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL,
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
// Depth
0,
// force recrawling

@ -77,13 +77,6 @@ public class WorkTables extends Tables {
public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment) {
// remove the apicall attributes from the post object
String pk = post.remove(TABLE_API_COL_APICALL_PK);
String count = post.remove(TABLE_API_COL_APICALL_COUNT);
if (count == null) count = "1";
String time = post.remove(TABLE_API_COL_APICALL_SCHEDULE_TIME);
String unit = post.remove(TABLE_API_COL_APICALL_SCHEDULE_UNIT);
if (time == null || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) {
time = ""; unit = "";
}
// generate the apicall url - without the apicall attributes
final String apiurl = /*"http://localhost:" + getConfig("port", "8080") +*/ "/" + servletName + "?" + post.toString();
@ -100,20 +93,7 @@ public class WorkTables extends Tables {
// insert or update entry
try {
if (row != null) {
// modify and update existing entry
// modify date attributes and patch old values
row.put(TABLE_API_COL_DATE_LAST_EXEC, DateFormatter.formatShortMilliSecond(new Date()).getBytes());
if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE));
row.remove(TABLE_API_COL_DATE);
// insert APICALL attributes
row.put(TABLE_API_COL_APICALL_COUNT, count.getBytes());
row.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes());
row.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
super.update(TABLE_API_NAME, row);
} else {
if (row == null) {
// create and insert new entry
Data data = new Data();
data.put(TABLE_API_COL_TYPE, type.getBytes());
@ -124,10 +104,19 @@ public class WorkTables extends Tables {
data.put(TABLE_API_COL_URL, apiurl.getBytes());
// insert APICALL attributes
data.put(TABLE_API_COL_APICALL_COUNT, count.getBytes());
data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, time.getBytes());
data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
data.put(TABLE_API_COL_APICALL_COUNT, "1");
super.insert(TABLE_API_NAME, data);
} else {
// modify and update existing entry
// modify date attributes and patch old values
row.put(TABLE_API_COL_DATE_LAST_EXEC, DateFormatter.formatShortMilliSecond(new Date()).getBytes());
if (!row.containsKey(TABLE_API_COL_DATE_RECORDING)) row.put(TABLE_API_COL_DATE_RECORDING, row.get(TABLE_API_COL_DATE));
row.remove(TABLE_API_COL_DATE);
// insert APICALL attributes
row.put(TABLE_API_COL_APICALL_COUNT, row.get(TABLE_API_COL_APICALL_COUNT, 1) + 1);
super.update(TABLE_API_NAME, row);
}
} catch (IOException e) {
Log.logException(e);
@ -137,6 +126,56 @@ public class WorkTables extends Tables {
Log.logInfo("APICALL", apiurl);
}
/**
* store a API call and set attributes to schedule a re-call of that API call according to a given frequence
* This is the same as the previous method but it also computes a re-call time and stores that additionally
* @param post the post arguments of the api call
* @param servletName the name of the servlet
* @param type name of the servlet category
* @param comment visual description of the process
* @param time the time until next scheduled execution of this api call
* @param unit the time unit for the scheduled call
*/
public void recordAPICall(final serverObjects post, final String servletName, final String type, final String comment, int time, String unit) {
if (post.containsKey(TABLE_API_COL_APICALL_PK)) {
// this api call has already been stored somewhere.
recordAPICall(post, servletName, type, comment);
return;
}
if (time < 0 || unit == null || unit.length() == 0 || "minutes,hours,days".indexOf(unit) < 0) {
time = 0; unit = "";
} else {
if (unit.equals("minutes") && time < 10) time = 10;
}
// generate the apicall url - without the apicall attributes
final String apiurl = /*"http://localhost:" + getConfig("port", "8080") +*/ "/" + servletName + "?" + post.toString();
// insert entry
try {
// create and insert new entry
Data data = new Data();
data.put(TABLE_API_COL_TYPE, type.getBytes());
data.put(TABLE_API_COL_COMMENT, comment.getBytes());
byte[] date = DateFormatter.formatShortMilliSecond(new Date()).getBytes();
data.put(TABLE_API_COL_DATE_RECORDING, date);
data.put(TABLE_API_COL_DATE_LAST_EXEC, date);
data.put(TABLE_API_COL_URL, apiurl.getBytes());
// insert APICALL attributes
data.put(TABLE_API_COL_APICALL_COUNT, "1".getBytes());
data.put(TABLE_API_COL_APICALL_SCHEDULE_TIME, Integer.toString(time).getBytes());
data.put(TABLE_API_COL_APICALL_SCHEDULE_UNIT, unit.getBytes());
calculateAPIScheduler(data, false); // set next execution time
super.insert(TABLE_API_NAME, data);
} catch (IOException e) {
Log.logException(e);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
Log.logInfo("APICALL", apiurl);
}
/**
* execute an API call using a api table row which contains all essentials
* to access the server also the host, port and the authentication realm must be given
@ -164,9 +203,6 @@ public class WorkTables extends Tables {
if (row == null) continue;
String url = "http://" + host + ":" + port + new String(row.get(WorkTables.TABLE_API_COL_URL));
url += "&" + WorkTables.TABLE_API_COL_APICALL_PK + "=" + new String(row.getPK());
url += "&" + WorkTables.TABLE_API_COL_APICALL_COUNT + "=" + (row.get(WorkTables.TABLE_API_COL_APICALL_COUNT, 1) + 1);
url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, "");
url += "&" + WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT + "=" + row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "");
try {
client.GETbytes(url);
l.put(url, client.getStatusCode());
@ -197,8 +233,9 @@ public class WorkTables extends Tables {
/**
* calculate the execution time in a api call table based on given scheduling time and last execution time
* @param row the database row in the api table
* @param update if true then the next execution time is based on the latest computed execution time; othervise it is based on the last execution time
*/
public static void calculateAPIScheduler(Tables.Row row, boolean update) {
public static void calculateAPIScheduler(Tables.Data row, boolean update) {
Date date = row.containsKey(WorkTables.TABLE_API_COL_DATE) ? row.get(WorkTables.TABLE_API_COL_DATE, new Date()) : null;
date = update ? row.get(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, date) : row.get(WorkTables.TABLE_API_COL_DATE_LAST_EXEC, date);
int time = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_TIME, 1);
@ -208,10 +245,11 @@ public class WorkTables extends Tables {
}
String unit = row.get(WorkTables.TABLE_API_COL_APICALL_SCHEDULE_UNIT, "days");
long d = date.getTime();
if (unit.equals("minutes")) d += 60000L * time;
if (unit.equals("minutes")) d += 60000L * Math.max(10, time);
if (unit.equals("hours")) d += 60000L * 60L * time;
if (unit.equals("days")) d += 60000L * 60L * 24L * time;
if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L;
d -= d % 60000; // remove seconds
row.put(WorkTables.TABLE_API_COL_DATE_NEXT_EXEC, new Date(d));
}
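Not part of the diff: a minimal sketch of the next-execution arithmetic that the new calculateAPIScheduler code in the hunk above performs, detached from Tables.Row so it can run on its own. The clamping rules (10-minute floor for minute schedules, pushing a date in the past to ten minutes from now, seconds dropped) are taken from the hunk; the class and method names are made up.

import java.util.Date;

// Illustrative only: the next-execution computation used by the API-call scheduler above.
public class ApiScheduleSketch {

    static Date nextExecution(final Date lastExec, final int time, final String unit) {
        long d = lastExec.getTime();
        if (unit.equals("minutes")) d += 60000L * Math.max(10, time);
        if (unit.equals("hours"))   d += 60000L * 60L * time;
        if (unit.equals("days"))    d += 60000L * 60L * 24L * time;
        if (d < System.currentTimeMillis()) d = System.currentTimeMillis() + 600000L; // never in the past
        d -= d % 60000; // remove seconds
        return new Date(d);
    }

    public static void main(final String[] args) {
        // a crawl start recorded with repeat_time=7, repeat_unit="seldays";
        // Crawler_p.java strips the "sel" prefix, so the stored unit is "days"
        System.out.println(nextExecution(new Date(), 7, "days"));
    }
}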

@ -23,18 +23,11 @@
package de.anomic.data;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -42,24 +35,15 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.NaturalOrder;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.workflow.BusyThread;
import net.yacy.kelondro.workflow.InstantBusyThread;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Request;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
import de.anomic.yacy.yacyNewsPool;
public class bookmarksDB {
// ------------------------------------
// Declaration of Class-Attributes
// ------------------------------------
@ -67,7 +51,6 @@ public class bookmarksDB {
//final static int SORT_ALPHA = 1;
private final static int SORT_SIZE = 2;
private final static int SHOW_ALL = -1;
private final static String SLEEP_TIME = "3600000"; // default sleepTime: check for recrawls every hour
// bookmarks
private MapHeap bookmarks;
@ -75,9 +58,6 @@ public class bookmarksDB {
// tags
private ConcurrentHashMap<String, Tag> tags;
// autoReCrawl
private final BusyThread autoReCrawl;
private BookmarkDate dates;
// ------------------------------------
@ -120,15 +100,6 @@ public class bookmarksDB {
//this.datesTable = new MapView(BLOBTree.toHeap(datesFile, true, true, 20, 256, '_', NaturalOrder.naturalOrder, datesFileNew), 500, '_');
this.dates = new BookmarkDate(datesFile);
if (!datesExisted) this.dates.init(new bookmarkIterator(true));
// autoReCrawl
final Switchboard sb = Switchboard.getSwitchboard();
this.autoReCrawl = new InstantBusyThread(this, "autoReCrawl", null, null, Long.MIN_VALUE, Long.MAX_VALUE, Long.MIN_VALUE, Long.MAX_VALUE);
final long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME));
sb.deployThread("autoReCrawl", "autoReCrawl Scheduler", "simple scheduler for automatic re-crawls of bookmarked urls", null, autoReCrawl, 120000,
sleepTime, sleepTime, Long.parseLong(sb.getConfig("autoReCrawl_memprereq" , "-1"))
);
Log.logInfo("BOOKMARKS", "autoReCrawl - serverBusyThread initialized checking every "+(sleepTime/1000/60)+" minutes for recrawls");
}
// -----------------------------------------------------
@ -141,163 +112,6 @@ public class bookmarksDB {
dates.close();
}
// -----------------------------------------------------
// bookmarksDB's functions for autoReCrawl
// -----------------------------------------------------
public boolean autoReCrawl() {
// read crontab
final File file = new File (Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf");
String s;
try {
final BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
Log.logInfo("BOOKMARKS", "autoReCrawl - reading schedules from " + file);
while( null != (s = in.readLine()) ) {
if (s.length() > 0 && s.charAt(0) != '#') {
final String parser[] = s.split("\t");
if (parser.length == 13) {
folderReCrawl(Long.parseLong(parser[0]), parser[1], parser[2], Integer.parseInt(parser[3]), Long.parseLong(parser[4]),
Integer.parseInt(parser[5]), Integer.parseInt(parser[6]), Boolean.parseBoolean(parser[7]),
Boolean.parseBoolean(parser[8]), Boolean.parseBoolean(parser[9]),
Boolean.parseBoolean(parser[10]), Boolean.parseBoolean(parser[11]),
Boolean.parseBoolean(parser[12]), CrawlProfile.CacheStrategy.IFFRESH
);
}
if (parser.length == 14) {
folderReCrawl(Long.parseLong(parser[0]), parser[1], parser[2], Integer.parseInt(parser[3]), Long.parseLong(parser[4]),
Integer.parseInt(parser[5]), Integer.parseInt(parser[6]), Boolean.parseBoolean(parser[7]),
Boolean.parseBoolean(parser[8]), Boolean.parseBoolean(parser[9]),
Boolean.parseBoolean(parser[10]), Boolean.parseBoolean(parser[11]),
Boolean.parseBoolean(parser[12]), CrawlProfile.CacheStrategy.decode(Integer.parseInt(parser[13]))
);
}
}
}
in.close();
} catch( FileNotFoundException ex ) {
try {
Log.logInfo("BOOKMARKS", "autoReCrawl - creating new autoReCrawl.conf");
final File inputFile = new File(Switchboard.getSwitchboard().getRootPath(),"defaults/autoReCrawl.conf");
final File outputFile = new File(Switchboard.getSwitchboard().getRootPath(),"DATA/SETTINGS/autoReCrawl.conf");
final FileReader i = new FileReader(inputFile);
final FileWriter o = new FileWriter(outputFile);
int c;
while ((c = i.read()) != -1) {
o.write(c);
}
i.close();
o.close();
autoReCrawl();
return true;
} catch( FileNotFoundException e ) {
Log.logSevere("BOOKMARKS", "autoReCrawl - file not found error: defaults/autoReCrawl.conf", e);
return false;
} catch (IOException e) {
Log.logSevere("BOOKMARKS", "autoReCrawl - IOException: defaults/autoReCrawl.conf", e);
return false;
}
} catch( Exception ex ) {
Log.logSevere("BOOKMARKS", "autoReCrawl - error reading " + file, ex);
return false;
}
return true;
}
public void folderReCrawl(long schedule, String folder, String crawlingfilter, int newcrawlingdepth, long crawlingIfOlder,
int crawlingDomFilterDepth, int crawlingDomMaxPages, boolean crawlingQ, boolean indexText, boolean indexMedia,
boolean crawlOrder, boolean xsstopw, boolean storeHTCache, CrawlProfile.CacheStrategy cacheStrategy) {
final Switchboard sb = Switchboard.getSwitchboard();
final Iterator<String> bit = getBookmarksIterator(folder, true);
Log.logInfo("BOOKMARKS", "autoReCrawl - processing: "+folder);
final boolean xdstopw = xsstopw;
final boolean xpstopw = xsstopw;
while(bit.hasNext()) {
final Bookmark bm = getBookmark(bit.next());
final long sleepTime = Long.parseLong(sb.getConfig("autoReCrawl_idlesleep" , SLEEP_TIME));
final long interTime = (System.currentTimeMillis()-bm.getTimeStamp())%schedule;
final Date date = new Date(bm.getTimeStamp());
Log.logInfo("BOOKMARKS", "autoReCrawl - checking schedule for: "+"["+DateFormatter.formatISO8601(date)+"] "+bm.getUrl());
if (interTime >= 0 && interTime < sleepTime) {
try {
int pos = 0;
// set crawlingStart to BookmarkUrl
final String crawlingStart = bm.getUrl();
String newcrawlingMustMatch = crawlingfilter;
final DigestURI crawlingStartURL = new DigestURI(crawlingStart, null);
// set the crawling filter
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = ".*"; // avoid that all urls are filtered out if bad value was submitted
if (crawlingStartURL!= null && newcrawlingMustMatch.equals("dom")) {
newcrawlingMustMatch = ".*" + crawlingStartURL.getHost() + ".*";
}
if (crawlingStart!= null && newcrawlingMustMatch.equals("sub") && (pos = crawlingStart.lastIndexOf("/")) > 0) {
newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
}
// check if the crawl filter works correctly
Pattern.compile(newcrawlingMustMatch);
final byte[] urlhash = crawlingStartURL.hash();
sb.indexSegments.urlMetadata(Segments.Process.LOCALCRAWLING).remove(urlhash);
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
sb.crawlQueues.errorURL.remove(urlhash);
// stack url
sb.crawler.profilesPassiveCrawls.removeEntry(crawlingStartURL.hash()); // if there is an old entry, delete it
final CrawlProfile.entry pe = sb.crawler.profilesActiveCrawls.newEntry(
folder+"/"+crawlingStartURL, crawlingStartURL,
newcrawlingMustMatch,
CrawlProfile.MATCH_BAD_URL,
newcrawlingdepth,
sb.crawler.profilesActiveCrawls.getRecrawlDate(crawlingIfOlder), crawlingDomFilterDepth, crawlingDomMaxPages,
crawlingQ,
indexText, indexMedia,
storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw, cacheStrategy);
sb.crawlStacker.enqueueEntry(new Request(
sb.peers.mySeed().hash.getBytes(),
crawlingStartURL,
null,
"CRAWLING-ROOT",
new Date(),
pe.handle(),
0,
0,
0
));
Log.logInfo("BOOKMARKS", "autoReCrawl - adding crawl profile for: " + crawlingStart);
// serverLog.logInfo("BOOKMARKS", "autoReCrawl - crawl filter is set to: " + newcrawlingfilter);
// generate a YaCyNews if the global flag was set
if (crawlOrder) {
Map<String, String> m = new HashMap<String, String>(pe.map()); // must be cloned
m.remove("specificDepth");
m.remove("indexText");
m.remove("indexMedia");
m.remove("remoteIndexing");
m.remove("xsstopw");
m.remove("xpstopw");
m.remove("xdstopw");
m.remove("storeTXCache");
m.remove("storeHTCache");
m.remove("generalFilter");
m.remove("specificFilter");
m.put("intention", "Automatic ReCrawl!");
sb.peers.newsPool.publishMyNews(sb.peers.mySeed(), yacyNewsPool.CATEGORY_CRAWL_START, m);
}
} catch (MalformedURLException e1) {}
} // if
} // while(bit.hasNext())
} // } autoReCrawl()
// -----------------------------------------------------------
// bookmarksDB's functions for bookmarksTable / bookmarkCache
// -----------------------------------------------------------

@ -137,7 +137,6 @@ import de.anomic.data.userDB;
import de.anomic.data.wiki.wikiBoard;
import de.anomic.data.wiki.wikiCode;
import de.anomic.data.wiki.wikiParser;
//import de.anomic.http.client.Client;
import de.anomic.http.client.Cache;
import de.anomic.http.server.HTTPDemon;
import de.anomic.http.server.HeaderFramework;
@ -211,7 +210,7 @@ public final class Switchboard extends serverSwitch {
public boolean rankingOn;
public CRDistribution rankingOwnDistribution;
public CRDistribution rankingOtherDistribution;
public Map<String, Object[]> outgoingCookies, incomingCookies;
public Map<String, Object[]> outgoingCookies, incomingCookies;
public volatile long proxyLastAccess, localSearchLastAccess, remoteSearchLastAccess;
public yacyCore yc;
public ResourceObserver observer;
@ -608,7 +607,7 @@ public final class Switchboard extends serverSwitch {
SwitchboardConstants.CLEANUP_METHOD_JOBCOUNT,
SwitchboardConstants.CLEANUP_METHOD_FREEMEM,
60000, Long.MAX_VALUE, 10000, Long.MAX_VALUE),
600000); // all 5 Minutes, wait 10 minutes until first run
60000); // all 5 Minutes, wait 1 minute until first run
deployThread(SwitchboardConstants.SURROGATES, "Surrogates", "A thread that polls the SURROGATES path and puts all Documents in one surroagte file into the indexing queue.", null,
new InstantBusyThread(
this,

@ -433,6 +433,17 @@ public class Tables {
return dflt;
}
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append('{');
for (Map.Entry<String, byte[]> entry: this.entrySet()) {
sb.append(entry.getKey()).append('=').append(new String(entry.getValue())).append(", ");
}
if (sb.length() > 1) sb.setLength(sb.length() - 2);
sb.append('}');
return sb.toString();
}
}
public class Row extends Data {
