From 0cae420d8ea189885d574be588489983fe0e47a9 Mon Sep 17 00:00:00 2001 From: sixcooler Date: Wed, 25 Sep 2013 15:01:28 +0200 Subject: [PATCH] some DNS-timing changes: since httpclient uses the domain cache, it is useful not to clear the domain cache while crawling is running (domains are filled into this cache). On huge crawl starts (e.g. from a file) my DNS could not keep up with the high request rates, so I reduced the rate and allowed some more time(-out). --- source/net/yacy/cora/protocol/Domains.java | 6 +++--- source/net/yacy/search/Switchboard.java | 13 ++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/source/net/yacy/cora/protocol/Domains.java b/source/net/yacy/cora/protocol/Domains.java index c1c6b6d18..70a960f5b 100644 --- a/source/net/yacy/cora/protocol/Domains.java +++ b/source/net/yacy/cora/protocol/Domains.java @@ -74,8 +74,8 @@ public class Domains { private static final String PRESENT = ""; private static final Pattern LOCAL_PATTERNS = Pattern.compile("(10\\..*)|(127\\..*)|(172\\.(1[6-9]|2[0-9]|3[0-1])\\..*)|(169\\.254\\..*)|(192\\.168\\..*)|(localhost)|(\\[?\\:\\:1/.*)|(\\[?fc.*)|(\\[?fd.*)|(\\[?(fe80|0)\\:0\\:0\\:0\\:0\\:0\\:0\\:1.*)"); - private static final int MAX_NAME_CACHE_HIT_SIZE = 100000; - private static final int MAX_NAME_CACHE_MISS_SIZE = 100000; + private static final int MAX_NAME_CACHE_HIT_SIZE = 10000; + private static final int MAX_NAME_CACHE_MISS_SIZE = 1000; private static final int CONCURRENCY_LEVEL = Runtime.getRuntime().availableProcessors() * 2; // a dns cache @@ -782,7 +782,7 @@ public class Domains { public InetAddress call() throws Exception { return InetAddress.getByName(host); } - }, 1000L, TimeUnit.MILLISECONDS, false); + }, 3000L, TimeUnit.MILLISECONDS, false); //ip = TimeoutRequest.getByName(host, 1000); // this makes the DNS request to backbone } //.out.println("DNSLOOKUP-*LOOKUP* " + host + ", time = " + (System.currentTimeMillis() - t) + "ms"); diff --git a/source/net/yacy/search/Switchboard.java 
b/source/net/yacy/search/Switchboard.java index 36f4930bc..83a321052 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2009,7 +2009,7 @@ public final class Switchboard extends serverSwitch { // clear caches if (WordCache.sizeCommonWords() > 1000) WordCache.clearCommonWords(); Word.clearCache(); - Domains.clear(); + // Domains.clear(); // clean up image stack ResultImages.clearQueues(); @@ -2274,7 +2274,10 @@ public final class Switchboard extends serverSwitch { // if no crawl is running and processing is activated: // execute the (post-) processing steps for all entries that have a process tag assigned if (this.crawlQueues.coreCrawlJobSize() == 0) { - if (this.crawlQueues.noticeURL.isEmpty()) this.crawlQueues.noticeURL.clear(); // flushes more caches + if (this.crawlQueues.noticeURL.isEmpty()) { + Domains.clear(); + this.crawlQueues.noticeURL.clear(); // flushes more caches + } postprocessingRunning = true; int proccount = 0; proccount += index.fulltext().getDefaultConfiguration().postprocessing(index); @@ -2827,7 +2830,7 @@ public final class Switchboard extends serverSwitch { public void stackURLs(Set rootURLs, final CrawlProfile profile, final Set successurls, final Map failurls) { if (rootURLs == null || rootURLs.size() == 0) return; - List stackthreads = new ArrayList(); // do this concurrently + final List stackthreads = new ArrayList(); // do this concurrently for (DigestURL url: rootURLs) { final DigestURL turl = url; Thread t = new Thread() { @@ -2838,9 +2841,9 @@ public final class Switchboard extends serverSwitch { }; t.start(); stackthreads.add(t); - try {Thread.sleep(10);} catch (final InterruptedException e) {} // to prevent that this fires more than 100 connections pre second! + try {Thread.sleep(100);} catch (final InterruptedException e) {} // to prevent that this fires more than 10 connections per second! 
} - long waitingtime = 1 + (30000 / rootURLs.size()); // at most wait only halve an minute to prevent that the crawl start runs into a time-out + final long waitingtime = 10 + (30000 / rootURLs.size()); // wait at most half a minute to prevent that the crawl start runs into a time-out for (Thread t: stackthreads) try {t.join(waitingtime);} catch (final InterruptedException e) {} }