diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java index ee157d0ad..87fd41ec4 100644 --- a/source/de/anomic/crawler/CrawlStacker.java +++ b/source/de/anomic/crawler/CrawlStacker.java @@ -29,7 +29,6 @@ package de.anomic.crawler; import java.net.UnknownHostException; -import java.util.ArrayList; import java.util.Date; import de.anomic.index.indexReferenceBlacklist; @@ -51,10 +50,6 @@ public final class CrawlStacker { private plasmaWordIndex wordIndex; private boolean acceptLocalURLs, acceptGlobalURLs; - // objects for the prefetch task - private final ArrayList dnsfetchHosts = new ArrayList(); - - // this is the process that checks url for double-occurrences and for allowance/disallowance by robots.txt public CrawlStacker(CrawlQueues cq, plasmaWordIndex wordIndex, boolean acceptLocalURLs, boolean acceptGlobalURLs) { @@ -102,15 +97,13 @@ public final class CrawlStacker { // returns true when the host was known in the dns cache. // If not, the host is stacked on the fetch stack and false is returned try { - serverDomains.dnsResolveFromCache(host); - return true; + if (serverDomains.dnsResolveFromCache(host) != null) return true; // found entry } catch (final UnknownHostException e) { - synchronized (this) { - dnsfetchHosts.add(host); - notifyAll(); - } + // we know that this is unknown return false; } + // we just don't know anything about that host + return false; } /* diff --git a/source/de/anomic/server/serverDomains.java b/source/de/anomic/server/serverDomains.java index 3e3ee8f80..92b13ae81 100644 --- a/source/de/anomic/server/serverDomains.java +++ b/source/de/anomic/server/serverDomains.java @@ -37,7 +37,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import de.anomic.kelondro.kelondroMScoreCluster; import de.anomic.plasma.plasmaSwitchboard; public class serverDomains { @@ -45,15 +44,10 @@ public class serverDomains { // a dns cache private static final Map nameCacheHit = new ConcurrentHashMap(); // a not-synchronized map resulted in deadlocks private static final Set nameCacheMiss = Collections.synchronizedSet(new HashSet()); - private static final kelondroMScoreCluster nameCacheHitAges = new kelondroMScoreCluster(); - private static final kelondroMScoreCluster nameCacheMissAges = new kelondroMScoreCluster(); - private static final int maxNameCacheHitAge = 24 * 60 * 60; // 24 hours in minutes - private static final int maxNameCacheMissAge = 24 * 60 * 60; // 24 hours in minutes - private static final int maxNameCacheHitSize = 3000; - private static final int maxNameCacheMissSize = 3000; + private static final int maxNameCacheHitSize = 8000; + private static final int maxNameCacheMissSize = 8000; public static final List nameCacheNoCachingPatterns = Collections.synchronizedList(new LinkedList()); private static final Set nameCacheNoCachingList = Collections.synchronizedSet(new HashSet()); - private static final long startTime = System.currentTimeMillis(); /** * ! ! ! A T T E N T I O N A T T E N T I O N A T T E N T I O N ! ! ! @@ -90,7 +84,7 @@ public class serverDomains { "GD=Grenada", "GP=Guadeloupe", "GS=South Georgia and the South Sandwich Islands", // south of south america, but administrated by british, has only a scientific base - "GU=Guam", // strategical US basis close to Japan + "GU=Guam", // strategic US basis close to Japan "HM=Heard and McDonald Islands", // uninhabited, sub-Antarctic island, owned by Australia "HT=Haiti", "IO=British Indian Ocean Territory", // UK-US naval support facility in the Indian Ocean @@ -402,16 +396,6 @@ public class serverDomains { insertTLDProps(TLD_Generic, TLD_Generic_ID); // the id=7 is used to flag local addresses } - - /** - * Converts the time to a non negative int - * - * @param longTime Time in miliseconds since 01/01/1970 00:00 GMT - * @return int seconds since startTime - */ - private static int intTime(final long longTime) { - return (int) Math.max(0, ((longTime - startTime) / 1000)); - } /** * Does an DNS-Check to resolve a hostname to an IP. @@ -443,10 +427,10 @@ public class serverDomains { //System.out.println("***DEBUG dnsResolve(" + host + ")"); try { boolean doCaching = true; - ip = InetAddress.getByName(host); + ip = InetAddress.getByName(host); // this makes the DNS request to backbone if ((ip == null) || (ip.isLoopbackAddress()) || - (nameCacheNoCachingList.contains(ip.getHostName())) + (nameCacheNoCachingList.contains(host)) ) { doCaching = false; } else { @@ -454,9 +438,9 @@ public class serverDomains { String nextPattern; while (noCachingPatternIter.hasNext()) { nextPattern = noCachingPatternIter.next(); - if (ip.getHostName().matches(nextPattern)) { + if (host.matches(nextPattern)) { // disallow dns caching for this host - nameCacheNoCachingList.add(ip.getHostName()); + nameCacheNoCachingList.add(host); doCaching = false; break; } @@ -468,10 +452,7 @@ public class serverDomains { flushHitNameCache(); // add new entries - synchronized (nameCacheHit) { - nameCacheHit.put(ip.getHostName(), ip); - nameCacheHitAges.setScore(ip.getHostName(), intTime(System.currentTimeMillis())); - } + nameCacheHit.put(host, ip); } return ip; } catch (final UnknownHostException e) { @@ -480,7 +461,6 @@ public class serverDomains { // add new entries nameCacheMiss.add(host); - nameCacheMissAges.setScore(host, intTime(System.currentTimeMillis())); } return null; } @@ -512,28 +492,14 @@ public class serverDomains { * Removes old entries from the dns hit cache */ public static void flushHitNameCache() { - final int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheHitAge; - String k; - while ((nameCacheHitAges.size() > maxNameCacheHitSize) || (nameCacheHitAges.getMinScore() < cutofftime)) { - k = nameCacheHitAges.getMinObject(); - if (nameCacheHit.remove(k) == null) break; // ensure termination - nameCacheHitAges.deleteScore(k); - } - + if (nameCacheHit.size() > maxNameCacheHitSize) nameCacheHit.clear(); } /** * Removes old entries from the dns miss cache */ public static void flushMissNameCache() { - final int cutofftime = intTime(System.currentTimeMillis()) - maxNameCacheMissAge; - String k; - while ((nameCacheMissAges.size() > maxNameCacheMissSize) || (nameCacheMissAges.getMinScore() < cutofftime)) { - k = nameCacheMissAges.getMinObject(); - if (!nameCacheMiss.remove(k)) break; // ensure termination - nameCacheMissAges.deleteScore(k); - } - + if (nameCacheMiss.size() > maxNameCacheMissSize) nameCacheMiss.clear(); } private static InetAddress[] localAddresses = null; diff --git a/source/de/anomic/server/serverProcessor.java b/source/de/anomic/server/serverProcessor.java index 27081fe39..e815fc55b 100644 --- a/source/de/anomic/server/serverProcessor.java +++ b/source/de/anomic/server/serverProcessor.java @@ -91,6 +91,7 @@ public class serverProcessor { public J take() throws InterruptedException { // read from the input queue + if (this.input == null) return null; long t = System.currentTimeMillis(); J j = this.input.take(); this.blockTime += System.currentTimeMillis() - t;