0) {
newcrawlingMustMatch = crawlingStart.substring(0, pos + 1) + ".*";
@@ -203,7 +202,8 @@ public class Crawler_p {
final boolean indexMedia = post.get("indexMedia", "off").equals("on");
env.setConfig("indexMedia", (indexMedia) ? "true" : "false");
- final boolean storeHTCache = post.get("storeHTCache", "off").equals("on");
+ boolean storeHTCache = post.get("storeHTCache", "off").equals("on");
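+ // local file and SMB resources do not need an HTCache copy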
+ if (crawlingStartURL.isFile() || crawlingStartURL.isSMB()) storeHTCache = false;
env.setConfig("storeHTCache", (storeHTCache) ? "true" : "false");
final String cachePolicyString = post.get("cachePolicy", "iffresh");
@@ -247,15 +247,21 @@ public class Crawler_p {
// stack url
sb.crawler.profilesPassiveCrawls.remove(crawlingStartURL.hash()); // if there is an old entry, delete it
final CrawlProfile pe = new CrawlProfile(
- (crawlingStartURL.getHost() == null) ? Long.toHexString(System.currentTimeMillis()) : crawlingStartURL.getHost(),
+ (crawlingStartURL.getHost() == null) ? crawlingStartURL.toNormalform(true, false) : crawlingStartURL.getHost(),
crawlingStartURL,
newcrawlingMustMatch,
newcrawlingMustNotMatch,
newcrawlingdepth,
- crawlingIfOlder, crawlingDomMaxPages,
+ crawlingIfOlder,
+ crawlingDomMaxPages,
crawlingQ,
indexText, indexMedia,
- storeHTCache, true, crawlOrder, xsstopw, xdstopw, xpstopw, cachePolicy);
+ storeHTCache,
+ crawlOrder,
+ xsstopw,
+ xdstopw,
+ xpstopw,
+ cachePolicy);
sb.crawler.profilesActiveCrawls.put(pe.handle().getBytes(), pe);
final String reasonString = sb.crawlStacker.stackCrawl(new Request(
sb.peers.mySeed().hash.getBytes(),
@@ -352,7 +358,8 @@ public class Crawler_p {
final Map<MultiProtocolURI, String> hyperlinks = scraper.getAnchors();
final DigestURI crawlURL = new DigestURI("file://" + file.toString(), null);
final CrawlProfile profile = new CrawlProfile(
- fileName, crawlURL,
+ fileName,
+ crawlURL,
newcrawlingMustMatch,
CrawlProfile.MATCH_NEVER,
newcrawlingdepth,
@@ -362,9 +369,10 @@ public class Crawler_p {
indexText,
indexMedia,
storeHTCache,
- true,
crawlOrder,
- xsstopw, xdstopw, xpstopw,
+ xsstopw,
+ xdstopw,
+ xpstopw,
cachePolicy);
sb.crawler.profilesActiveCrawls.put(profile.handle().getBytes(), profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
@@ -405,15 +413,21 @@ public class Crawler_p {
try {
final DigestURI sitemapURL = new DigestURI(sitemapURLStr, null);
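+ // sitemap crawls use fixed settings: match-all filter, depth 0, crawlingQ enabled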
final CrawlProfile pe = new CrawlProfile(
- sitemapURLStr, sitemapURL,
- newcrawlingMustMatch,
+ sitemapURLStr,
+ sitemapURL,
+ CrawlProfile.MATCH_ALL,
CrawlProfile.MATCH_NEVER,
- newcrawlingdepth,
- crawlingIfOlder, crawlingDomMaxPages,
- crawlingQ,
- indexText, indexMedia,
- storeHTCache, true, crawlOrder,
- xsstopw, xdstopw, xpstopw,
+ 0,
+ crawlingIfOlder,
+ crawlingDomMaxPages,
+ true,
+ indexText,
+ indexMedia,
+ storeHTCache,
+ crawlOrder,
+ xsstopw,
+ xdstopw,
+ xpstopw,
cachePolicy);
sb.crawler.profilesActiveCrawls.put(pe.handle().getBytes(), pe);
final SitemapImporter importer = new SitemapImporter(sb, sitemapURL, pe);
@@ -431,7 +445,7 @@ public class Crawler_p {
// download document
ContentScraper scraper = null;
scraper = sb.loader.parseResource(sitelistURL, CrawlProfile.CacheStrategy.IFFRESH);
- String title = scraper.getTitle();
+ // String title = scraper.getTitle();
// String description = scraper.getDescription();
// get links and generate filter
@@ -444,7 +458,7 @@ public class Crawler_p {
// put links onto crawl queue
final CrawlProfile profile = new CrawlProfile(
- title == null || title.length() == 0 ? sitelistURL.getHost() : title,
+ sitelistURL.getHost(),
sitelistURL,
newcrawlingMustMatch,
CrawlProfile.MATCH_NEVER,
@@ -455,9 +469,10 @@ public class Crawler_p {
indexText,
indexMedia,
storeHTCache,
- true,
crawlOrder,
- xsstopw, xdstopw, xpstopw,
+ xsstopw,
+ xdstopw,
+ xpstopw,
cachePolicy);
sb.crawler.profilesActiveCrawls.put(profile.handle().getBytes(), profile);
sb.pauseCrawlJob(SwitchboardConstants.CRAWLJOB_LOCAL_CRAWL);
diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java
index c470db791..043c26159 100644
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -157,7 +157,6 @@ public class QuickCrawlLink_p {
indexText,
indexMedia,
storeHTCache,
- true,
remoteIndexing,
xsstopw,
xdstopw,
diff --git a/htroot/SettingsAck_p.java b/htroot/SettingsAck_p.java
index 32d974486..f9b3df8d3 100644
--- a/htroot/SettingsAck_p.java
+++ b/htroot/SettingsAck_p.java
@@ -36,6 +36,7 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.MultiProtocolURI;
+import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.order.Digest;
@@ -111,7 +112,7 @@ public class SettingsAck_p {
final serverCore theServerCore = (serverCore) env.getThread("10_httpd");
try {
final InetSocketAddress theNewAddress = theServerCore.generateSocketAddress(port);
- final String hostName = theNewAddress.getHostName();
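+ // use the time-limited reverse lookup from Domains instead of the potentially blocking getHostName()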
+ final String hostName = Domains.getHostName(theNewAddress.getAddress());
prop.put("info_restart", "1");
prop.put("info_restart_ip",(hostName.equals("0.0.0.0"))? "localhost" : hostName);
prop.put("info_restart_port", theNewAddress.getPort());
diff --git a/htroot/api/util/getpageinfo_p.java b/htroot/api/util/getpageinfo_p.java
index 89bc7ad8e..882c6601b 100755
--- a/htroot/api/util/getpageinfo_p.java
+++ b/htroot/api/util/getpageinfo_p.java
@@ -26,11 +26,13 @@ public class getpageinfo_p {
prop.put("robots-allowed", "3"); //unknown
prop.put("sitemap", "");
prop.put("favicon","");
+ prop.put("sitelist", "");
+ prop.put("filter", ".*");
// default actions
String actions="title,robots";
- if(post!=null && post.containsKey("url")){
+ if (post != null && post.containsKey("url")) {
if(post.containsKey("actions"))
actions=post.get("actions");
String url=post.get("url");
@@ -97,7 +99,7 @@ public class getpageinfo_p {
prop.putXML("filter", filter.length() > 0 ? filter.substring(1) : ".*");
}
}
- if(actions.indexOf("robots")>=0){
+ if (actions.indexOf("robots")>=0) {
try {
final DigestURI theURL = new DigestURI(url, null);
diff --git a/htroot/js/IndexCreate.js b/htroot/js/IndexCreate.js
index b411f2261..ab7a72333 100644
--- a/htroot/js/IndexCreate.js
+++ b/htroot/js/IndexCreate.js
@@ -48,14 +48,14 @@ function handleResponse(){
sitemap=response.getElementsByTagName("sitemap")[0].firstChild.nodeValue;
}
document.getElementsByName("sitemapURL")[0].value=sitemap;
- document.getElementById("sitemap").disabled=false;
+ if (sitemap) document.getElementById("sitemap").disabled=false;
}
sitelist="";
if (response.getElementsByTagName("sitelist")[0].firstChild!=null){
sitelist=response.getElementsByTagName("sitelist")[0].firstChild.nodeValue;
}
document.getElementById("sitelistURLs").innerHTML = sitelist;
- document.getElementById("sitelist").disabled=false;
+ if (sitelist) document.getElementById("sitelist").disabled=false;
// clear the ajax image
document.getElementsByName("ajax")[0].setAttribute("src", AJAX_OFF);
diff --git a/source/de/anomic/crawler/CrawlProfile.java b/source/de/anomic/crawler/CrawlProfile.java
index 23e26fa9d..08c028c3b 100644
--- a/source/de/anomic/crawler/CrawlProfile.java
+++ b/source/de/anomic/crawler/CrawlProfile.java
@@ -53,7 +53,6 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
public static final String INDEX_TEXT = "indexText";
public static final String INDEX_MEDIA = "indexMedia";
public static final String STORE_HTCACHE = "storeHTCache";
- public static final String STORE_TXCACHE = "storeTXCache";
public static final String REMOTE_INDEXING = "remoteIndexing";
public static final String XSSTOPW = "xsstopw";
public static final String XDSTOPW = "xdstopw";
@@ -64,17 +63,22 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
private Pattern mustmatch = null, mustnotmatch = null;
- public CrawlProfile(final String name, final DigestURI startURL,
+ public CrawlProfile(
+ final String name,
+ final DigestURI startURL,
final String mustmatch,
final String mustnotmatch,
final int depth,
final long recrawlIfOlder /*date*/,
final int domMaxPages,
final boolean crawlingQ,
- final boolean indexText, final boolean indexMedia,
- final boolean storeHTCache, final boolean storeTXCache,
+ final boolean indexText,
+ final boolean indexMedia,
+ final boolean storeHTCache,
final boolean remoteIndexing,
- final boolean xsstopw, final boolean xdstopw, final boolean xpstopw,
+ final boolean xsstopw,
+ final boolean xdstopw,
+ final boolean xpstopw,
final CacheStrategy cacheStrategy) {
super(40);
if (name == null || name.length() == 0) throw new NullPointerException("name must not be null");
@@ -91,7 +95,6 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
put(INDEX_TEXT, indexText);
put(INDEX_MEDIA, indexMedia);
put(STORE_HTCACHE, storeHTCache);
- put(STORE_TXCACHE, storeTXCache);
put(REMOTE_INDEXING, remoteIndexing);
put(XSSTOPW, xsstopw); // exclude static stop-words
put(XDSTOPW, xdstopw); // exclude dynamic stop-word
@@ -218,11 +221,6 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements Map<String, String> {
if (r == null) return false;
return (r.equals(Boolean.TRUE.toString()));
}
- public boolean storeTXCache() {
- final String r = get(STORE_TXCACHE);
- if (r == null) return false;
- return (r.equals(Boolean.TRUE.toString()));
- }
public boolean remoteIndexing() {
final String r = get(REMOTE_INDEXING);
if (r == null) return false;
diff --git a/source/de/anomic/crawler/CrawlSwitchboard.java b/source/de/anomic/crawler/CrawlSwitchboard.java
index f90b0f40b..025369ba4 100644
--- a/source/de/anomic/crawler/CrawlSwitchboard.java
+++ b/source/de/anomic/crawler/CrawlSwitchboard.java
@@ -170,7 +170,7 @@ public final class CrawlSwitchboard {
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false,
true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/,
- true, true,
+ true,
false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, true, true, true,
CrawlProfile.CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(this.defaultProxyProfile.handle().getBytes(), this.defaultProxyProfile);
@@ -178,38 +178,38 @@ public final class CrawlSwitchboard {
if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- -1, -1, true, true, true, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
+ -1, -1, true, true, true, false, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(this.defaultRemoteProfile.handle().getBytes(), this.defaultRemoteProfile);
}
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
+ CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(this.defaultTextSnippetLocalProfile.handle().getBytes(), this.defaultTextSnippetLocalProfile);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
+ CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(this.defaultTextSnippetGlobalProfile.handle().getBytes(), this.defaultTextSnippetGlobalProfile);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CrawlProfile.CacheStrategy.IFEXIST);
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
+ CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(this.defaultMediaSnippetLocalProfile.handle().getBytes(), this.defaultMediaSnippetLocalProfile);
}
if (this.defaultMediaSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
+ CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CrawlProfile.CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(this.defaultMediaSnippetGlobalProfile.handle().getBytes(), this.defaultMediaSnippetGlobalProfile);
}
if (this.defaultSurrogateProfile == null) {
// generate new default entry for surrogate parsing
this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
- CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, false, true, true, false, CrawlProfile.CacheStrategy.NOCACHE);
+ CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CrawlProfile.CacheStrategy.NOCACHE);
this.profilesActiveCrawls.put(this.defaultSurrogateProfile.handle().getBytes(), this.defaultSurrogateProfile);
}
}
diff --git a/source/de/anomic/crawler/ResultURLs.java b/source/de/anomic/crawler/ResultURLs.java
index ca2e9d70e..27bbac4cc 100644
--- a/source/de/anomic/crawler/ResultURLs.java
+++ b/source/de/anomic/crawler/ResultURLs.java
@@ -159,6 +159,10 @@ public final class ResultURLs {
return resultDomains.get(stack);
}
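+ /** remove all entries from all result stacks */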
+ public void clearStacks() {
+ for (EventOrigin origin: EventOrigin.values()) clearStack(origin);
+ }
+
public synchronized void clearStack(final EventOrigin stack) {
final Map resultStack = getStack(stack);
if (resultStack != null) resultStack.clear();
diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java
index 9d26b262b..c0bb1ef8f 100644
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@@ -118,7 +118,11 @@ public final class HTTPLoader {
// FIXME: 30*-handling (bottom) is never reached
// we always get the final content because httpClient.followRedirects = true
- if (responseBody != null && (code == 200 || code == 203)) {
+ if (responseBody == null) {
+ // no response, reject file
+ sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no response body (you may increase the maximum file size)");
+ throw new IOException("REJECTED EMPTY RESPONSE BODY '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
+ } else if (code == 200 || code == 203) {
// the transfer is ok
// we write the new cache entry to file system directly
@@ -180,7 +184,7 @@ public final class HTTPLoader {
}
} else {
// if the response has not the right response type then reject file
- sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + code + ")");
+ sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "wrong http status code " + code);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + "' for URL " + request.url().toString());
}
return response;
diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java
index 09528623d..a8f5f01c8 100644
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@@ -858,6 +858,9 @@ public final class Switchboard extends serverSwitch {
this.queuesRoot = new File(new File(indexPrimaryPath, networkName), "QUEUES");
this.networkRoot.mkdirs();
this.queuesRoot.mkdirs();
+
+ // clear statistic data
+ this.crawlResults.clearStacks();
// relocate
this.crawlQueues.relocate(this.queuesRoot); // cannot be closed because the busy threads are working with that object
diff --git a/source/net/yacy/cora/document/MultiProtocolURI.java b/source/net/yacy/cora/document/MultiProtocolURI.java
index 685097b01..10dbdb5de 100644
--- a/source/net/yacy/cora/document/MultiProtocolURI.java
+++ b/source/net/yacy/cora/document/MultiProtocolURI.java
@@ -225,22 +225,9 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolURI> {
if (h.length() > 0 && h.charAt(0) == '/') {
diff --git a/source/net/yacy/cora/protocol/Domains.java b/source/net/yacy/cora/protocol/Domains.java
index c42f76371..f415a930b 100644
--- a/source/net/yacy/cora/protocol/Domains.java
+++ b/source/net/yacy/cora/protocol/Domains.java
@@ -23,11 +23,20 @@ package net.yacy.cora.protocol;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;
import net.yacy.cora.storage.ARC;
@@ -454,6 +463,80 @@ public class Domains {
return false;
}
+ public static String getHostName(final InetAddress i) {
+ Collection<String> hosts = nameCacheHit.getKeys(i);
+ if (hosts.size() > 0) return hosts.iterator().next();
+
+ // call i.getHostName() using concurrency to interrupt execution in case of a time-out
+ final Callable<String> callable = new Callable<String>() {
+ public String call() { return i.getHostName(); }
+ };
+ ExecutorService service = Executors.newSingleThreadExecutor();
+ final Future<String> taskFuture = service.submit(callable);
+ Runnable t = new Runnable() {
+ public void run() { taskFuture.cancel(true); }
+ };
+ service.execute(t);
+ service.shutdown();
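+ // wait at most 500 ms for the reverse lookup; fall back to the plain IP address string on timeout or failure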
+ try {
+ return taskFuture.get(500, TimeUnit.MILLISECONDS);
+ } catch (CancellationException e) {
+ // callable was interrupted
+ return i.getHostAddress();
+ } catch (InterruptedException e) {
+ // service was shutdown
+ return i.getHostAddress();
+ } catch(ExecutionException e) {
+ // callable failed unexpectedly
+ return i.getHostAddress();
+ } catch (TimeoutException e) {
+ // time-out
+ return i.getHostAddress();
+ }
+ }
+
+ public static InetAddress dnsResolve(final String hostx) {
+ if ((hostx == null) || (hostx.length() == 0)) return null;
+ final String host = hostx.toLowerCase().trim();
+ // try to simply parse the address
+ InetAddress ip = parseInetAddress(host);
+ if (ip != null) return ip;
+
+ // try to resolve host by doing a name cache lookup
+ ip = nameCacheHit.get(host);
+ if (ip != null) return ip;
+
+ if (nameCacheMiss.containsKey(host)) return null;
+
+ // call dnsResolveNetBased(host) using concurrency to interrupt execution in case of a time-out
+ final Callable<InetAddress> callable = new Callable<InetAddress>() {
+ public InetAddress call() { return dnsResolveNetBased(host); }
+ };
+ ExecutorService service = Executors.newSingleThreadExecutor();
+ final Future<InetAddress> taskFuture = service.submit(callable);
+ Runnable t = new Runnable() {
+ public void run() { taskFuture.cancel(true); }
+ };
+ service.execute(t);
+ service.shutdown();
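+ // wait at most 500 ms for the DNS lookup; treat timeout or failure as unresolvable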
+ try {
+ return taskFuture.get(500, TimeUnit.MILLISECONDS);
+ } catch (CancellationException e) {
+ // callable was interrupted
+ return null;
+ } catch (InterruptedException e) {
+ // service was shutdown
+ return null;
+ } catch(ExecutionException e) {
+ // callable failed unexpectedly
+ return null;
+ } catch (TimeoutException e) {
+ // time-out
+ return null;
+ }
+ }
+
private static final InetAddress parseInetAddress(final String ip) {
if (ip == null) return null;
if (ip.length() < 8) return null;
@@ -474,33 +557,21 @@ public class Domains {
return null;
}
}
-
- public static InetAddress dnsResolve(String host) {
- if ((host == null) || (host.length() == 0)) return null;
- host = host.toLowerCase().trim();
- // try to simply parse the address
- InetAddress ip = parseInetAddress(host);
- if (ip != null) return ip;
-
- // try to resolve host by doing a name cache lookup
- ip = nameCacheHit.get(host);
- if (ip != null) return ip;
-
- if (nameCacheMiss.containsKey(host)) return null;
- //System.out.println("***DEBUG dnsResolve(" + host + ")");
+
+ private static InetAddress dnsResolveNetBased(String host) {
try {
boolean doCaching = true;
- ip = InetAddress.getByName(host); // this makes the DNS request to backbone
+ InetAddress ip = InetAddress.getByName(host); // this makes the DNS request to backbone
if ((ip == null) ||
(ip.isLoopbackAddress()) ||
(nameCacheNoCachingList.containsKey(host))
) {
doCaching = false;
} else {
- if (matchesList(host, nameCacheNoCachingPatterns)) {
- nameCacheNoCachingList.put(host, PRESENT);
+ if (matchesList(host, nameCacheNoCachingPatterns)) {
+ nameCacheNoCachingList.put(host, PRESENT);
doCaching = false;
- }
+ }
}
if (doCaching && ip != null) {
@@ -519,6 +590,7 @@ public class Domains {
return null;
}
+
/**
* Returns the number of entries in the nameCacheHit map
*
@@ -565,7 +637,7 @@ public class Domains {
public void run() {
String lhn = localHostName;
try {
- lhn = InetAddress.getLocalHost().getHostName();
+ lhn = getHostName(InetAddress.getLocalHost());
} catch (UnknownHostException e) {}
try {
localHostAddresses = InetAddress.getAllByName(lhn);
@@ -656,7 +728,8 @@ public class Domains {
// finally check if there are other local IP addresses that are not in
// the standard IP range
for (int i = 0; i < localHostAddresses.length; i++) {
- if (localHostAddresses[i].getHostName().equals(host)) return true;
+ String hostname = getHostName(localHostAddresses[i]);
+ if (hostname != null && hostname.equals(host)) return true;
if (localHostAddresses[i].getHostAddress().equals(host)) return true;
}
diff --git a/source/net/yacy/cora/protocol/Scanner.java b/source/net/yacy/cora/protocol/Scanner.java
index d37088f4b..578068044 100644
--- a/source/net/yacy/cora/protocol/Scanner.java
+++ b/source/net/yacy/cora/protocol/Scanner.java
@@ -121,7 +121,7 @@ public class Scanner extends Thread {
private void addProtocol(String protocol, boolean bigrange) {
for (InetAddress i: genlist(bigrange)) {
try {
- this.scanqueue.put(new MultiProtocolURI(protocol + "://" + i.getHostName() + "/"));
+ this.scanqueue.put(new MultiProtocolURI(protocol + "://" + Domains.getHostName(i) + "/"));
} catch (MalformedURLException e) {
Log.logException(e);
} catch (InterruptedException e) {
diff --git a/source/net/yacy/cora/storage/ARC.java b/source/net/yacy/cora/storage/ARC.java
index 47aebc939..d0cec8065 100644
--- a/source/net/yacy/cora/storage/ARC.java
+++ b/source/net/yacy/cora/storage/ARC.java
@@ -21,6 +21,7 @@
package net.yacy.cora.storage;
+import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
@@ -62,14 +63,21 @@ public interface ARC<K, V> extends Iterable<Map.Entry<K, V>> {
* @return the value
*/
public V get(K s);
+
+ /**
+ * get all keys that are mapped to the given value
+ * @param value
+ * @return the keys that have the given value
+ */
+ public Collection<K> getKeys(V value);
/**
* check if the map contains the key
- * @param s
- * @return
+ * @param key
+ * @return true if the map contains the key
*/
- public boolean containsKey(K s);
-
+ public boolean containsKey(K key);
+
/**
* remove an entry from the cache
* @param s
diff --git a/source/net/yacy/cora/storage/ConcurrentARC.java b/source/net/yacy/cora/storage/ConcurrentARC.java
index d339f0d0a..741da71bb 100644
--- a/source/net/yacy/cora/storage/ConcurrentARC.java
+++ b/source/net/yacy/cora/storage/ConcurrentARC.java
@@ -21,6 +21,8 @@
package net.yacy.cora.storage;
import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
@@ -105,6 +107,17 @@ public final class ConcurrentARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, Iterable<Map.Entry<K, V>>, ARC<K, V> {
return this.arc[getPartition(s)].get((K) s);
}
+ /**
+ * get all keys that are mapped to the given value
+ * @param value
+ * @return the keys that have the given value
+ */
+ public Collection<K> getKeys(V value) {
+ ArrayList<K> keys = new ArrayList<K>();
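+ // the keys are spread over all partitions, so every partition must be scanned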
+ for (int i = 0; i < this.arc.length; i++) keys.addAll(this.arc[i].getKeys(value));
+ return keys;
+ }
+
/**
* check if the map contains the key
* @param s
diff --git a/source/net/yacy/cora/storage/SimpleARC.java b/source/net/yacy/cora/storage/SimpleARC.java
index 7e5af9c18..9552d2200 100644
--- a/source/net/yacy/cora/storage/SimpleARC.java
+++ b/source/net/yacy/cora/storage/SimpleARC.java
@@ -22,6 +22,8 @@
package net.yacy.cora.storage;
import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
@@ -98,6 +100,26 @@ abstract class SimpleARC<K, V> extends AbstractMap<K, V> implements Map<K, V>, Iterable<Map.Entry<K, V>> {
assert (this.levelB.size() <= cacheSize); // the cache should shrink automatically
return v;
}
+
+ /**
+ * check if the map contains the value
+ * @param value
+ * @return the keys that have the given value
+ */
+ public Collection<K> getKeys(V value) {
+ ArrayList<K> keys = new ArrayList<K>();
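+ // an entry may live in either cache level, so both level B and level A are scanned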
+ synchronized (this.levelB) {
+ for (Map.Entry<K, V> entry: this.levelB.entrySet()) {
+ if (value.equals(entry.getValue())) keys.add(entry.getKey());
+ }
+ }
+ synchronized (this) {
+ for (Map.Entry<K, V> entry: this.levelA.entrySet()) {
+ if (value.equals(entry.getValue())) keys.add(entry.getKey());
+ }
+ }
+ return keys;
+ }
/**
* check if the map contains the key
diff --git a/source/net/yacy/kelondro/table/Table.java b/source/net/yacy/kelondro/table/Table.java
index 4a35b2d7f..5794f5ab2 100644
--- a/source/net/yacy/kelondro/table/Table.java
+++ b/source/net/yacy/kelondro/table/Table.java
@@ -305,6 +305,7 @@ public class Table implements Index, Iterable<Row.Entry> {
assert table == null || table.size() == index.size() : "table.size() = " + table.size() + ", index.size() = " + index.size();
}
final HashMap<String, String> map = new HashMap<String, String>(8);
+ if (index == null) return map; // possibly closed or being closed
map.put("tableSize", Integer.toString(index.size()));
map.put("tableKeyChunkSize", Integer.toString(index.row().objectsize));
map.put("tableKeyMem", Integer.toString(index.row().objectsize * index.size()));
|