implemented crawl restrictions for IP patterns and country lists

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7980 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 14 years ago
parent e207c41c8e
commit b250e6466d

@@ -38,10 +38,9 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlStacker;
import de.anomic.crawler.CrawlSwitchboard;
import de.anomic.crawler.CrawlProfile;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
@@ -118,7 +117,7 @@ public class CrawlProfileEditor_p {
// delete all entries from the crawl queue that are deleted here
sb.crawler.removeActive(handle.getBytes());
sb.crawlQueues.noticeURL.removeByProfileHandle(handle, 10000);
} catch (RowSpaceExceededException e) {
} catch (final RowSpaceExceededException e) {
Log.logException(e);
}
if (post.containsKey("delete")) {
@@ -159,8 +158,8 @@ public class CrawlProfileEditor_p {
if ((post != null) && (selentry != null)) {
if (post.containsKey("submit")) {
try {
Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTMATCH, CrawlProfile.MATCH_ALL));
Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH, CrawlProfile.MATCH_NEVER));
Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTMATCH, CrawlProfile.MATCH_ALL_STRING));
Pattern.compile(post.get(CrawlProfile.FILTER_URL_MUSTNOTMATCH, CrawlProfile.MATCH_NEVER_STRING));
final Iterator<eentry> lit = labels.iterator();
eentry tee;
while (lit.hasNext()) {
@@ -253,8 +252,8 @@ public class CrawlProfileEditor_p {
prop.putXML(CRAWL_PROFILE_PREFIX + count + "_startURL", profile.startURL());
prop.put(CRAWL_PROFILE_PREFIX + count + "_handle", profile.handle());
prop.put(CRAWL_PROFILE_PREFIX + count + "_depth", profile.depth());
prop.put(CRAWL_PROFILE_PREFIX + count + "_mustmatch", profile.mustMatchPattern().toString());
prop.put(CRAWL_PROFILE_PREFIX + count + "_mustnotmatch", profile.mustNotMatchPattern().toString());
prop.put(CRAWL_PROFILE_PREFIX + count + "_mustmatch", profile.urlMustMatchPattern().toString());
prop.put(CRAWL_PROFILE_PREFIX + count + "_mustnotmatch", profile.urlMustNotMatchPattern().toString());
prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingIfOlder", (profile.recrawlIfOlder() == 0L) ? "no re-crawl" : DateFormat.getDateTimeInstance().format(profile.recrawlIfOlder()));
prop.put(CRAWL_PROFILE_PREFIX + count + "_crawlingDomFilterDepth", "inactive");

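The hunk above keeps the pre-save validation idea: the submitted must-match / must-not-match expressions are probed with Pattern.compile() before the profile is stored, now against the renamed *_STRING defaults. A minimal standalone sketch of that probe (class and method names are illustrative, not YaCy's):

```java
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class FilterCheck {
    // Compiling the submitted expression surfaces regex syntax errors
    // before the crawl profile is saved.
    static boolean isValidRegex(final String expr) {
        try {
            Pattern.compile(expr);
            return true;
        } catch (final PatternSyntaxException e) {
            return false;
        }
    }

    public static void main(final String[] args) {
        System.out.println(isValidRegex(".*\\.example\\.org/.*")); // true
        System.out.println(isValidRegex("[unclosed"));             // false
    }
}
```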
@@ -185,7 +185,7 @@
<td><label for="crawlingCountryMustMatch">Must-Match List for Country Codes</label>:</td>
<td>
<input type="radio" name="countryMustMatchSwitch" id="countryMustMatchSwitch" value="true" />Use filter&nbsp;&nbsp;
<input name="crawlingCountryMustMatch" id="crawlingCountryMustMatch" type="text" size="60" maxlength="100" value="#[crawlingCountryMustMatch]#" />
<input name="countryMustMatchList" id="countryMustMatchList" type="text" size="60" maxlength="100" value="#[countryMustMatch]#" /><br />
<input type="radio" name="countryMustMatchSwitch" id="countryMustMatchSwitch" value="false" checked="checked" />no country code restriction
</td>
<td>

@@ -42,11 +42,11 @@ public class CrawlStartExpert_p {
prop.put("starturl", /*(intranet) ? repository :*/ "http://");
prop.put("proxyPrefetchDepth", env.getConfig("proxyPrefetchDepth", "0"));
prop.put("crawlingDepth", Math.min(3, env.getConfigLong("crawlingDepth", 0)));
prop.put("mustmatch", /*(intranet) ? repository + ".*" :*/ CrawlProfile.MATCH_ALL);
prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER);
prop.put("ipMustmatch", sb.getConfig("crawlingIPMustMatch", CrawlProfile.MATCH_ALL));
prop.put("ipMustnotmatch", sb.getConfig("crawlingIPMustNotMatch", CrawlProfile.MATCH_NEVER));
prop.put("crawlingCountryMustMatch", sb.getConfig("crawlingCountryMustMatch", ""));
prop.put("mustmatch", /*(intranet) ? repository + ".*" :*/ CrawlProfile.MATCH_ALL_STRING);
prop.put("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
prop.put("ipMustmatch", sb.getConfig("crawlingIPMustMatch", CrawlProfile.MATCH_ALL_STRING));
prop.put("ipMustnotmatch", sb.getConfig("crawlingIPMustNotMatch", CrawlProfile.MATCH_NEVER_STRING));
prop.put("countryMustMatch", sb.getConfig("crawlingCountryMustMatch", ""));
prop.put("crawlingIfOlderCheck", "0");
prop.put("crawlingIfOlderUnitYearCheck", "0");

@@ -153,12 +153,12 @@ public class Crawler_p {
final boolean subPath = "subpath".equals(post.get("range", "wide")); // special property in simple crawl start
// set the crawl filter
String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
final String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL; // avoid that all urls are filtered out if bad value was submitted
String ipMustMatch = post.get("ipMustmatch", CrawlProfile.MATCH_ALL);
final String ipMustNotMatch = post.get("ipMustnotmatch", CrawlProfile.MATCH_NEVER);
if (ipMustMatch.length() < 2) ipMustMatch = CrawlProfile.MATCH_ALL;
String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL_STRING);
final String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL_STRING; // avoid filtering out all URLs if a bad value was submitted
String ipMustMatch = post.get("ipMustmatch", CrawlProfile.MATCH_ALL_STRING);
final String ipMustNotMatch = post.get("ipMustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
if (ipMustMatch.length() < 2) ipMustMatch = CrawlProfile.MATCH_ALL_STRING;
final String countryMustMatch = post.getBoolean("countryMustMatchSwitch", false) ? post.get("countryMustMatchList", "") : "";
sb.setConfig("crawlingIPMustMatch", ipMustMatch);
sb.setConfig("crawlingIPMustNotMatch", ipMustNotMatch);
@@ -439,7 +439,7 @@ public class Crawler_p {
crawlingFileName,
crawlURL,
newcrawlingMustMatch,
CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_NEVER_STRING,
ipMustMatch,
ipMustNotMatch,
countryMustMatch,
@@ -478,8 +478,8 @@ public class Crawler_p {
final CrawlProfile pe = new CrawlProfile(
sitemapURLStr,
sitemapURL,
CrawlProfile.MATCH_ALL,
CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
ipMustMatch,
ipMustNotMatch,
countryMustMatch,
@@ -523,7 +523,7 @@ public class Crawler_p {
sitelistURL.getHost(),
sitelistURL,
newcrawlingMustMatch,
CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_NEVER_STRING,
ipMustMatch,
ipMustNotMatch,
countryMustMatch,

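Crawler_p above resets a submitted filter shorter than two characters to the match-all default, so a stray form value cannot filter out every URL, and it honors the country list only while the radio switch is on. A self-contained sketch of both guards (hypothetical names):

```java
public class FilterDefaults {
    static final String MATCH_ALL_STRING = ".*";

    // A filter shorter than 2 characters cannot be a useful pattern; fall back to match-all.
    static String sanitize(final String submitted) {
        return (submitted == null || submitted.length() < 2) ? MATCH_ALL_STRING : submitted;
    }

    // The country list only applies while the user enabled the switch.
    static String countryList(final boolean switchOn, final String list) {
        return switchOn ? list : "";
    }

    public static void main(final String[] args) {
        System.out.println(sanitize("x"));               // ".*"
        System.out.println(sanitize(".*\\.de/.*"));      // kept as submitted
        System.out.println(countryList(true, "DE,AT"));  // "DE,AT"
        System.out.println(countryList(false, "DE,AT")); // "" (restriction off)
    }
}
```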
@@ -108,8 +108,8 @@ public class QuickCrawlLink_p {
final String title = post.get("title",null);
// get other parameters if set
final String crawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL);
final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER);
final String crawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL_STRING);
final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
final int CrawlingDepth = post.getInt("crawlingDepth", 0);
final boolean crawlDynamic = post.get("crawlingQ", "").equals("on");
final boolean indexText = post.get("indexText", "on").equals("on");
@@ -149,8 +149,8 @@ public class QuickCrawlLink_p {
crawlingStartURL.getHost(),
crawlingStartURL,
crawlingMustMatch,
CrawlProfile.MATCH_ALL,
CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
crawlingMustNotMatch,
CrawlingDepth,

@@ -41,8 +41,10 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
private static final long serialVersionUID = 5527325718810703504L;
public static final String MATCH_ALL = ".*";
public static final String MATCH_NEVER = "";
public static final String MATCH_ALL_STRING = ".*";
public static final String MATCH_NEVER_STRING = "";
public static final Pattern MATCH_ALL_PATTERN = Pattern.compile(MATCH_ALL_STRING);
public static final Pattern MATCH_NEVER_PATTERN = Pattern.compile(MATCH_NEVER_STRING);
// this is a simple record structure that hold all properties of a single crawl start
public static final String HANDLE = "handle";
@@ -67,7 +69,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
public static final String FILTER_IP_MUSTNOTMATCH = "crawlingIPMustNotMatch";
public static final String FILTER_COUNTRY_MUSTMATCH = "crawlingCountryMustMatch";
private Pattern mustmatch = null, mustnotmatch = null;
private Pattern urlmustmatch = null, urlmustnotmatch = null, ipmustmatch = null, ipmustnotmatch = null;
/**
* Constructor which creates a CrawlProfile from parameters.
@@ -119,10 +121,10 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
put(HANDLE, handle);
put(NAME, name);
put(START_URL, (startURL == null) ? "" : startURL.toNormalform(true, false));
put(FILTER_URL_MUSTMATCH, (urlMustMatch == null) ? CrawlProfile.MATCH_ALL : urlMustMatch);
put(FILTER_URL_MUSTNOTMATCH, (urlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER : urlMustNotMatch);
put(FILTER_IP_MUSTMATCH, (ipMustMatch == null) ? CrawlProfile.MATCH_ALL : ipMustMatch);
put(FILTER_IP_MUSTNOTMATCH, (ipMustNotMatch == null) ? CrawlProfile.MATCH_NEVER : ipMustNotMatch);
put(FILTER_URL_MUSTMATCH, (urlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : urlMustMatch);
put(FILTER_URL_MUSTNOTMATCH, (urlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : urlMustNotMatch);
put(FILTER_IP_MUSTMATCH, (ipMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : ipMustMatch);
put(FILTER_IP_MUSTNOTMATCH, (ipMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : ipMustNotMatch);
put(FILTER_COUNTRY_MUSTMATCH, (countryMustMatch == null) ? "" : countryMustMatch);
put(DEPTH, depth);
put(RECRAWL_IF_OLDER, recrawlIfOlder);
@@ -207,26 +209,77 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
* Gets the regex which must be matched by URLs in order to be crawled.
* @return regex which must be matched
*/
public Pattern mustMatchPattern() {
if (this.mustmatch == null) {
String r = get(FILTER_URL_MUSTMATCH);
if (r == null) r = CrawlProfile.MATCH_ALL;
this.mustmatch = Pattern.compile(r);
public Pattern urlMustMatchPattern() {
if (this.urlmustmatch == null) {
final String r = get(FILTER_URL_MUSTMATCH);
if (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) {
this.urlmustmatch = CrawlProfile.MATCH_ALL_PATTERN;
} else {
this.urlmustmatch = Pattern.compile(r);
}
return this.mustmatch;
}
return this.urlmustmatch;
}
/**
* Gets the regex which must not be matched by URLs in order to be crawled.
* @return regex which must not be matched
*/
public Pattern mustNotMatchPattern() {
if (this.mustnotmatch == null) {
String r = get(FILTER_URL_MUSTNOTMATCH);
if (r == null) r = CrawlProfile.MATCH_NEVER;
this.mustnotmatch = Pattern.compile(r);
public Pattern urlMustNotMatchPattern() {
if (this.urlmustnotmatch == null) {
final String r = get(FILTER_URL_MUSTNOTMATCH);
if (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) {
this.urlmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN;
} else {
this.urlmustnotmatch = Pattern.compile(r);
}
}
return this.urlmustnotmatch;
}
/**
* Gets the regex which must be matched by IPs in order to be crawled.
* @return regex which must be matched
*/
public Pattern ipMustMatchPattern() {
if (this.ipmustmatch == null) {
final String r = get(FILTER_IP_MUSTMATCH);
if (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) {
this.ipmustmatch = CrawlProfile.MATCH_ALL_PATTERN;
} else {
this.ipmustmatch = Pattern.compile(r);
}
}
return this.ipmustmatch;
}
/**
* Gets the regex which must not be matched by IPs in order to be crawled.
* @return regex which must not be matched
*/
public Pattern ipMustNotMatchPattern() {
if (this.ipmustnotmatch == null) {
final String r = get(FILTER_IP_MUSTNOTMATCH);
if (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) {
this.ipmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN;
} else {
this.ipmustnotmatch = Pattern.compile(r);
}
return this.mustnotmatch;
}
return this.ipmustnotmatch;
}
/**
* get the list of countries that must match for the locations of the URLs IPs
* @return a list of country codes
*/
public String[] countryMustMatchList() {
String countryMustMatch = get(FILTER_COUNTRY_MUSTMATCH);
if (countryMustMatch == null) countryMustMatch = "";
if (countryMustMatch.length() == 0) return new String[0];
String[] list = countryMustMatch.split(",");
if (list.length == 1 && list[0].length() == 0) list = new String[0];
return list;
}
/**

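The rewritten pattern getters compile lazily and return the shared MATCH_ALL_PATTERN / MATCH_NEVER_PATTERN constants for the default expressions, so callers such as CrawlStacker can detect "no restriction" with a cheap identity comparison and skip the DNS-backed check entirely. A standalone sketch of the scheme (illustrative class, not the YaCy original):

```java
import java.util.regex.Pattern;

public class LazyFilter {
    static final String MATCH_ALL_STRING = ".*";
    static final Pattern MATCH_ALL_PATTERN = Pattern.compile(MATCH_ALL_STRING);

    private final String expr;
    private Pattern compiled; // compiled at most once

    LazyFilter(final String expr) { this.expr = expr; }

    Pattern pattern() {
        if (this.compiled == null) {
            // reuse the shared constant for the default so the caller can skip
            // expensive work (e.g. a DNS lookup) with a simple identity test
            this.compiled = (this.expr == null || this.expr.equals(MATCH_ALL_STRING))
                    ? MATCH_ALL_PATTERN
                    : Pattern.compile(this.expr);
        }
        return this.compiled;
    }

    public static void main(final String[] args) {
        System.out.println(new LazyFilter(".*").pattern() == MATCH_ALL_PATTERN); // true
        System.out.println(new LazyFilter("192\\.168\\..*").pattern()
                .matcher("192.168.1.1").matches());                              // true
    }
}
```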
@@ -299,8 +299,8 @@ public class CrawlQueues {
+ ", crawlOrder=" + ((profile.remoteIndexing()) ? "true" : "false")
+ ", depth=" + urlEntry.depth()
+ ", crawlDepth=" + profile.depth()
+ ", must-match=" + profile.mustMatchPattern().toString()
+ ", must-not-match=" + profile.mustNotMatchPattern().toString()
+ ", must-match=" + profile.urlMustMatchPattern().toString()
+ ", must-not-match=" + profile.urlMustNotMatchPattern().toString()
+ ", permission=" + ((this.sb.peers == null) ? "undefined" : (((this.sb.peers.mySeed().isSenior()) || (this.sb.peers.mySeed().isPrincipal())) ? "true" : "false")));
// work off one Crawl stack entry

@@ -34,6 +34,7 @@ import java.net.MalformedURLException;
import java.net.UnknownHostException;
import java.util.Date;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.BlockingQueue;
@@ -438,8 +439,9 @@ public final class CrawlStacker {
// check if the protocol is supported
final String urlProtocol = url.getProtocol();
final String urlstring = url.toString();
if (!Switchboard.getSwitchboard().loader.isSupportedProtocol(urlProtocol)) {
this.log.logSevere("Unsupported protocol in URL '" + url.toString() + "'.");
this.log.logSevere("Unsupported protocol in URL '" + urlstring + "'.");
return "unsupported protocol";
}
@@ -452,31 +454,31 @@ public final class CrawlStacker {
// check blacklist
if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, url)) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' is in blacklist.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is in blacklist.");
return "url in blacklist";
}
// filter with must-match
if ((depth > 0) && !profile.mustMatchPattern().matcher(url.toString()).matches()) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' does not match must-match crawling filter '" + profile.mustMatchPattern().toString() + "'.");
// filter with must-match for URLs
if ((depth > 0) && !profile.urlMustMatchPattern().matcher(urlstring).matches()) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' does not match must-match crawling filter '" + profile.urlMustMatchPattern().toString() + "'.");
return "url does not match must-match filter";
}
// filter with must-not-match
if ((depth > 0) && profile.mustNotMatchPattern().matcher(url.toString()).matches()) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' does matches do-not-match crawling filter '" + profile.mustNotMatchPattern().toString() + "'.");
// filter with must-not-match for URLs
if ((depth > 0) && profile.urlMustNotMatchPattern().matcher(urlstring).matches()) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.urlMustNotMatchPattern().toString() + "'.");
return "url matches must-not-match filter";
}
// deny cgi
if (url.isIndividual() && !(profile.crawlingQ())) { // TODO: make special property for crawlingIndividual
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' is CGI URL.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is CGI URL.");
return "individual url (sessionid etc) not wanted";
}
// deny post properties
if (url.isPOST() && !(profile.crawlingQ())) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' is post URL.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is post URL.");
return "post url not allowed";
}
@@ -486,7 +488,7 @@ public final class CrawlStacker {
if (oldEntry == null) {
if (dbocc != null) {
// do double-check
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' is double registered in '" + dbocc + "'.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' is double registered in '" + dbocc + "'.");
if (dbocc.equals("errors")) {
final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash());
return "double in: errors (" + errorEntry.anycause() + ")";
@@ -498,13 +500,13 @@ public final class CrawlStacker {
final boolean recrawl = profile.recrawlIfOlder() > oldEntry.loaddate().getTime();
if (recrawl) {
if (this.log.isInfo())
this.log.logInfo("RE-CRAWL of URL '" + url.toString() + "': this url was crawled " +
this.log.logInfo("RE-CRAWL of URL '" + urlstring + "': this url was crawled " +
((System.currentTimeMillis() - oldEntry.loaddate().getTime()) / 60000 / 60 / 24) + " days ago.");
} else {
if (dbocc == null) {
return "double in: LURL-DB";
} else {
if (this.log.isInfo()) this.log.logInfo("URL '" + url.toString() + "' is double registered in '" + dbocc + "'. " + "Stack processing time:");
if (this.log.isInfo()) this.log.logInfo("URL '" + urlstring + "' is double registered in '" + dbocc + "'. " + "Stack processing time:");
if (dbocc.equals("errors")) {
final ZURL.Entry errorEntry = this.nextQueue.errorURL.get(url.hash());
return "double in: errors (" + errorEntry.anycause() + ")";
@@ -520,16 +522,51 @@ public final class CrawlStacker {
if (maxAllowedPagesPerDomain < Integer.MAX_VALUE) {
final DomProfile dp = this.doms.get(url.getHost());
if (dp != null && dp.count >= maxAllowedPagesPerDomain) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed.");
return "crawl stack domain counter exceeded";
}
if (ResultURLs.domainCount(EventOrigin.LOCAL_CRAWLING, url.getHost()) >= profile.domMaxPages()) {
if (this.log.isFine()) this.log.logFine("URL '" + url.toString() + "' appeared too often in result stack, a maximum of " + profile.domMaxPages() + " is allowed.");
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in result stack, a maximum of " + profile.domMaxPages() + " is allowed.");
return "result stack domain counter exceeded";
}
}
// the following filters use a DNS lookup to check if the url matches with an IP filter
// this is expensive, so these filters are checked after all other tests
// filter with must-match for IPs
if ((depth > 0) && profile.ipMustMatchPattern() != CrawlProfile.MATCH_ALL_PATTERN && !profile.ipMustMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) {
if (this.log.isFine()) this.log.logFine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'.");
return "ip " + url.getInetAddress().getHostAddress() + " of url does not match must-match filter";
}
// filter with must-not-match for IPs
if ((depth > 0) && profile.ipMustNotMatchPattern() != CrawlProfile.MATCH_NEVER_PATTERN && profile.ipMustNotMatchPattern().matcher(url.getInetAddress().getHostAddress()).matches()) {
if (this.log.isFine()) this.log.logFine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.ipMustNotMatchPattern().toString() + "'.");
return "ip " + url.getInetAddress().getHostAddress() + " of url matches must-not-match filter";
}
// filter with must-match for countries
final String[] countryMatchList = profile.countryMustMatchList();
if (depth > 0 && countryMatchList != null && countryMatchList.length > 0) {
final Locale locale = url.getLocale();
if (locale != null) {
final String c0 = locale.getCountry();
boolean granted = false;
matchloop: for (final String c: countryMatchList) {
if (c0.equals(c)) {
granted = true;
break matchloop;
}
}
if (!granted) {
if (this.log.isFine()) this.log.logFine("IP " + url.getInetAddress().getHostAddress() + " of URL '" + urlstring + "' does not match must-match crawling filter '" + profile.ipMustMatchPattern().toString() + "'.");
return "country " + c0 + " of url does not match must-match filter for countries";
}
}
}
return null;
}

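The new country restriction compares the two-letter country code of the URL's locale against the configured allow-list and, like the other CrawlStacker checks, yields null for "accepted" or a rejection reason string. A self-contained sketch of that decision (names are illustrative):

```java
import java.util.Locale;

public class CountryCheck {
    // null means accepted; otherwise the rejection reason, following the
    // CrawlStacker convention of returning an error string.
    static String checkCountry(final Locale locale, final String[] allowed) {
        if (allowed == null || allowed.length == 0) return null; // no restriction configured
        if (locale == null) return null;                         // country unknown, cannot filter
        final String c0 = locale.getCountry();
        for (final String c : allowed) {
            if (c0.equals(c)) return null;                       // granted
        }
        return "country " + c0 + " of url does not match must-match filter for countries";
    }

    public static void main(final String[] args) {
        System.out.println(checkCountry(Locale.GERMANY, "DE,AT".split(","))); // null (accepted)
        System.out.println(checkCountry(Locale.US, "DE,AT".split(",")));      // rejection reason
    }
}
```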
@@ -229,8 +229,8 @@ public final class CrawlSwitchboard {
// generate new default entry for proxy crawling
this.defaultProxyProfile = new CrawlProfile(
"proxy", null,
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING,
"",
0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false,
@@ -243,38 +243,38 @@ public final class CrawlSwitchboard {
}
if (this.defaultRemoteProfile == null) {
// generate new default entry for remote crawling
this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", CrawlProfile.MATCH_NEVER, 0,
this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", CrawlProfile.MATCH_NEVER_STRING, 0,
-1, -1, true, true, true, false, false, true, true, false, CacheStrategy.IFFRESH);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultRemoteProfile.handle()), this.defaultRemoteProfile);
}
if (this.defaultTextSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()), this.defaultTextSnippetLocalProfile);
}
if (this.defaultTextSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()), this.defaultTextSnippetGlobalProfile);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
if (this.defaultMediaSnippetLocalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, false, false, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile);
}
if (this.defaultMediaSnippetGlobalProfile == null) {
// generate new default entry for snippet fetch and optional crawling
this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, false, true, true, false, true, true, false, CacheStrategy.IFEXIST);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultMediaSnippetGlobalProfile);
}
if (this.defaultSurrogateProfile == null) {
// generate new default entry for surrogate parsing
this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, "", 0,
this.defaultSurrogateProfile = new CrawlProfile(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, "", 0,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, true, true, false, false, false, true, true, false, CacheStrategy.NOCACHE);
this.profilesActiveCrawls.put(UTF8.getBytes(this.defaultSurrogateProfile.handle()), this.defaultSurrogateProfile);
}

@@ -88,6 +88,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
protected final String protocol, userInfo;
protected String host, path, quest, ref;
protected int port;
private InetAddress hostAddress;
/**
* initialization of a MultiProtocolURI to produce poison pills for concurrent blocking queues
@@ -95,6 +96,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public MultiProtocolURI() {
this.protocol = null;
this.host = null;
this.hostAddress = null;
this.userInfo = null;
this.path = null;
this.quest = null;
@@ -109,6 +111,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
protected MultiProtocolURI(final MultiProtocolURI url) {
this.protocol = url.protocol;
this.host = url.host;
this.hostAddress = null;
this.userInfo = url.userInfo;
this.path = url.path;
this.quest = url.quest;
@@ -119,6 +122,8 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
public MultiProtocolURI(String url) throws MalformedURLException {
if (url == null) throw new MalformedURLException("url string is null");
this.hostAddress = null;
// identify protocol
assert (url != null);
url = url.trim();
@@ -688,6 +693,12 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
return this.host;
}
public InetAddress getInetAddress() {
if (this.hostAddress != null) return this.hostAddress;
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase());
return this.hostAddress;
}
public int getPort() {
return this.port;
}
@@ -827,7 +838,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
}
final String hl = getHost().toLowerCase();
if (resolveHost) {
final InetAddress r = Domains.dnsResolve(hl);
final InetAddress r = getInetAddress();
u.append(r == null ? hl : r.getHostAddress());
} else {
u.append(hl);
@@ -1119,8 +1130,11 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
return baos.toByteArray();
}
public Locale getLocale() {
if (this.hostAddress != null) {
final Locale locale = Domains.getLocale(this.hostAddress);
if (locale != null && locale.getCountry() != null && locale.getCountry().length() > 0) return locale;
}
return Domains.getLocale(this.host);
}

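MultiProtocolURI now memoizes the DNS result in the new hostAddress field, so the IP filters, the locale lookup in getLocale(), and the Solr exporters below share one resolution per URL. A standalone sketch of the memoization; it uses the JDK resolver where YaCy calls Domains.dnsResolve(), which likewise returns null on failure:

```java
import java.net.InetAddress;
import java.net.UnknownHostException;

public class CachedHost {
    private final String host;
    private InetAddress hostAddress; // memoized; resolved at most once on success

    CachedHost(final String host) { this.host = host; }

    InetAddress getInetAddress() {
        if (this.hostAddress != null) return this.hostAddress;
        try {
            this.hostAddress = InetAddress.getByName(this.host.toLowerCase());
        } catch (final UnknownHostException e) {
            // stays null, like Domains.dnsResolve(); callers must handle that
        }
        return this.hostAddress;
    }

    public static void main(final String[] args) {
        final CachedHost h = new CachedHost("localhost");
        System.out.println(h.getInetAddress()); // resolves once
        System.out.println(h.getInetAddress()); // served from the cached field
    }
}
```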
@@ -550,6 +550,11 @@ public class Domains {
cacheHit_Insert++;
}
/**
* resolve a host address using a local DNS cache and a DNS lookup if necessary
* @param host
* @return the host's InetAddress or null if the address cannot be resolved
*/
public static InetAddress dnsResolve(String host) {
if ((host == null) || (host.length() == 0)) return null;
host = host.toLowerCase().trim();
@@ -921,7 +926,7 @@ public class Domains {
public static Locale getLocale(final String host) {
if (host == null) return null;
final Locale locale = getLocale(dnsResolve(host));
if (locale != null) return locale;
if (locale != null && locale.getCountry() != null && locale.getCountry().length() > 0) return locale;
final int p = host.lastIndexOf('.');
if (p < 0) return null;
String tld = host.substring(p + 1).toUpperCase();

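Domains.getLocale(String) now trusts the IP-derived locale only when it actually carries a country code and otherwise falls back to deriving the country from the top-level domain. A condensed sketch of that two-step lookup (hypothetical helper; the real method resolves the host through the DNS cache first):

```java
import java.util.Locale;

public class LocaleLookup {
    static Locale localeForHost(final String host, final Locale ipLocale) {
        // step 1: the locale derived from the IP, but only if it names a country
        if (ipLocale != null && ipLocale.getCountry() != null && ipLocale.getCountry().length() > 0) {
            return ipLocale;
        }
        // step 2: fall back to the top-level domain, e.g. "example.de" -> DE
        final int p = host.lastIndexOf('.');
        if (p < 0) return null;
        return new Locale("", host.substring(p + 1).toUpperCase());
    }

    public static void main(final String[] args) {
        System.out.println(localeForHost("example.de", null).getCountry());           // DE
        System.out.println(localeForHost("example.org", Locale.FRANCE).getCountry()); // FR
    }
}
```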
@@ -57,18 +57,18 @@ public class Scanner extends Thread {
public static enum Access {unknown, empty, granted, denied;}
public static enum Protocol {http(80), https(443), ftp(21), smb(445);
public int port;
private Protocol(int port) {this.port = port;}
private Protocol(final int port) {this.port = port;}
}
public static class Service {
public Protocol protocol;
public InetAddress inetAddress;
private String hostname;
public Service(Protocol protocol, InetAddress inetAddress) {
public Service(final Protocol protocol, final InetAddress inetAddress) {
this.protocol = protocol;
this.inetAddress = inetAddress;
this.hostname = null;
}
public Service(String protocol, InetAddress inetAddress) {
public Service(final String protocol, final InetAddress inetAddress) {
this.protocol = protocol.equals("http") ? Protocol.http : protocol.equals("https") ? Protocol.https : protocol.equals("ftp") ? Protocol.ftp : Protocol.smb;
this.inetAddress = inetAddress;
this.hostname = null;
@@ -92,7 +92,7 @@ public class Scanner extends Thread {
try {
this.hostname = TimeoutRequest.getHostName(this.inetAddress, 100);
Domains.setHostName(this.inetAddress, this.hostname);
} catch (ExecutionException e) {
} catch (final ExecutionException e) {
this.hostname = this.inetAddress.getHostAddress();
}
//this.hostname = Domains.getHostName(this.inetAddress);
@@ -105,7 +105,7 @@ public class Scanner extends Thread {
public String toString() {
try {
return new MultiProtocolURI(this.protocol.name() + "://" + this.inetAddress.getHostAddress() + "/").toNormalform(true, false);
} catch (MalformedURLException e) {
} catch (final MalformedURLException e) {
return "";
}
}
@@ -114,7 +114,7 @@ public class Scanner extends Thread {
return this.inetAddress.hashCode();
}
@Override
public boolean equals(Object o) {
public boolean equals(final Object o) {
return (o instanceof Service) && ((Service) o).protocol == this.protocol && ((Service) o).inetAddress.equals(this.inetAddress);
}
}
@@ -128,7 +128,7 @@ public class Scanner extends Thread {
return scancache.size();
}
public static void scancacheReplace(Scanner newScanner, long validTime) {
public static void scancacheReplace(final Scanner newScanner, final long validTime) {
scancache.clear();
scancache.putAll(newScanner.services());
//scancacheUpdateTime = System.currentTimeMillis();
@@ -136,8 +136,8 @@ public class Scanner extends Thread {
scancacheScanrange = newScanner.scanrange;
}
public static void scancacheExtend(Scanner newScanner, long validTime) {
Iterator<Map.Entry<Service, Access>> i = Scanner.scancache.entrySet().iterator();
public static void scancacheExtend(final Scanner newScanner, final long validTime) {
final Iterator<Map.Entry<Service, Access>> i = Scanner.scancache.entrySet().iterator();
Map.Entry<Service, Access> entry;
while (i.hasNext()) {
entry = i.next();
@@ -160,50 +160,50 @@ public class Scanner extends Thread {
* @param url
* @return true if the url shall be part of a search result
*/
public static boolean acceptURL(MultiProtocolURI url) {
public static boolean acceptURL(final MultiProtocolURI url) {
// if the scan range is empty, then all urls are accepted
if (scancacheScanrange == null || scancacheScanrange.isEmpty()) return true;
//if (System.currentTimeMillis() > scancacheValidUntilTime) return true;
InetAddress a = Domains.dnsResolve(url.getHost()); // try to avoid that!
final InetAddress a = url.getInetAddress(); // try to avoid that!
if (a == null) return true;
InetAddress n = normalize(a);
final InetAddress n = normalize(a);
if (!scancacheScanrange.contains(n)) return true;
Access access = scancache.get(new Service(url.getProtocol(), a));
final Access access = scancache.get(new Service(url.getProtocol(), a));
if (access == null) return false;
return access == Access.granted;
}
private static InetAddress normalize(InetAddress a) {
private static InetAddress normalize(final InetAddress a) {
if (a == null) return null;
byte[] b = a.getAddress();
final byte[] b = a.getAddress();
if (b[3] == 1) return a;
b[3] = 1;
try {
return InetAddress.getByAddress(b);
} catch (UnknownHostException e) {
} catch (final UnknownHostException e) {
return a;
}
}
private int runnerCount;
private Set<InetAddress> scanrange;
private BlockingQueue<Service> scanqueue;
private Map<Service, Access> services;
private Map<Runner, Object> runner;
private int timeout;
private final int runnerCount;
private final Set<InetAddress> scanrange;
private final BlockingQueue<Service> scanqueue;
private final Map<Service, Access> services;
private final Map<Runner, Object> runner;
private final int timeout;
public Scanner(Set<InetAddress> scanrange, int concurrentRunner, int timeout) {
public Scanner(final Set<InetAddress> scanrange, final int concurrentRunner, final int timeout) {
this.runnerCount = concurrentRunner;
this.scanrange = new HashSet<InetAddress>();
for (InetAddress a: scanrange) this.scanrange.add(normalize(a));
for (final InetAddress a: scanrange) this.scanrange.add(normalize(a));
this.scanqueue = new LinkedBlockingQueue<Service>();
this.services = Collections.synchronizedMap(new HashMap<Service, Access>());
this.runner = new ConcurrentHashMap<Runner, Object>();
this.timeout = timeout;
}
public Scanner(int concurrentRunner, int timeout) {
public Scanner(final int concurrentRunner, final int timeout) {
this(Domains.myIntranetIPs(), concurrentRunner, timeout);
}
@@ -211,18 +211,18 @@ public class Scanner extends Thread {
public void run() {
Service uri;
try {
while ((uri = scanqueue.take()) != POISONSERVICE) {
while (runner.size() >= this.runnerCount) {
while ((uri = this.scanqueue.take()) != POISONSERVICE) {
while (this.runner.size() >= this.runnerCount) {
/*for (Runner r: runner.keySet()) {
if (r.age() > 3000) synchronized(r) { r.interrupt(); }
}*/
if (runner.size() >= this.runnerCount) Thread.sleep(20);
if (this.runner.size() >= this.runnerCount) Thread.sleep(20);
}
Runner runner = new Runner(uri);
final Runner runner = new Runner(uri);
this.runner.put(runner, PRESENT);
runner.start();
}
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
}
}
@@ -231,29 +231,29 @@ public class Scanner extends Thread {
}
public void terminate() {
for (int i = 0; i < runnerCount; i++) try {
for (int i = 0; i < this.runnerCount; i++) try {
this.scanqueue.put(POISONSERVICE);
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
}
try {
this.join();
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
}
}
public class Runner extends Thread {
private Service service;
private long starttime;
public Runner(Service service) {
private final Service service;
private final long starttime;
public Runner(final Service service) {
this.service = service;
this.starttime = System.currentTimeMillis();
}
@Override
public void run() {
try {
if (TimeoutRequest.ping(this.service.getInetAddress().getHostAddress(), this.service.getProtocol().port, timeout)) {
if (TimeoutRequest.ping(this.service.getInetAddress().getHostAddress(), this.service.getProtocol().port, Scanner.this.timeout)) {
Access access = this.service.getProtocol() == Protocol.http || this.service.getProtocol() == Protocol.https ? Access.granted : Access.unknown;
services.put(service, access);
Scanner.this.services.put(this.service, access);
if (access == Access.unknown) {
// ask the service if it lets us in
if (this.service.getProtocol() == Protocol.ftp) {
@@ -261,35 +261,35 @@ public class Scanner extends Thread {
try {
ftpClient.open(this.service.getInetAddress().getHostAddress(), this.service.getProtocol().port);
ftpClient.login("anonymous", "anomic@");
List<String> list = ftpClient.list("/", false);
final List<String> list = ftpClient.list("/", false);
ftpClient.CLOSE();
access = list == null || list.isEmpty() ? Access.empty : Access.granted;
} catch (IOException e) {
} catch (final IOException e) {
access = Access.denied;
}
}
if (this.service.getProtocol() == Protocol.smb) {
try {
MultiProtocolURI uri = new MultiProtocolURI(this.service.toString());
String[] list = uri.list();
final MultiProtocolURI uri = new MultiProtocolURI(this.service.toString());
final String[] list = uri.list();
access = list == null || list.length == 0 ? Access.empty : Access.granted;
} catch (IOException e) {
} catch (final IOException e) {
access = Access.denied;
}
}
}
if (access != Access.unknown) services.put(this.service, access);
if (access != Access.unknown) Scanner.this.services.put(this.service, access);
}
} catch (ExecutionException e) {
} catch (final ExecutionException e) {
}
Object r = runner.remove(this);
final Object r = Scanner.this.runner.remove(this);
assert r != null;
}
public long age() {
return System.currentTimeMillis() - this.starttime;
}
@Override
public boolean equals(Object o) {
public boolean equals(final Object o) {
return (o instanceof Runner) && this.service.equals(((Runner) o).service);
}
@Override
@@ -298,42 +298,42 @@ public class Scanner extends Thread {
}
}
public void addHTTP(boolean bigrange) {
public void addHTTP(final boolean bigrange) {
addProtocol(Protocol.http, bigrange);
}
public void addHTTPS(boolean bigrange) {
public void addHTTPS(final boolean bigrange) {
addProtocol(Protocol.https, bigrange);
}
public void addSMB(boolean bigrange) {
public void addSMB(final boolean bigrange) {
addProtocol(Protocol.smb, bigrange);
}
public void addFTP(boolean bigrange) {
public void addFTP(final boolean bigrange) {
addProtocol(Protocol.ftp, bigrange);
}
private void addProtocol(Protocol protocol, boolean bigrange) {
for (InetAddress i: genlist(bigrange)) {
private void addProtocol(final Protocol protocol, final boolean bigrange) {
for (final InetAddress i: genlist(bigrange)) {
try {
this.scanqueue.put(new Service(protocol, i));
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
}
}
}
private final List<InetAddress> genlist(boolean bigrange) {
ArrayList<InetAddress> c = new ArrayList<InetAddress>(10);
for (InetAddress i: scanrange) {
private final List<InetAddress> genlist(final boolean bigrange) {
final ArrayList<InetAddress> c = new ArrayList<InetAddress>(10);
for (final InetAddress i: this.scanrange) {
for (int br = bigrange ? 1 : i.getAddress()[2]; br < (bigrange ? 255 : i.getAddress()[2] + 1); br++) {
for (int j = 1; j < 255; j++) {
byte[] address = i.getAddress();
final byte[] address = i.getAddress();
address[2] = (byte) br;
address[3] = (byte) j;
try {
c.add(InetAddress.getByAddress(address));
} catch (UnknownHostException e) {
} catch (final UnknownHostException e) {
}
}
}
@@ -345,28 +345,28 @@ public class Scanner extends Thread {
return this.services;
}
public static byte[] inIndex(Map<byte[], String> commentCache, String url) {
for (Map.Entry<byte[], String> comment: commentCache.entrySet()) {
public static byte[] inIndex(final Map<byte[], String> commentCache, final String url) {
for (final Map.Entry<byte[], String> comment: commentCache.entrySet()) {
if (comment.getValue().contains(url)) return comment.getKey();
}
return null;
}
public static void main(String[] args) {
public static void main(final String[] args) {
//try {System.out.println("192.168.1.91: " + ping(new MultiProtocolURI("smb://192.168.1.91/"), 1000));} catch (MalformedURLException e) {}
Scanner scanner = new Scanner(100, 10);
final Scanner scanner = new Scanner(100, 10);
scanner.addFTP(false);
scanner.addHTTP(false);
scanner.addHTTPS(false);
scanner.addSMB(false);
scanner.start();
scanner.terminate();
for (Service service: scanner.services().keySet()) {
for (final Service service: scanner.services().keySet()) {
System.out.println(service.toString());
}
try {
HTTPClient.closeConnectionManager();
} catch (InterruptedException e) {
} catch (final InterruptedException e) {
}
}
}

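Most of the Scanner changes are mechanical: final qualifiers and explicit Scanner.this. access from the inner Runner thread, needed because the local variable Runner runner in run() shadows the runner field. A toy illustration of the shadowing rule:

```java
public class Shadow {
    private final String field = "outer field";

    class Inner {
        void show() {
            final String field = "local";          // shadows the enclosing field
            System.out.println(field);             // prints "local"
            System.out.println(Shadow.this.field); // prints "outer field"
        }
    }

    public static void main(final String[] args) {
        new Shadow().new Inner().show();
    }
}
```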
@@ -37,7 +37,6 @@ import java.util.Set;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.storage.ConfigurationSet;
@@ -103,7 +102,7 @@ public class SolrScheme extends ConfigurationSet {
addSolr(solrdoc, "failreason_t", ""); // overwrite a possible fail reason (in case that there was a fail reason before)
addSolr(solrdoc, "id", id);
addSolr(solrdoc, "sku", digestURI.toNormalform(true, false), 3.0f);
final InetAddress address = Domains.dnsResolve(digestURI.getHost());
final InetAddress address = digestURI.getInetAddress();
if (address != null) addSolr(solrdoc, "ip_s", address.getHostAddress());
if (digestURI.getHost() != null) addSolr(solrdoc, "host_s", digestURI.getHost());
addSolr(solrdoc, "title", yacydoc.dc_title());
@@ -354,10 +353,10 @@ public class SolrScheme extends ConfigurationSet {
return solrdoc;
}
private int relEval(String[] rel) {
private int relEval(final String[] rel) {
int i = 0;
for (String s: rel) {
String s0 = s.toLowerCase().trim();
for (final String s: rel) {
final String s0 = s.toLowerCase().trim();
if ("me".equals(s0)) i += 1;
if ("nofollow".equals(s0)) i += 2;
}

@@ -279,7 +279,7 @@ public class SolrSingleConnector implements SolrConnector {
final SolrInputDocument solrdoc = new SolrInputDocument();
solrdoc.addField("id", ASCII.String(digestURI.hash()));
solrdoc.addField("sku", digestURI.toNormalform(true, false), 3.0f);
final InetAddress address = Domains.dnsResolve(digestURI.getHost());
final InetAddress address = digestURI.getInetAddress();
if (address != null) solrdoc.addField("ip_s", address.getHostAddress());
if (digestURI.getHost() != null) solrdoc.addField("host_s", digestURI.getHost());

@@ -232,6 +232,7 @@ public class ReferenceContainer<ReferenceType extends Reference> extends RowSet
int pos = 0;
while (i.hasNext()) {
r = i.next();
if (r == null) continue;
mod = r.lastModified();
positions = tm.get(mod);
if (positions == null) positions = new ArrayList<Integer>();

@@ -1909,8 +1909,8 @@ public final class Switchboard extends serverSwitch {
this.log.logFine("processResourceStack processCase=" + processCase +
", depth=" + response.depth() +
", maxDepth=" + ((response.profile() == null) ? "null" : Integer.toString(response.profile().depth())) +
", must-match=" + ((response.profile() == null) ? "null" : response.profile().mustMatchPattern().toString()) +
", must-not-match=" + ((response.profile() == null) ? "null" : response.profile().mustNotMatchPattern().toString()) +
", must-match=" + ((response.profile() == null) ? "null" : response.profile().urlMustMatchPattern().toString()) +
", must-not-match=" + ((response.profile() == null) ? "null" : response.profile().urlMustNotMatchPattern().toString()) +
", initiatorHash=" + ((response.initiator() == null) ? "null" : ASCII.String(response.initiator())) +
//", responseHeader=" + ((entry.responseHeader() == null) ? "null" : entry.responseHeader().toString()) +
", url=" + response.url()); // DEBUG
