fixes to crawl profiles

pull/1/head
Michael Peter Christen 13 years ago
parent 2f536cb54d
commit 76d218fbef

@ -134,14 +134,14 @@ public class QuickCrawlLink_p {
try { try {
pe = new CrawlProfile( pe = new CrawlProfile(
crawlingStartURL.toNormalform(true, false), crawlingStartURL.toNormalform(true, false),
crawlingMustMatch, crawlingMustMatch, //crawlerUrlMustMatch
CrawlProfile.MATCH_ALL_STRING, crawlingMustNotMatch, //crawlerUrlMustNotMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
CrawlProfile.MATCH_ALL_STRING, "", //crawlerCountryMustMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
"", CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
crawlingMustNotMatch, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
CrawlingDepth, CrawlingDepth,
true, true,
60 * 24 * 30, // recrawlIfOlder (minutes); here: one month 60 * 24 * 30, // recrawlIfOlder (minutes); here: one month

@ -217,270 +217,210 @@ public final class CrawlSwitchboard
} }
private void initActiveCrawlProfiles() { private void initActiveCrawlProfiles() {
this.defaultProxyProfile = null; // generate new default entry for proxy crawling
this.defaultRemoteProfile = null; this.defaultProxyProfile =
this.defaultTextSnippetLocalProfile = null; new CrawlProfile(
this.defaultTextSnippetGlobalProfile = null; CRAWL_PROFILE_PROXY,
this.defaultMediaSnippetLocalProfile = null; CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
this.defaultMediaSnippetGlobalProfile = null; CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
this.defaultSurrogateProfile = null; CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
CrawlProfile profile; CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
String name; "", //crawlerCountryMustMatch
try { CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
for ( final byte[] handle : this.profilesActiveCrawls.keySet() ) { CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
profile = new CrawlProfile(this.profilesActiveCrawls.get(handle)); CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
name = profile.name(); 0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
if ( name.equals(CRAWL_PROFILE_PROXY) ) { true,
this.defaultProxyProfile = profile; CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE),
} -1,
if ( name.equals(CRAWL_PROFILE_REMOTE) ) { false,
this.defaultRemoteProfile = profile; true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
} true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/,
if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT) ) { true,
this.defaultTextSnippetLocalProfile = profile; false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/,
} true,
if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT) ) { true,
this.defaultTextSnippetGlobalProfile = profile; true,
} CacheStrategy.IFFRESH,
if ( name.equals(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA) ) { "robot_" + CRAWL_PROFILE_PROXY);
this.defaultMediaSnippetLocalProfile = profile; this.profilesActiveCrawls.put(
} UTF8.getBytes(this.defaultProxyProfile.handle()),
if ( name.equals(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA) ) { this.defaultProxyProfile);
this.defaultMediaSnippetGlobalProfile = profile; // generate new default entry for remote crawling
} this.defaultRemoteProfile =
if ( name.equals(CRAWL_PROFILE_SURROGATE) ) { new CrawlProfile(
this.defaultSurrogateProfile = profile; CRAWL_PROFILE_REMOTE,
} CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
} CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
} catch ( final Exception e ) { CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
this.profilesActiveCrawls.clear(); CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
this.defaultProxyProfile = null; "", //crawlerCountryMustMatch
this.defaultRemoteProfile = null; CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
this.defaultTextSnippetLocalProfile = null; CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
this.defaultTextSnippetGlobalProfile = null; CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
this.defaultMediaSnippetLocalProfile = null; 0,
this.defaultMediaSnippetGlobalProfile = null; false,
this.defaultSurrogateProfile = null; -1,
} -1,
true,
if ( this.defaultProxyProfile == null ) { true,
// generate new default entry for proxy crawling true,
this.defaultProxyProfile = false,
new CrawlProfile( false,
CRAWL_PROFILE_PROXY, true,
CrawlProfile.MATCH_ALL_STRING, true,
CrawlProfile.MATCH_NEVER_STRING, false,
CrawlProfile.MATCH_ALL_STRING, CacheStrategy.IFFRESH,
CrawlProfile.MATCH_NEVER_STRING, "robot_" + CRAWL_PROFILE_REMOTE);
CrawlProfile.MATCH_NEVER_STRING, this.profilesActiveCrawls.put(
CrawlProfile.MATCH_ALL_STRING, UTF8.getBytes(this.defaultRemoteProfile.handle()),
CrawlProfile.MATCH_NEVER_STRING, this.defaultRemoteProfile);
"", // generate new default entry for snippet fetch and optional crawling
0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/, this.defaultTextSnippetLocalProfile =
true, new CrawlProfile(
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), CRAWL_PROFILE_SNIPPET_LOCAL_TEXT,
-1, CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
false, CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/, CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/, CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
true, "", //crawlerCountryMustMatch
false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
true, CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
true, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
true, 0,
CacheStrategy.IFFRESH, false,
"robot_" + CRAWL_PROFILE_PROXY); CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE),
this.profilesActiveCrawls.put( -1,
UTF8.getBytes(this.defaultProxyProfile.handle()), true,
this.defaultProxyProfile); false,
} false,
if ( this.defaultRemoteProfile == null ) { true,
// generate new default entry for remote crawling false,
this.defaultRemoteProfile = true,
new CrawlProfile( true,
CRAWL_PROFILE_REMOTE, false,
CrawlProfile.MATCH_ALL_STRING, CacheStrategy.IFEXIST,
CrawlProfile.MATCH_ALL_STRING, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT);
CrawlProfile.MATCH_NEVER_STRING, this.profilesActiveCrawls.put(
CrawlProfile.MATCH_NEVER_STRING, UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()),
CrawlProfile.MATCH_ALL_STRING, this.defaultTextSnippetLocalProfile);
CrawlProfile.MATCH_NEVER_STRING, // generate new default entry for snippet fetch and optional crawling
"", this.defaultTextSnippetGlobalProfile =
CrawlProfile.MATCH_NEVER_STRING, new CrawlProfile(
0, CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
false, CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
-1, CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
-1, CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
true, CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
true, "", //crawlerCountryMustMatch
true, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
false, CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
false, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
true, 0,
true, false,
false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE),
CacheStrategy.IFFRESH, -1,
"robot_" + CRAWL_PROFILE_REMOTE); true,
this.profilesActiveCrawls.put( true,
UTF8.getBytes(this.defaultRemoteProfile.handle()), true,
this.defaultRemoteProfile); true,
} false,
if ( this.defaultTextSnippetLocalProfile == null ) { true,
// generate new default entry for snippet fetch and optional crawling true,
this.defaultTextSnippetLocalProfile = false,
new CrawlProfile( CacheStrategy.IFEXIST,
CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT);
CrawlProfile.MATCH_ALL_STRING, this.profilesActiveCrawls.put(
CrawlProfile.MATCH_NEVER_STRING, UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
CrawlProfile.MATCH_ALL_STRING, this.defaultTextSnippetGlobalProfile);
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE),
-1,
true,
false,
false,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetLocalProfile.handle()),
this.defaultTextSnippetLocalProfile);
}
if ( this.defaultTextSnippetGlobalProfile == null ) {
// generate new default entry for snippet fetch and optional crawling
this.defaultTextSnippetGlobalProfile =
new CrawlProfile(
CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_NEVER_STRING,
CrawlProfile.MATCH_ALL_STRING,
CrawlProfile.MATCH_NEVER_STRING,
"",
0,
false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE),
-1,
true,
true,
true,
true,
false,
true,
true,
false,
CacheStrategy.IFEXIST,
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultTextSnippetGlobalProfile.handle()),
this.defaultTextSnippetGlobalProfile);
}
this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST); this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);
if ( this.defaultMediaSnippetLocalProfile == null ) { // generate new default entry for snippet fetch and optional crawling
// generate new default entry for snippet fetch and optional crawling this.defaultMediaSnippetLocalProfile =
this.defaultMediaSnippetLocalProfile = new CrawlProfile(
new CrawlProfile( CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA,
CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
CrawlProfile.MATCH_NEVER_STRING, "", //crawlerCountryMustMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
"", 0,
0, false,
false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE),
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1,
-1, true,
true, false,
false, false,
false, true,
true, false,
false, true,
true, true,
true, false,
false, CacheStrategy.IFEXIST,
CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA);
"robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA); this.profilesActiveCrawls.put(
this.profilesActiveCrawls.put( UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()),
UTF8.getBytes(this.defaultMediaSnippetLocalProfile.handle()), this.defaultMediaSnippetLocalProfile);
this.defaultMediaSnippetLocalProfile); // generate new default entry for snippet fetch and optional crawling
} this.defaultMediaSnippetGlobalProfile =
if ( this.defaultMediaSnippetGlobalProfile == null ) { new CrawlProfile(
// generate new default entry for snippet fetch and optional crawling CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA,
this.defaultMediaSnippetGlobalProfile = CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
new CrawlProfile( CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
CrawlProfile.MATCH_NEVER_STRING, "", //crawlerCountryMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
CrawlProfile.MATCH_ALL_STRING, 0,
CrawlProfile.MATCH_NEVER_STRING, false,
"", CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE),
0, -1,
false, true,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), false,
-1, true,
true, true,
false, false,
true, true,
true, true,
false, false,
true, CacheStrategy.IFEXIST,
true, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA);
false, this.profilesActiveCrawls.put(
CacheStrategy.IFEXIST, UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()),
"robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA); this.defaultMediaSnippetGlobalProfile);
this.profilesActiveCrawls.put( // generate new default entry for surrogate parsing
UTF8.getBytes(this.defaultMediaSnippetGlobalProfile.handle()), this.defaultSurrogateProfile =
this.defaultMediaSnippetGlobalProfile); new CrawlProfile(
} CRAWL_PROFILE_SURROGATE,
if ( this.defaultSurrogateProfile == null ) { CrawlProfile.MATCH_ALL_STRING, //crawlerUrlMustMatch
// generate new default entry for surrogate parsing CrawlProfile.MATCH_NEVER_STRING, //crawlerUrlMustNotMatch
this.defaultSurrogateProfile = CrawlProfile.MATCH_ALL_STRING, //crawlerIpMustMatch
new CrawlProfile( CrawlProfile.MATCH_NEVER_STRING, //crawlerIpMustNotMatch
CRAWL_PROFILE_SURROGATE, "", //crawlerCountryMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //crawlerNoDepthLimitMatch
CrawlProfile.MATCH_NEVER_STRING, CrawlProfile.MATCH_ALL_STRING, //indexUrlMustMatch
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.MATCH_NEVER_STRING, //indexUrlMustNotMatch
CrawlProfile.MATCH_NEVER_STRING, 0,
CrawlProfile.MATCH_NEVER_STRING, false,
CrawlProfile.MATCH_ALL_STRING, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE),
CrawlProfile.MATCH_NEVER_STRING, -1,
"", true,
0, true,
false, false,
CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), false,
-1, false,
true, true,
true, true,
false, false,
false, CacheStrategy.NOCACHE,
false, "robot_" + CRAWL_PROFILE_SURROGATE);
true, this.profilesActiveCrawls.put(
true, UTF8.getBytes(this.defaultSurrogateProfile.handle()),
false, this.defaultSurrogateProfile);
CacheStrategy.NOCACHE,
"robot_" + CRAWL_PROFILE_SURROGATE);
this.profilesActiveCrawls.put(
UTF8.getBytes(this.defaultSurrogateProfile.handle()),
this.defaultSurrogateProfile);
}
} }
private void resetProfiles() { private void resetProfiles() {

@ -164,10 +164,10 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
put(CRAWLER_URL_MUSTNOTMATCH, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch); put(CRAWLER_URL_MUSTNOTMATCH, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch);
put(CRAWLER_IP_MUSTMATCH, (crawlerIpMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerIpMustMatch); put(CRAWLER_IP_MUSTMATCH, (crawlerIpMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerIpMustMatch);
put(CRAWLER_IP_MUSTNOTMATCH, (crawlerIpMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerIpMustNotMatch); put(CRAWLER_IP_MUSTNOTMATCH, (crawlerIpMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerIpMustNotMatch);
put(CRAWLER_COUNTRY_MUSTMATCH, (crawlerCountryMustMatch == null) ? "" : crawlerCountryMustMatch); put(CRAWLER_COUNTRY_MUSTMATCH, (crawlerCountryMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerCountryMustMatch);
put(CRAWLER_URL_NODEPTHLIMITMATCH, (crawlerNoDepthLimitMatch == null) ? "" : crawlerNoDepthLimitMatch); put(CRAWLER_URL_NODEPTHLIMITMATCH, (crawlerNoDepthLimitMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerNoDepthLimitMatch);
put(INDEXING_URL_MUSTMATCH, (indexUrlMustMatch == null) ? "" : indexUrlMustMatch); put(INDEXING_URL_MUSTMATCH, (indexUrlMustMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustMatch);
put(INDEXING_URL_MUSTNOTMATCH, (indexUrlMustNotMatch == null) ? "" : indexUrlMustNotMatch); put(INDEXING_URL_MUSTNOTMATCH, (indexUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : indexUrlMustNotMatch);
put(DEPTH, depth); put(DEPTH, depth);
put(DIRECT_DOC_BY_URL, directDocByURL); put(DIRECT_DOC_BY_URL, directDocByURL);
put(RECRAWL_IF_OLDER, recrawlIfOlder); put(RECRAWL_IF_OLDER, recrawlIfOlder);
@ -351,7 +351,7 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
*/ */
public String[] countryMustMatchList() { public String[] countryMustMatchList() {
String countryMustMatch = get(CRAWLER_COUNTRY_MUSTMATCH); String countryMustMatch = get(CRAWLER_COUNTRY_MUSTMATCH);
if (countryMustMatch == null) countryMustMatch = ""; if (countryMustMatch == null) countryMustMatch = CrawlProfile.MATCH_NEVER_STRING;
if (countryMustMatch.isEmpty()) return new String[0]; if (countryMustMatch.isEmpty()) return new String[0];
String[] list = countryMustMatch.split(","); String[] list = countryMustMatch.split(",");
if (list.length == 1 && list.length == 0) list = new String[0]; if (list.length == 1 && list.length == 0) list = new String[0];

@ -2461,7 +2461,7 @@ public final class Switchboard extends serverSwitch
} }
if (!profile.indexUrlMustMatchPattern().matcher(urls).matches() || if (!profile.indexUrlMustMatchPattern().matcher(urls).matches() ||
profile.indexUrlMustNotMatchPattern().matcher(urls).matches() ) { profile.indexUrlMustNotMatchPattern().matcher(urls).matches() ) {
if (this.log.isInfo()) this.log.logInfo("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url"); if (this.log.isInfo()) this.log.logInfo("Not Condensed Resource '" + urls + "': indexing prevented by regular expression on url; indexUrlMustMatchPattern = " + profile.indexUrlMustMatchPattern().pattern() + ", indexUrlMustNotMatchPattern = " + profile.indexUrlMustNotMatchPattern().pattern());
return new IndexingQueueEntry(in.queueEntry, in.documents, null); return new IndexingQueueEntry(in.queueEntry, in.documents, null);
} }

Loading…
Cancel
Save