|
|
|
@ -164,7 +164,7 @@ public final class CrawlSwitchboard {
|
|
|
|
|
|
|
|
|
|
if (this.defaultProxyProfile == null) {
|
|
|
|
|
// generate new default entry for proxy crawling
|
|
|
|
|
this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
|
|
|
|
|
this.defaultProxyProfile = this.profilesActiveCrawls.newEntry("proxy", null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL,
|
|
|
|
|
0 /*Integer.parseInt(getConfig(PROXY_PREFETCH_DEPTH, "0"))*/,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, -1, false,
|
|
|
|
|
true /*getConfigBool(PROXY_INDEXING_LOCAL_TEXT, true)*/,
|
|
|
|
@ -175,32 +175,32 @@ public final class CrawlSwitchboard {
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultRemoteProfile == null) {
|
|
|
|
|
// generate new default entry for remote crawling
|
|
|
|
|
defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultRemoteProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_REMOTE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
-1, -1, -1, true, true, true, false, true, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH);
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultTextSnippetLocalProfile == null) {
|
|
|
|
|
// generate new default entry for snippet fetch and optional crawling
|
|
|
|
|
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultTextSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, -1, true, false, false, false, false, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFFRESH);
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultTextSnippetGlobalProfile == null) {
|
|
|
|
|
// generate new default entry for snippet fetch and optional crawling
|
|
|
|
|
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultTextSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, -1, true, true, true, true, true, false, true, true, false, CrawlProfile.CACHE_STRATEGY_CACHEONLY);
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultMediaSnippetLocalProfile == null) {
|
|
|
|
|
// generate new default entry for snippet fetch and optional crawling
|
|
|
|
|
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultMediaSnippetLocalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, false, false, false, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFEXIST);
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultMediaSnippetGlobalProfile == null) {
|
|
|
|
|
// generate new default entry for snippet fetch and optional crawling
|
|
|
|
|
defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultMediaSnippetGlobalProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, -1, true, false, true, true, true, false, true, true, false, CrawlProfile.CACHE_STRATEGY_IFEXIST);
|
|
|
|
|
}
|
|
|
|
|
if (this.defaultSurrogateProfile == null) {
|
|
|
|
|
// generate new default entry for surrogate parsing
|
|
|
|
|
defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, 0,
|
|
|
|
|
defaultSurrogateProfile = this.profilesActiveCrawls.newEntry(CRAWL_PROFILE_SURROGATE, null, CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_BAD_URL, 0,
|
|
|
|
|
this.profilesActiveCrawls.getRecrawlDate(CRAWL_PROFILE_SURROGATE_RECRAWL_CYCLE), -1, -1, true, true, false, false, false, false, true, true, false, CrawlProfile.CACHE_STRATEGY_NOCACHE);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|