diff --git a/htroot/CrawlStartExpert_p.html b/htroot/CrawlStartExpert_p.html index 5baece971..59a7dff61 100644 --- a/htroot/CrawlStartExpert_p.html +++ b/htroot/CrawlStartExpert_p.html @@ -310,14 +310,6 @@ so they can omit starting a crawl with the same start point. - - : - - - This can be useful to circumvent that extremely common words are added to the database, i.e. "the", "he", "she", "it"... To exclude all words given in the file yacy.stopwords from indexing, - check this box. - - : @@ -327,24 +319,6 @@ A crawl result can be tagged with names which are candidates for a collection request. These tags can be selected with the GSA interface using the 'site' operator. To use this option, the 'collection_sxt'-field must be switched on in the Solr Schema - diff --git a/htroot/CrawlStartSite_p.html b/htroot/CrawlStartSite_p.html index 38a83864e..15978b66b 100644 --- a/htroot/CrawlStartSite_p.html +++ b/htroot/CrawlStartSite_p.html @@ -101,9 +101,6 @@ - - -
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 615393757..ee147811f 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -265,15 +265,6 @@ public class Crawler_p { CacheStrategy cachePolicy = CacheStrategy.parse(post.get("cachePolicy", "iffresh")); if (cachePolicy == null) cachePolicy = CacheStrategy.IFFRESH; - final boolean xsstopw = "on".equals(post.get("xsstopw", "off")); - env.setConfig("xsstopw", xsstopw); - - final boolean xdstopw = "on".equals(post.get("xdstopw", "off")); - env.setConfig("xdstopw", xdstopw); - - final boolean xpstopw = "on".equals(post.get("xpstopw", "off")); - env.setConfig("xpstopw", xpstopw); - String crawlingMode = post.get("crawlingMode","url"); if ("file".equals(crawlingMode) && post.containsKey("crawlingFile")) { @@ -365,9 +356,6 @@ public class Crawler_p { indexMedia, storeHTCache, crawlOrder, - xsstopw, - xdstopw, - xpstopw, cachePolicy, collection); byte[] handle = ASCII.getBytes(profile.handle()); diff --git a/htroot/QuickCrawlLink_p.java b/htroot/QuickCrawlLink_p.java index 894427fce..18fa543dc 100644 --- a/htroot/QuickCrawlLink_p.java +++ b/htroot/QuickCrawlLink_p.java @@ -102,9 +102,6 @@ public class QuickCrawlLink_p { final boolean indexMedia = post.get("indexMedia", "off").equals("on"); final boolean storeHTCache = post.get("storeHTCache", "").equals("on"); final boolean remoteIndexing = post.get("crawlOrder", "").equals("on"); - final boolean xsstopw = post.get("xsstopw", "").equals("on"); - final boolean xdstopw = post.get("xdstopw", "").equals("on"); - final boolean xpstopw = post.get("xpstopw", "").equals("on"); final String collection = post.get("collection", "user"); prop.put("mode_url", (crawlingStart == null) ? "unknown" : crawlingStart); @@ -151,9 +148,6 @@ public class QuickCrawlLink_p { indexMedia, storeHTCache, remoteIndexing, - xsstopw, - xdstopw, - xpstopw, CacheStrategy.IFFRESH, collection); sb.crawler.putActive(pe.handle().getBytes(), pe); diff --git a/source/net/yacy/crawler/CrawlSwitchboard.java b/source/net/yacy/crawler/CrawlSwitchboard.java index a5a3fe7fe..c09072efb 100644 --- a/source/net/yacy/crawler/CrawlSwitchboard.java +++ b/source/net/yacy/crawler/CrawlSwitchboard.java @@ -245,9 +245,6 @@ public final class CrawlSwitchboard { true /*getConfigBool(PROXY_INDEXING_LOCAL_MEDIA, true)*/, true, false /*getConfigBool(PROXY_INDEXING_REMOTE, false)*/, - true, - true, - true, CacheStrategy.IFFRESH, "robot_" + CRAWL_PROFILE_PROXY); this.profilesActiveCrawls.put( @@ -274,9 +271,6 @@ public final class CrawlSwitchboard { true, false, false, - true, - true, - false, CacheStrategy.IFFRESH, "robot_" + CRAWL_PROFILE_REMOTE); this.profilesActiveCrawls.put( @@ -303,9 +297,6 @@ public final class CrawlSwitchboard { false, true, false, - true, - true, - false, CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_TEXT); this.profilesActiveCrawls.put( @@ -332,9 +323,6 @@ public final class CrawlSwitchboard { true, true, false, - true, - true, - false, CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT); this.profilesActiveCrawls.put( @@ -362,9 +350,6 @@ public final class CrawlSwitchboard { false, true, false, - true, - true, - false, CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA); this.profilesActiveCrawls.put( @@ -391,9 +376,6 @@ public final class CrawlSwitchboard { true, true, false, - true, - true, - false, CacheStrategy.IFEXIST, "robot_" + CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA); this.profilesActiveCrawls.put( @@ -420,9 +402,6 @@ public final class CrawlSwitchboard { false, false, false, - true, - true, - false, CacheStrategy.NOCACHE, "robot_" + CRAWL_PROFILE_SURROGATE); this.profilesActiveCrawls.put( diff --git a/source/net/yacy/crawler/data/CrawlProfile.java b/source/net/yacy/crawler/data/CrawlProfile.java index 6b8ade6be..4b8695494 100644 --- a/source/net/yacy/crawler/data/CrawlProfile.java +++ b/source/net/yacy/crawler/data/CrawlProfile.java @@ -126,9 +126,6 @@ public class CrawlProfile extends ConcurrentHashMap implements M final boolean indexMedia, final boolean storeHTCache, final boolean remoteIndexing, - final boolean xsstopw, - final boolean xdstopw, - final boolean xpstopw, final CacheStrategy cacheStrategy, final String collections) { super(40); diff --git a/source/net/yacy/data/ymark/YMarkCrawlStart.java b/source/net/yacy/data/ymark/YMarkCrawlStart.java index b1a57ce35..ff50afec5 100644 --- a/source/net/yacy/data/ymark/YMarkCrawlStart.java +++ b/source/net/yacy/data/ymark/YMarkCrawlStart.java @@ -56,12 +56,6 @@ public class YMarkCrawlStart extends HashMap{ SINGLE, ONE_LINK, FULL_DOMAIN } - public YMarkCrawlStart(final WorkTables worktables) { - super(); - this.date_recording = new Date(0); - this.worktables = worktables; - } - public YMarkCrawlStart(final WorkTables worktables, final String url) { super(); this.worktables = worktables; @@ -187,7 +181,7 @@ public class YMarkCrawlStart extends HashMap{ CrawlProfile.getRecrawlDate(CrawlSwitchboard.CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, crawlingQ, - true, true, true, false, true, true, true, + true, true, true, false, CacheStrategy.IFFRESH, "robot_" + CrawlSwitchboard.CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA); // TODO: make this a default profile in CrawlSwitchboard sb.crawler.putActive(pe.handle().getBytes(), pe);