@@ -32,6 +32,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
 
 import net.yacy.cora.document.MultiProtocolURI;
 import net.yacy.cora.services.federated.yacy.CacheStrategy;
@@ -67,14 +68,20 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
     public static final String XDSTOPW = "xdstopw";
     public static final String XPSTOPW = "xpstopw";
     public static final String CACHE_STRAGEGY = "cacheStrategy";
-    public static final String FILTER_URL_MUSTMATCH = "generalFilter"; // for URLs
-    public static final String FILTER_URL_MUSTNOTMATCH = "nevermatch"; // for URLs
-    public static final String FILTER_IP_MUSTMATCH = "crawlingIPMustMatch";
-    public static final String FILTER_IP_MUSTNOTMATCH = "crawlingIPMustNotMatch";
-    public static final String FILTER_COUNTRY_MUSTMATCH = "crawlingCountryMustMatch";
+    public static final String CRAWLER_URL_MUSTMATCH = "crawlerURLMustMatch";
+    public static final String CRAWLER_URL_MUSTNOTMATCH = "crawlerURLMustNotMatch";
+    public static final String CRAWLER_IP_MUSTMATCH = "crawlerIPMustMatch";
+    public static final String CRAWLER_IP_MUSTNOTMATCH = "crawlerIPMustNotMatch";
+    public static final String CRAWLER_COUNTRY_MUSTMATCH = "crawlerCountryMustMatch";
+    public static final String CRAWLER_URL_NODEPTHLIMITMATCH = "crawlerNoLimitURLMustMatch";
+    public static final String INDEXING_URL_MUSTMATCH = "indexURLMustMatch";
+    public static final String INDEXING_URL_MUSTNOTMATCH = "indexURLMustNotMatch";
     public static final String COLLECTIONS = "collections";
 
-    private Pattern urlmustmatch = null, urlmustnotmatch = null, ipmustmatch = null, ipmustnotmatch = null;
+    private Pattern crawlerurlmustmatch = null, crawlerurlmustnotmatch = null;
+    private Pattern crawleripmustmatch = null, crawleripmustnotmatch = null;
+    private Pattern crawlernodepthlimitmatch = null;
+    private Pattern indexurlmustmatch = null, indexurlmustnotmatch = null;
 
 
     public final static class DomProfile {
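
For orientation, the crawler filter keys above come in must-match / must-not-match pairs, and a URL (or its IP, or its country code) passes a pair only if it matches the first regex and does not match the second. The standalone sketch below illustrates that rule with plain java.util.regex; the stand-in map, the key strings and the regex values are invented for illustration and are not part of this patch.

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

public class FilterPairDemo {
    public static void main(String[] args) {
        // A CrawlProfile is a ConcurrentHashMap<String, String>; a plain map stands in for it here.
        Map<String, String> profile = new HashMap<String, String>();
        profile.put("crawlerURLMustMatch", ".*\\.example\\.org/.*");  // made-up value for CRAWLER_URL_MUSTMATCH
        profile.put("crawlerURLMustNotMatch", ".*\\.(jpg|png|gif)$"); // made-up value for CRAWLER_URL_MUSTNOTMATCH

        Pattern mustMatch = Pattern.compile(profile.get("crawlerURLMustMatch"));
        Pattern mustNotMatch = Pattern.compile(profile.get("crawlerURLMustNotMatch"));

        String url = "http://www.example.org/docs/index.html";
        // The URL is accepted by the crawler only if both conditions hold.
        boolean accepted = mustMatch.matcher(url).matches() && !mustNotMatch.matcher(url).matches();
        System.out.println(url + " accepted: " + accepted); // prints "... accepted: true"
    }
}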
@@ -99,11 +106,14 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * Constructor which creates a CrawlProfile from parameters.
      * @param name name of the crawl profile
      * @param startURL root URL of the crawl
-     * @param urlMustMatch URLs which do not match this regex will be ignored
-     * @param urlMustNotMatch URLs which match this regex will be ignored
-     * @param ipMustMatch IPs from URLs which do not match this regex will be ignored
-     * @param ipMustNotMatch IPs from URLs which match this regex will be ignored
-     * @param countryMustMatch URLs from a specific country must match
+     * @param crawlerUrlMustMatch URLs which do not match this regex will be ignored by the crawler
+     * @param crawlerUrlMustNotMatch URLs which match this regex will be ignored by the crawler
+     * @param crawlerIpMustMatch IPs from URLs which do not match this regex will be ignored by the crawler
+     * @param crawlerIpMustNotMatch IPs from URLs which match this regex will be ignored by the crawler
+     * @param crawlerCountryMustMatch URLs from a specific country must match
+     * @param crawlerNoDepthLimitMatch if this regex matches a URL, no depth limit is applied by the crawler
+     * @param indexUrlMustMatch URLs which do not match this regex will be ignored for indexing
+     * @param indexUrlMustNotMatch URLs which match this regex will be ignored for indexing
      * @param depth height of the tree which will be created by the crawler
      * @param directDocByURL if true, then linked documents that cannot be parsed are indexed as documents
      * @param recrawlIfOlder documents which have been indexed in the past will
@@ -122,11 +132,10 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      */
     public CrawlProfile(
                  String name,
-                 final String urlMustMatch,
-                 final String urlMustNotMatch,
-                 final String ipMustMatch,
-                 final String ipMustNotMatch,
-                 final String countryMustMatch,
+                 final String crawlerUrlMustMatch, final String crawlerUrlMustNotMatch,
+                 final String crawlerIpMustMatch, final String crawlerIpMustNotMatch,
+                 final String crawlerCountryMustMatch, final String crawlerNoDepthLimitMatch,
+                 final String indexUrlMustMatch, final String indexUrlMustNotMatch,
                  final int depth,
                  final boolean directDocByURL,
                  final long recrawlIfOlder /*date*/,
@@ -150,11 +159,14 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
         final String handle = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name)).substring(0, Word.commonHashLength);
         put(HANDLE, handle);
         put(NAME, name);
-        put(FILTER_URL_MUSTMATCH, (urlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : urlMustMatch);
-        put(FILTER_URL_MUSTNOTMATCH, (urlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : urlMustNotMatch);
-        put(FILTER_IP_MUSTMATCH, (ipMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : ipMustMatch);
-        put(FILTER_IP_MUSTNOTMATCH, (ipMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : ipMustNotMatch);
-        put(FILTER_COUNTRY_MUSTMATCH, (countryMustMatch == null) ? "" : countryMustMatch);
+        put(CRAWLER_URL_MUSTMATCH, (crawlerUrlMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerUrlMustMatch);
+        put(CRAWLER_URL_MUSTNOTMATCH, (crawlerUrlMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerUrlMustNotMatch);
+        put(CRAWLER_IP_MUSTMATCH, (crawlerIpMustMatch == null) ? CrawlProfile.MATCH_ALL_STRING : crawlerIpMustMatch);
+        put(CRAWLER_IP_MUSTNOTMATCH, (crawlerIpMustNotMatch == null) ? CrawlProfile.MATCH_NEVER_STRING : crawlerIpMustNotMatch);
+        put(CRAWLER_COUNTRY_MUSTMATCH, (crawlerCountryMustMatch == null) ? "" : crawlerCountryMustMatch);
+        put(CRAWLER_URL_NODEPTHLIMITMATCH, (crawlerNoDepthLimitMatch == null) ? "" : crawlerNoDepthLimitMatch);
+        put(INDEXING_URL_MUSTMATCH, (indexUrlMustMatch == null) ? "" : indexUrlMustMatch);
+        put(INDEXING_URL_MUSTNOTMATCH, (indexUrlMustNotMatch == null) ? "" : indexUrlMustNotMatch);
         put(DEPTH, depth);
         put(DIRECT_DOC_BY_URL, directDocByURL);
         put(RECRAWL_IF_OLDER, recrawlIfOlder);
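
The ternaries above only substitute a permissive default when a caller passes null for a filter. A minimal standalone sketch of that defaulting idiom follows; the constant values shown are placeholders, not necessarily the ones CrawlProfile defines for MATCH_ALL_STRING and MATCH_NEVER_STRING.

import java.util.HashMap;
import java.util.Map;

public class DefaultingPutDemo {
    // Stand-ins for CrawlProfile.MATCH_ALL_STRING / MATCH_NEVER_STRING; the real values may differ.
    static final String MATCH_ALL_STRING = ".*";
    static final String MATCH_NEVER_STRING = "";

    public static void main(String[] args) {
        Map<String, String> profile = new HashMap<String, String>();
        String crawlerUrlMustMatch = null;          // caller did not restrict the crawler
        String crawlerUrlMustNotMatch = "\\.pdf$";  // caller excluded PDFs (invented value)
        // Same idiom as the constructor above: fall back to a default when the argument is null.
        profile.put("crawlerURLMustMatch", (crawlerUrlMustMatch == null) ? MATCH_ALL_STRING : crawlerUrlMustMatch);
        profile.put("crawlerURLMustNotMatch", (crawlerUrlMustNotMatch == null) ? MATCH_NEVER_STRING : crawlerUrlMustNotMatch);
        System.out.println(profile); // prints both entries with the defaults applied
    }
}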
@@ -281,15 +293,13 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * @return regex which must be matched
      */
     public Pattern urlMustMatchPattern() {
-        if (this.urlmustmatch == null) {
-            final String r = get(FILTER_URL_MUSTMATCH);
-            if (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) {
-                this.urlmustmatch = CrawlProfile.MATCH_ALL_PATTERN;
-            } else {
-                this.urlmustmatch = Pattern.compile(r);
-            }
+        if (this.crawlerurlmustmatch == null) {
+            final String r = get(CRAWLER_URL_MUSTMATCH);
+            try {
+                this.crawlerurlmustmatch = (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) ? CrawlProfile.MATCH_ALL_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.crawlerurlmustmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
         }
-        return this.urlmustmatch;
+        return this.crawlerurlmustmatch;
     }
 
     /**
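
The rewritten getter above compiles the stored regex lazily and, unlike the removed version, catches PatternSyntaxException so that a malformed filter degrades to a never-matching pattern instead of throwing at lookup time. A standalone sketch of that idiom follows; the class, field and constant names are illustrative stand-ins, not the YaCy ones.

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class LazyPatternDemo {
    static final Pattern MATCH_ALL_PATTERN = Pattern.compile(".*");
    static final Pattern MATCH_NEVER_PATTERN = Pattern.compile("(?!)"); // placeholder pattern that never matches

    private final String storedRegex; // what a profile would hold under its must-match key
    private Pattern mustMatch = null;

    LazyPatternDemo(String storedRegex) { this.storedRegex = storedRegex; }

    Pattern mustMatchPattern() {
        if (this.mustMatch == null) {               // compile at most once, on first use
            final String r = this.storedRegex;
            try {
                this.mustMatch = (r == null || r.equals(".*")) ? MATCH_ALL_PATTERN : Pattern.compile(r);
            } catch (PatternSyntaxException e) {    // broken regex: fail closed instead of crashing
                this.mustMatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.mustMatch;
    }

    public static void main(String[] args) {
        System.out.println(new LazyPatternDemo(".*\\.html").mustMatchPattern().matcher("a.html").matches()); // true
        System.out.println(new LazyPatternDemo("[broken").mustMatchPattern().matcher("a.html").matches());   // false
    }
}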
@@ -297,15 +307,13 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * @return regex which must not be matched
      */
     public Pattern urlMustNotMatchPattern() {
-        if (this.urlmustnotmatch == null) {
-            final String r = get(FILTER_URL_MUSTNOTMATCH);
-            if (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) {
-                this.urlmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN;
-            } else {
-                this.urlmustnotmatch = Pattern.compile(r);
-            }
+        if (this.crawlerurlmustnotmatch == null) {
+            final String r = get(CRAWLER_URL_MUSTNOTMATCH);
+            try {
+                this.crawlerurlmustnotmatch = (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) ? CrawlProfile.MATCH_NEVER_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.crawlerurlmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
         }
-        return this.urlmustnotmatch;
+        return this.crawlerurlmustnotmatch;
     }
 
     /**
@@ -313,15 +321,13 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * @return regex which must be matched
      */
     public Pattern ipMustMatchPattern() {
-        if (this.ipmustmatch == null) {
-            final String r = get(FILTER_IP_MUSTMATCH);
-            if (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) {
-                this.ipmustmatch = CrawlProfile.MATCH_ALL_PATTERN;
-            } else {
-                this.ipmustmatch = Pattern.compile(r);
-            }
+        if (this.crawleripmustmatch == null) {
+            final String r = get(CRAWLER_IP_MUSTMATCH);
+            try {
+                this.crawleripmustmatch = (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) ? CrawlProfile.MATCH_ALL_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.crawleripmustmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
         }
-        return this.ipmustmatch;
+        return this.crawleripmustmatch;
     }
 
     /**
@@ -329,15 +335,13 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * @return regex which must not be matched
      */
     public Pattern ipMustNotMatchPattern() {
-        if (this.ipmustnotmatch == null) {
-            final String r = get(FILTER_IP_MUSTNOTMATCH);
-            if (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) {
-                this.ipmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN;
-            } else {
-                this.ipmustnotmatch = Pattern.compile(r);
-            }
+        if (this.crawleripmustnotmatch == null) {
+            final String r = get(CRAWLER_IP_MUSTNOTMATCH);
+            try {
+                this.crawleripmustnotmatch = (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) ? CrawlProfile.MATCH_NEVER_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.crawleripmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
         }
-        return this.ipmustnotmatch;
+        return this.crawleripmustnotmatch;
    }
 
     /**
@@ -345,14 +349,56 @@ public class CrawlProfile extends ConcurrentHashMap<String, String> implements M
      * @return a list of country codes
      */
     public String[] countryMustMatchList() {
-        String countryMustMatch = get(FILTER_COUNTRY_MUSTMATCH);
+        String countryMustMatch = get(CRAWLER_COUNTRY_MUSTMATCH);
         if (countryMustMatch == null) countryMustMatch = "";
         if (countryMustMatch.isEmpty()) return new String[0];
         String[] list = countryMustMatch.split(",");
         if (list.length == 1 && list[0].isEmpty()) list = new String[0];
         return list;
     }
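
countryMustMatchList() simply splits the comma-separated country codes stored under CRAWLER_COUNTRY_MUSTMATCH. A minimal standalone illustration, with an invented code list:

import java.util.Arrays;

public class CountryListDemo {
    public static void main(String[] args) {
        String countryMustMatch = "de,at,ch"; // as it might be stored under CRAWLER_COUNTRY_MUSTMATCH
        String[] list = countryMustMatch.isEmpty() ? new String[0] : countryMustMatch.split(",");
        System.out.println(Arrays.toString(list)); // [de, at, ch]
    }
}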
 
+    /**
+     * If the regex matches the URL, then there is no depth limit on the crawl (it overrides depth == 0)
+     * @return regex which must be matched
+     */
+    public Pattern crawlerNoDepthLimitMatchPattern() {
+        if (this.crawlernodepthlimitmatch == null) {
+            final String r = get(CRAWLER_URL_NODEPTHLIMITMATCH);
+            try {
+                this.crawlernodepthlimitmatch = (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) ? CrawlProfile.MATCH_ALL_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.crawlernodepthlimitmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
+        }
+        return this.crawlernodepthlimitmatch;
+    }
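
The patch only adds the getter above; the sketch below shows how a crawler loop might consult it, under the assumption (not shown in this diff) that a URL matching the "no depth limit" regex simply bypasses the depth check. The regex value, URL and depth numbers are invented.

import java.util.regex.Pattern;

public class DepthLimitDemo {
    public static void main(String[] args) {
        Pattern noDepthLimit = Pattern.compile(".*/sitemap.*"); // invented value for CRAWLER_URL_NODEPTHLIMITMATCH
        String url = "http://www.example.org/sitemap/page42.html";
        int depthOfUrl = 7;
        int maxDepth = 3;
        // Assumed call-site logic: accept the URL if it is within the depth limit
        // or if the "no depth limit" pattern matches it.
        boolean accept = depthOfUrl <= maxDepth || noDepthLimit.matcher(url).matches();
        System.out.println("accept despite depth: " + accept); // true
    }
}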
+
+    /**
+     * Gets the regex which must be matched by URLs in order to be indexed.
+     * @return regex which must be matched
+     */
+    public Pattern indexUrlMustMatchPattern() {
+        if (this.indexurlmustmatch == null) {
+            final String r = get(INDEXING_URL_MUSTMATCH);
+            try {
+                this.indexurlmustmatch = (r == null || r.equals(CrawlProfile.MATCH_ALL_STRING)) ? CrawlProfile.MATCH_ALL_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.indexurlmustmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
+        }
+        return this.indexurlmustmatch;
+    }
+
+    /**
+     * Gets the regex which must not be matched by URLs in order to be indexed.
+     * @return regex which must not be matched
+     */
+    public Pattern indexUrlMustNotMatchPattern() {
+        if (this.indexurlmustnotmatch == null) {
+            final String r = get(INDEXING_URL_MUSTNOTMATCH);
+            try {
+                this.indexurlmustnotmatch = (r == null || r.equals(CrawlProfile.MATCH_NEVER_STRING)) ? CrawlProfile.MATCH_NEVER_PATTERN : Pattern.compile(r);
+            } catch (PatternSyntaxException e) { this.indexurlmustnotmatch = CrawlProfile.MATCH_NEVER_PATTERN; }
+        }
+        return this.indexurlmustnotmatch;
+    }
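
The point of keeping separate crawler and indexer filters is that a URL can be followed by the crawler without ending up in the index. A standalone sketch of that two-stage decision follows; all regex values and the URL are invented.

import java.util.regex.Pattern;

public class CrawlVsIndexDemo {
    public static void main(String[] args) {
        // Crawler filters: follow everything on the site.
        Pattern crawlMust = Pattern.compile(".*example\\.org.*");
        Pattern crawlMustNot = Pattern.compile("(?!)");             // never matches: nothing excluded
        // Indexer filters: only index the documentation area, and never PDFs.
        Pattern indexMust = Pattern.compile(".*example\\.org/docs/.*");
        Pattern indexMustNot = Pattern.compile(".*\\.pdf$");

        String url = "http://www.example.org/news/2012.html";
        boolean crawl = crawlMust.matcher(url).matches() && !crawlMustNot.matcher(url).matches();
        boolean index = indexMust.matcher(url).matches() && !indexMustNot.matcher(url).matches();
        System.out.println("crawl=" + crawl + " index=" + index);   // crawl=true index=false
    }
}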
+
     /**
      * Gets depth of crawl job (or height of the tree which will be
      * created by the crawler).