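fixed interpretation of directDocByURL attribute during crawl start: absent on/off form parameters (directDocByURL, storeHTCache, indexText, indexMedia) now default to "off", and the site crawl start form submits directDocByURL=off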

13 years ago · ae246c30c3
parent 68d0f8de03
commit ae246c30c3
3 changed files with 7 additions and 7 deletions
--- a/htroot/CrawlStartSite_p.html
+++ b/htroot/CrawlStartSite_p.html
@@ -95,7 +95,7 @@
        <dt><label>Dynamic URLs</label></dt>
        <dd>
 		<input type="checkbox" name="crawlingQ" id="crawlingQ" #(crawlingQChecked)#::checked="checked"#(/crawlingQChecked)# /> allow <a href="http://en.wikipedia.org/wiki/Query_string">query-strings</a> (urls with a '?' in the path)
-        <input type="hidden" name="directDocByURL" id="directDocByURL" value="on" />
+        <input type="hidden" name="directDocByURL" id="directDocByURL" value="off" />
 		<input type="hidden" name="storeHTCache" id="storeHTCache" value="on" />
        <input type="hidden" name="cachePolicy" id="cachePolicy" value="iffresh" />
        <input type="hidden" name="indexText" id="indexText" value="on" />
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -155,7 +155,7 @@ public class Crawler_p {
                }
                
                // prepare some filter that are adjusted in case that this is wanted
-                boolean storeHTCache = "on".equals(post.get("storeHTCache", "on"));
+                boolean storeHTCache = "on".equals(post.get("storeHTCache", "off"));
                String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL_STRING);
                String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
                if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL_STRING; // avoid that all urls are filtered out if bad value was submitted
@@ -219,7 +219,7 @@ public class Crawler_p {
                env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
                if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8;

-                boolean directDocByURL = "on".equals(post.get("directDocByURL", "on")); // catch also all linked media documents without loading them
+                boolean directDocByURL = "on".equals(post.get("directDocByURL", "off")); // catch also all linked media documents without loading them
                env.setConfig("crawlingDirectDocByURL", directDocByURL);

                final String collection = post.get("collection", sb.getConfig("collection", "user"));
@@ -264,10 +264,10 @@ public class Crawler_p {
                boolean crawlingQ = "on".equals(post.get("crawlingQ", "off"));
                env.setConfig("crawlingQ", crawlingQ);

-                final boolean indexText = "on".equals(post.get("indexText", "on"));
+                final boolean indexText = "on".equals(post.get("indexText", "off"));
                env.setConfig("indexText", indexText);

-                final boolean indexMedia = "on".equals(post.get("indexMedia", "on"));
+                final boolean indexMedia = "on".equals(post.get("indexMedia", "off"));
                env.setConfig("indexMedia", indexMedia);

                env.setConfig("storeHTCache", storeHTCache);
--- a/htroot/QuickCrawlLink_p.java
+++ b/htroot/QuickCrawlLink_p.java
@@ -98,8 +98,8 @@ public class QuickCrawlLink_p {
        final String crawlingMustNotMatch  = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
        final int CrawlingDepth      = post.getInt("crawlingDepth", 0);
        final boolean crawlDynamic   = post.get("crawlingQ", "").equals("on");
-        final boolean indexText      = post.get("indexText", "on").equals("on");
-        final boolean indexMedia     = post.get("indexMedia", "on").equals("on");
+        final boolean indexText      = post.get("indexText", "off").equals("on");
+        final boolean indexMedia     = post.get("indexMedia", "off").equals("on");
        final boolean storeHTCache   = post.get("storeHTCache", "").equals("on");
        final boolean remoteIndexing = post.get("crawlOrder", "").equals("on");
        final boolean xsstopw        = post.get("xsstopw", "").equals("on");