fixed interpretation of directDocByURL attribute during crawl start

pull/1/head
orbiter 12 years ago
parent 68d0f8de03
commit ae246c30c3

@@ -95,7 +95,7 @@
<dt><label>Dynamic URLs</label></dt>
<dd>
<input type="checkbox" name="crawlingQ" id="crawlingQ" #(crawlingQChecked)#::checked="checked"#(/crawlingQChecked)# /> allow <a href="http://en.wikipedia.org/wiki/Query_string">query-strings</a> (urls with a '?' in the path)
-<input type="hidden" name="directDocByURL" id="directDocByURL" value="on" />
+<input type="hidden" name="directDocByURL" id="directDocByURL" value="off" />
<input type="hidden" name="storeHTCache" id="storeHTCache" value="on" />
<input type="hidden" name="cachePolicy" id="cachePolicy" value="iffresh" />
<input type="hidden" name="indexText" id="indexText" value="on" />

@@ -155,7 +155,7 @@ public class Crawler_p {
}
// prepare some filter that are adjusted in case that this is wanted
-boolean storeHTCache = "on".equals(post.get("storeHTCache", "on"));
+boolean storeHTCache = "on".equals(post.get("storeHTCache", "off"));
String newcrawlingMustMatch = post.get("mustmatch", CrawlProfile.MATCH_ALL_STRING);
String newcrawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
if (newcrawlingMustMatch.length() < 2) newcrawlingMustMatch = CrawlProfile.MATCH_ALL_STRING; // avoid that all urls are filtered out if bad value was submitted
@@ -219,7 +219,7 @@ public class Crawler_p {
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8;
-boolean directDocByURL = "on".equals(post.get("directDocByURL", "on")); // catch also all linked media documents without loading them
+boolean directDocByURL = "on".equals(post.get("directDocByURL", "off")); // catch also all linked media documents without loading them
env.setConfig("crawlingDirectDocByURL", directDocByURL);
final String collection = post.get("collection", sb.getConfig("collection", "user"));
@@ -264,10 +264,10 @@ public class Crawler_p {
boolean crawlingQ = "on".equals(post.get("crawlingQ", "off"));
env.setConfig("crawlingQ", crawlingQ);
-final boolean indexText = "on".equals(post.get("indexText", "on"));
+final boolean indexText = "on".equals(post.get("indexText", "off"));
env.setConfig("indexText", indexText);
-final boolean indexMedia = "on".equals(post.get("indexMedia", "on"));
+final boolean indexMedia = "on".equals(post.get("indexMedia", "off"));
env.setConfig("indexMedia", indexMedia);
env.setConfig("storeHTCache", storeHTCache);

@@ -98,8 +98,8 @@ public class QuickCrawlLink_p {
final String crawlingMustNotMatch = post.get("mustnotmatch", CrawlProfile.MATCH_NEVER_STRING);
final int CrawlingDepth = post.getInt("crawlingDepth", 0);
final boolean crawlDynamic = post.get("crawlingQ", "").equals("on");
-final boolean indexText = post.get("indexText", "on").equals("on");
-final boolean indexMedia = post.get("indexMedia", "on").equals("on");
+final boolean indexText = post.get("indexText", "off").equals("on");
+final boolean indexMedia = post.get("indexMedia", "off").equals("on");
final boolean storeHTCache = post.get("storeHTCache", "").equals("on");
final boolean remoteIndexing = post.get("crawlOrder", "").equals("on");
final boolean xsstopw = post.get("xsstopw", "").equals("on");

Loading…
Cancel
Save