*) Check validity of the crawl filter regular expression before adding it to the crawler queue

See: http://www.yacy-forum.de/viewtopic.php?p=24671

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@2410 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 19 years ago
parent 9da3aa74d3
commit 34831d2d9f

@ -232,13 +232,15 @@ Error: #[errmsg]#
::<!-- 3 -->
Application not yet initialized. Sorry. Please wait some seconds and repeat the request.
::<!-- 4 -->
<b>ERROR: Crawl filter "#[newcrawlingfilter]#" does not match with crawl root "#[crawlingStart]#".</b> Please try again with different filter.</p><br>
<b>ERROR: Crawl filter "<span class="settingsValue">#[newcrawlingfilter]#</span>" does not match with crawl root "#[crawlingStart]#".</b> Please try again with different filter.</p><br>
::<!-- 5 -->
Crawling of "#[crawlingURL]#" failed. Reason: #[reasonString]#<br>
Crawling of "<span class="settingsValue">#[crawlingURL]#</span>" failed. Reason: #[reasonString]#<br>
::<!-- 6 -->
Error with URL input "#[crawlingStart]#": #[error]#
Error with URL input "<span class="settingsValue">#[crawlingStart]#</span>": #[error]#
::<!-- 7 -->
Error with file input "#[crawlingStart]#": #[error]#
Error with file input "<span class="settingsValue">#[crawlingStart]#</span>": #[error]#
::<!-- 8 -->
Error with Crawling Filter "<span class="settingsValue">#[newcrawlingfilter]#</span>": <span style="color:red">#[error]#</span>
#(/error)#
<br>
#(info)#

@ -53,6 +53,8 @@ import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.data.wikiCode;
import de.anomic.htmlFilter.htmlFilterContentScraper;
@ -158,6 +160,10 @@ public class IndexCreate_p {
prop.put("error_newcrawlingfilter", newcrawlingfilter);
prop.put("error_crawlingStart", crawlingStart);
} else try {
// check if the crawl filter works correctly
Pattern.compile(newcrawlingfilter);
// stack request
// first delete old entry, if exists
String urlhash = indexURL.urlHash(crawlingStart);
@ -201,6 +207,10 @@ public class IndexCreate_p {
ee.store();
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
} catch (PatternSyntaxException e) {
prop.put("error", 8); //crawlfilter does not match url
prop.put("error_newcrawlingfilter", newcrawlingfilter);
prop.put("error_error", e.getMessage());
} catch (Exception e) {
// mist
prop.put("error", 6);//Error with url
@ -213,7 +223,11 @@ public class IndexCreate_p {
if (post.containsKey("crawlingFile")) {
// getting the name of the uploaded file
String fileName = (String) post.get("crawlingFile");
try {
try {
// check if the crawl filter works correctly
Pattern.compile(newcrawlingfilter);
// loading the file content
File file = new File(fileName);
// getting the content of the bookmark file
@ -268,7 +282,12 @@ public class IndexCreate_p {
switchboard.urlPool.errorURL.stackPushEntry(ee);
}
}
} catch (PatternSyntaxException e) {
// print error message
prop.put("error", 8); //crawlfilter does not match url
prop.put("error_newcrawlingfilter", newcrawlingfilter);
prop.put("error_error", e.getMessage());
} catch (Exception e) {
// mist
prop.put("error", 7);//Error with file

Loading…
Cancel
Save