diff --git a/htroot/IndexCreateParserErrors_p.java b/htroot/IndexCreateParserErrors_p.java
index 4e45ada0f..9cb56e51f 100644
--- a/htroot/IndexCreateParserErrors_p.java
+++ b/htroot/IndexCreateParserErrors_p.java
@@ -30,6 +30,7 @@ import net.yacy.cora.protocol.RequestHeader;
 import net.yacy.kelondro.data.meta.DigestURI;
 import net.yacy.peers.Seed;
 import net.yacy.search.Switchboard;
+import de.anomic.crawler.CrawlStacker;
 import de.anomic.crawler.ZURL;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@@ -89,7 +90,17 @@ public class IndexCreateParserErrors_p {
             prop.putHTML("rejected_list_"+j+"_initiator", ((initiatorSeed == null) ? "proxy" : initiatorSeed.getName()));
             prop.putHTML("rejected_list_"+j+"_executor", ((executorSeed == null) ? "proxy" : executorSeed.getName()));
             prop.putHTML("rejected_list_"+j+"_url", url.toNormalform(false, true));
-            prop.putHTML("rejected_list_"+j+"_failreason", entry.anycause());
+
+            String cause = entry.anycause();
+            if (cause.startsWith(CrawlStacker.ERROR_NO_MATCH_MUST_MATCH_FILTER)) {
+                prop.put("rejected_list_"+j+"_failreason", "(test) " + cause);
+            } else if (cause.startsWith(CrawlStacker.ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER)) {
+                prop.put("rejected_list_"+j+"_failreason", "(test) " + cause);
+            } else {
+                prop.putHTML("rejected_list_"+j+"_failreason", cause);
+            }
             prop.put("rejected_list_"+j+"_dark", dark ? "1" : "0");
             dark = !dark;
             j++;
diff --git a/htroot/RegexTest.html b/htroot/RegexTest.html
new file mode 100644
index 000000000..76a2ca812
--- /dev/null
+++ b/htroot/RegexTest.html
@@ -0,0 +1,39 @@
+
+
+
+  YaCy '#[clientname]#': Crawl Start
+  #%env/templates/metas.template%#
+
+
+  #%env/templates/header.template%#
+  #%env/templates/submenuIndexCreate.template%#
+  RegexTest
+
+
+
+
+ +
+
+ +
+
+ +
+
+ +
+
+ +
+    #(match)#
+    no match
+    ::
+    match
+    ::
+    error in expression: #[error]#
+    #(/match)#
+
+
+ +
+
+
+
+  #%env/templates/footer.template%#
+
+
diff --git a/htroot/RegexTest.java b/htroot/RegexTest.java
new file mode 100644
index 000000000..02afe3968
--- /dev/null
+++ b/htroot/RegexTest.java
@@ -0,0 +1,51 @@
+/**
+ * RegexTest
+ * Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
+ * First released 14.09.2011 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import java.util.regex.PatternSyntaxException;
+
+import net.yacy.cora.protocol.RequestHeader;
+import de.anomic.server.serverObjects;
+import de.anomic.server.serverSwitch;
+
+public class RegexTest {
+
+    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, @SuppressWarnings("unused") final serverSwitch env) {
+
+        String text = (post == null) ? "" : post.get("text", "");
+        String regex = (post == null) ? ".*" : post.get("regex", ".*");
+        String error = "";
+        Boolean match = null;
+        try {
+            match = text.matches(regex);
+        } catch (PatternSyntaxException e) {
+            error = e.getMessage();
+        }
+
+        final serverObjects prop = new serverObjects();
+
+        prop.put("text", text);
+        prop.put("regex", regex);
+        prop.put("match", match == null ? 2 : (match.booleanValue() ? 1 : 0));
+        prop.put("match_error", error);
+
+        return prop;
+    }
+
+}
diff --git a/htroot/env/grafics/nok.png b/htroot/env/grafics/nok.png
new file mode 100644
index 000000000..d5e25f9a0
Binary files /dev/null and b/htroot/env/grafics/nok.png differ
diff --git a/source/de/anomic/crawler/CrawlStacker.java b/source/de/anomic/crawler/CrawlStacker.java
index 360f8e667..c35f3acca 100644
--- a/source/de/anomic/crawler/CrawlStacker.java
+++ b/source/de/anomic/crawler/CrawlStacker.java
@@ -64,6 +64,10 @@ import de.anomic.crawler.retrieval.Request;
 import de.anomic.crawler.retrieval.SMBLoader;
 
 public final class CrawlStacker {
+
+    public static String ERROR_NO_MATCH_MUST_MATCH_FILTER = "url does not match must-match filter ";
+    public static String ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER = "url matches must-not-match filter ";
+
     private final Log log = new Log("STACKCRAWL");
 
 
@@ -415,13 +419,13 @@ public final class CrawlStacker {
         // filter with must-match for URLs
         if ((depth > 0) && !profile.urlMustMatchPattern().matcher(urlstring).matches()) {
             if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' does not match must-match crawling filter '" + profile.urlMustMatchPattern().toString() + "'.");
-            return "url does not match must-match filter " + profile.urlMustMatchPattern().toString();
+            return ERROR_NO_MATCH_MUST_MATCH_FILTER + profile.urlMustMatchPattern().toString();
         }
 
         // filter with must-not-match for URLs
         if ((depth > 0) && profile.urlMustNotMatchPattern().matcher(urlstring).matches()) {
             if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' matches must-not-match crawling filter '" + profile.urlMustNotMatchPattern().toString() + "'.");
-            return "url matches must-not-match filter " + profile.urlMustNotMatchPattern().toString();
+            return ERROR_MATCH_WITH_MUST_NOT_MATCH_FILTER + profile.urlMustNotMatchPattern().toString();
         }
 
         // deny cgi
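
For reference, RegexTest.java hands the template a three-state value — 0 for no match, 1 for a match, 2 for a broken expression — and RegexTest.html renders those states through its #(match)# block. The snippet below is a minimal standalone sketch of that mapping for experimenting outside YaCy; the class name RegexTestSketch, the matchState helper and the sample URLs are made up for illustration and are not part of this patch.

import java.util.regex.PatternSyntaxException;

// Standalone sketch (not part of the patch): reproduces the 0/1/2 result codes
// that RegexTest.java passes to the #(match)# template in RegexTest.html.
public class RegexTestSketch {

    // 0 = no match, 1 = match, 2 = syntax error in the regular expression
    static int matchState(final String text, final String regex) {
        try {
            return text.matches(regex) ? 1 : 0;
        } catch (final PatternSyntaxException e) {
            return 2;
        }
    }

    public static void main(final String[] args) {
        System.out.println(matchState("http://example.com/index.html", ".*example.*")); // 1
        System.out.println(matchState("http://example.com/index.html", ".*yacy.*"));    // 0
        System.out.println(matchState("http://example.com/index.html", "*dangling"));   // 2
    }
}

Since CrawlStacker evaluates its must-match and must-not-match filters with the same java.util.regex matching, this is also the call that decides whether a URL passes a crawl filter and, when it does not, produces the rejection messages now prefixed by the two new constants.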