diff --git a/htroot/yacysearch.html b/htroot/yacysearch.html index c9e17e618..929d53b49 100644 --- a/htroot/yacysearch.html +++ b/htroot/yacysearch.html @@ -115,7 +115,7 @@ $(function() { #(searchvideo)#::  #(/searchvideo)# #(searchapp)#::#(/searchapp)#   - more options + more options #(/searchdomswitches)# @@ -132,6 +132,14 @@ $(function() { +#(urlmaskerror)#:: +

Illegal URL mask: #[urlmask]# (not a valid regular expression), mask ignored.

+#(/urlmaskerror)# + +#(prefermaskerror)#:: +

Illegal prefer mask: #[prefermask]# (not a valid regular expression), mask ignored.

+#(/prefermaskerror)# + #(didYouMean)#::

Did you mean: #{suggestions}# #[word]# #[sep]##{/suggestions}#

#(/didYouMean)# diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 00bb2d382..0780bc1f2 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -33,6 +33,8 @@ import java.util.Iterator; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.UTF8; @@ -191,11 +193,11 @@ public class yacysearch { } String prefermask = (post == null) ? "" : post.get("prefermaskfilter", ""); - if (prefermask.length() > 0 && prefermask.indexOf(".*") < 0) { + if (!prefermask.isEmpty() && prefermask.indexOf(".*") < 0) { prefermask = ".*" + prefermask + ".*"; } - Bitfield constraint = (post != null && post.containsKey("constraint") && post.get("constraint", "").length() > 0) ? new Bitfield(4, post.get("constraint", "______")) : null; + Bitfield constraint = (post != null && post.containsKey("constraint") && !post.get("constraint", "").isEmpty()) ? new Bitfield(4, post.get("constraint", "______")) : null; if (indexof) { constraint = new Bitfield(4); constraint.set(Condenser.flag_cat_indexof, true); @@ -475,6 +477,26 @@ public class yacysearch { // do the search final HandleSet queryHashes = Word.words2hashesHandles(query[0]); + + // check filters + try { + Pattern.compile(urlmask); + } catch (final PatternSyntaxException ex) { + Log.logWarning("SEARCH", "Illegal URL mask, not a valid regex: " + urlmask); + prop.put("urlmaskerror", 1); + prop.putHTML("urlmaskerror_urlmask", urlmask); + urlmask = ".*"; + } + + try { + Pattern.compile(prefermask); + } catch (final PatternSyntaxException ex) { + Log.logWarning("SEARCH", "Illegal prefer mask, not a valid regex: " + prefermask); + prop.put("prefermaskerror", 1); + prop.putHTML("prefermaskerror_prefermask", prefermask); + prefermask = ""; + } + final QueryParams theQuery = new QueryParams( originalquerystring, queryHashes, diff --git a/source/de/anomic/search/QueryParams.java b/source/de/anomic/search/QueryParams.java index 0aa337535..47edde209 100644 --- a/source/de/anomic/search/QueryParams.java +++ b/source/de/anomic/search/QueryParams.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; @@ -191,9 +192,17 @@ public final class QueryParams { this.contentdom = contentdom; this.itemsPerPage = Math.min((specialRights) ? 1000 : 100, itemsPerPage); this.offset = Math.min((specialRights) ? 10000 : 1000, offset); - this.urlMask = Pattern.compile(urlMask.toLowerCase()); + try { + this.urlMask = Pattern.compile(urlMask.toLowerCase()); + } catch (final PatternSyntaxException ex) { + throw new IllegalArgumentException("Not a valid regular expression: " + urlMask, ex); + } this.urlMask_isCatchall = this.urlMask.toString().equals(catchall_pattern.toString()); - this.prefer = Pattern.compile(prefer); + try { + this.prefer = Pattern.compile(prefer); + } catch (final PatternSyntaxException ex) { + throw new IllegalArgumentException("Not a valid regular expression: " + prefer, ex); + } this.prefer_isMatchnothing = this.prefer.toString().equals(matchnothing_pattern.toString()); assert language != null; this.targetlang = language; @@ -204,7 +213,7 @@ public final class QueryParams { this.constraint = constraint; this.allofconstraint = allofconstraint; this.sitehash = site; assert site == null || site.length() == 6; - this.authorhash = authorhash; assert authorhash == null || authorhash.length() > 0; + this.authorhash = authorhash; assert authorhash == null || !authorhash.isEmpty(); this.snippetCacheStrategy = snippetCacheStrategy; this.host = host; this.remotepeer = null; @@ -326,7 +335,7 @@ public final class QueryParams { final TreeSet exclude = new TreeSet(NaturalOrder.naturalComparator); final TreeSet fullquery = new TreeSet(NaturalOrder.naturalComparator); - if ((querystring != null) && (querystring.length() > 0)) { + if ((querystring != null) && (!querystring.isEmpty())) { // convert Umlaute querystring = AbstractScraper.stripAll(querystring.toCharArray()).toLowerCase().trim(); @@ -341,20 +350,20 @@ public final class QueryParams { int l; // the string is clean now, but we must generate a set out of it final String[] queries = querystring.split(" "); - for (int i = 0; i < queries.length; i++) { - if (queries[i].startsWith("-")) { - exclude.add(queries[i].substring(1)); + for (String quer : queries) { + if (quer.startsWith("-")) { + exclude.add(quer.substring(1)); } else { - while ((c = queries[i].indexOf('-')) >= 0) { - s = queries[i].substring(0, c); + while ((c = quer.indexOf('-')) >= 0) { + s = quer.substring(0, c); l = s.length(); if (l >= Condenser.wordminsize) {query.add(s);} if (l > 0) {fullquery.add(s);} - queries[i] = queries[i].substring(c + 1); + quer = quer.substring(c + 1); } - l = queries[i].length(); - if (l >= Condenser.wordminsize) {query.add(queries[i]);} - if (l > 0) {fullquery.add(queries[i]);} + l = quer.length(); + if (l >= Condenser.wordminsize) {query.add(quer);} + if (l > 0) {fullquery.add(quer);} } } } @@ -364,7 +373,7 @@ public final class QueryParams { public String queryString(final boolean encodeHTML) { final String ret; if (encodeHTML){ - ret =CharacterCoding.unicode2html(this.queryString, true); + ret = CharacterCoding.unicode2html(this.queryString, true); } else { ret = this.queryString; } @@ -375,7 +384,7 @@ public final class QueryParams { try { return URLEncoder.encode(this.queryString, "UTF-8"); } catch (UnsupportedEncodingException e) { - e.printStackTrace(); + Log.logException(e); return this.queryString; } } @@ -388,7 +397,7 @@ public final class QueryParams { // filter out words that appear in this set // this is applied to the queryHashes final HandleSet blues = Word.words2hashesHandles(blueList); - for (byte[] b: blues) queryHashes.remove(b); + for (final byte[] b: blues) queryHashes.remove(b); }