From 25f9c350335cfd9c221e3f72e3e8e3a0a31c1a54 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 27 Dec 2013 00:34:55 +0100 Subject: [PATCH] Add a patch that prevents naive search mistakes, such as the use of regular expressions, from producing no results. A '*' followed by a dot or any other expression is now interpreted as a filetype search. --- htroot/yacysearch.java | 7 +++- .../net/yacy/search/query/QueryModifier.java | 36 +++++++++++-------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index adb16d9e5..f2a577ef3 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -370,7 +370,12 @@ public class yacysearch { int stp = querystring.indexOf('*'); if (stp >= 0) { - querystring = querystring.substring(0, stp) + Segment.catchallString + querystring.substring(stp + 1); + // if the star appears as a single entry, use the catchallstring + if (querystring.length() == 1) { + querystring = Segment.catchallString; + } else { + querystring = querystring.replace('*', ' ').replaceAll(" ", " "); + } } if ( querystring.indexOf("/near", 0) >= 0 ) { querystring = querystring.replace("/near", ""); diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index 362ece108..d17871348 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -27,6 +27,7 @@ import org.apache.solr.common.params.MultiMapSolrParams; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.util.CommonPattern; +import net.yacy.search.index.Segment; import net.yacy.search.schema.CollectionSchema; import net.yacy.server.serverObjects; @@ -74,21 +75,11 @@ public class QueryModifier { add("/file"); } + // parse 'common search mistakes' like guessed regular expressions + querystring = filetypeParser(querystring, "*"); + // parse filetype - final int ftp = 
querystring.indexOf("filetype:", 0); - if ( ftp >= 0 ) { - int ftb = querystring.indexOf(' ', ftp); - if ( ftb == -1 ) { - ftb = querystring.length(); - } - filetype = querystring.substring(ftp + 9, ftb); - querystring = querystring.replace("filetype:" + filetype, ""); - while ( !filetype.isEmpty() && filetype.charAt(0) == '.' ) { - filetype = filetype.substring(1); - } - add("filetype:" + filetype); - if (filetype.isEmpty()) filetype = null; - } + querystring = filetypeParser(querystring, "filetype:"); // parse site final int sp = querystring.indexOf("site:", 0); @@ -141,6 +132,23 @@ public class QueryModifier { return querystring.trim(); } + private String filetypeParser(String querystring, final String filetypePrefix) { + final int ftp = querystring.indexOf(filetypePrefix, 0); + if ( ftp >= 0 ) { + int ftb = querystring.indexOf(' ', ftp); + if ( ftb < 0 ) ftb = querystring.length(); + filetype = querystring.substring(ftp + filetypePrefix.length(), ftb); + querystring = querystring.replace(filetypePrefix + filetype, ""); + while ( !filetype.isEmpty() && filetype.charAt(0) == '.' ) { + filetype = filetype.substring(1); + } + add("filetype:" + filetype); + if (filetype.isEmpty()) filetype = null; + if (querystring.length() == 0) querystring = "*"; + } + return querystring; + } + public void add(String m) { if (modifier.length() > 0 && modifier.charAt(modifier.length() - 1) != ' ' && m != null && m.length() > 0) modifier.append(' '); if (m != null) modifier.append(m);