Fix for ',' or '.' appearing within a word or number: the query is no
longer tokenized into parts around these characters, which makes it
possible to search for numbers or version numbers.
pull/594/head
Michael Peter Christen 1 year ago
parent 0689f4f0ae
commit ff8fe7b6a4

@ -52,7 +52,7 @@ public class QueryGoal {
private static char space = ' '; private static char space = ' ';
private static char sq = '\''; private static char sq = '\'';
private static char dq = '"'; private static char dq = '"';
private static String seps = ".:;#*`,!$%()=?^<>/&_"; private static String seps = ":;#*`!$%()=?^<>/&_";
public String query_original; public String query_original;
private HandleSet include_hashes, exclude_hashes; private HandleSet include_hashes, exclude_hashes;
@ -134,21 +134,21 @@ public class QueryGoal {
this.exclude_hashes = null; this.exclude_hashes = null;
} }
/* /*
* EBNF of a query * EBNF of a query
* *
* query = {whitespace, phrase}, [whitespace] * query = {whitespace, phrase}, [whitespace]
* whitespace = space, {space} * whitespace = space, {space}
* space = ' ' * space = ' '
* phrase = ['-'], string * phrase = ['-']|['+'], string
* string = {any character without sq, dq and whitespace} | sq, {any character without sq}, sq | dq, {any character without dq}, dq * string = {any character without sq, dq and whitespace} | sq, {any character without sq}, sq | dq, {any character without dq}, dq
* sq = '\'' * sq = '\''
* dq = '"' * dq = '"'
*/ */
private static void parseQuery(String s, Collection<String> include_string, Collection<String> exclude_string) { private static void parseQuery(String s, Collection<String> include_string, Collection<String> exclude_string) {
while (s.length() > 0) { while (s.length() > 0) {
// parse query // parse whitespace
int p = 0; int p = 0;
while (p < s.length() && s.charAt(p) == space) p++; while (p < s.length() && s.charAt(p) == space) p++;
s = s.substring(p); s = s.substring(p);
@ -174,11 +174,26 @@ public class QueryGoal {
stop = s.charAt(0); stop = s.charAt(0);
s = s.substring(1); s = s.substring(1);
} }
p = 0;
if (stop == space) {
// For non-quoted strings, just skip to the next token
while (p < s.length() && s.charAt(p) != stop) p++; while (p < s.length() && s.charAt(p) != stop) p++;
String string = s.substring(0, p); } else {
p++; // go behind the stop character (eats up space, sq and dq) // For quoted strings, find the closing quote
while (p < s.length() && s.charAt(p) != stop) p++;
// Consume the closing quote
if (p < s.length() && s.charAt(p) == stop) p++;
}
String string;
if (stop == space) {
string = s.substring(0, p);
} else {
string = s.substring(0, p - 1); // Exclude the closing quote
}
s = p < s.length() ? s.substring(p) : ""; s = p < s.length() ? s.substring(p) : "";
p++; // go behind the stop character (eats up space, sq and dq)
if (string.length() > 0) { if (string.length() > 0) {
if (inc) { if (inc) {
if (!include_string.contains(string)) include_string.add(string); if (!include_string.contains(string)) include_string.add(string);
@ -187,6 +202,7 @@ public class QueryGoal {
} }
} }
} }
// in case that the include_string contains several entries including 1-char tokens and also more-than-1-char tokens, // in case that the include_string contains several entries including 1-char tokens and also more-than-1-char tokens,
// then remove the 1-char tokens to prevent that we are too strict. This will make it possible to be a bit more fuzzy // then remove the 1-char tokens to prevent that we are too strict. This will make it possible to be a bit more fuzzy
// in the search where it is appropriate // in the search where it is appropriate

Loading…
Cancel
Save