|
|
|
@ -52,7 +52,7 @@ public class QueryGoal {
|
|
|
|
|
private static char space = ' ';
|
|
|
|
|
private static char sq = '\'';
|
|
|
|
|
private static char dq = '"';
|
|
|
|
|
private static String seps = ".:;#*`,!$%()=?^<>/&_";
|
|
|
|
|
private static String seps = ":;#*`!$%()=?^<>/&_";
|
|
|
|
|
|
|
|
|
|
public String query_original;
|
|
|
|
|
private HandleSet include_hashes, exclude_hashes;
|
|
|
|
@ -134,21 +134,21 @@ public class QueryGoal {
|
|
|
|
|
this.exclude_hashes = null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* EBNF of a query
|
|
|
|
|
*
|
|
|
|
|
* query = {whitespace, phrase}, [whitespace]
|
|
|
|
|
* whitespace = space, {space}
|
|
|
|
|
* space = ' '
|
|
|
|
|
* phrase = ['-'], string
|
|
|
|
|
* phrase = ['-']|['+'], string
|
|
|
|
|
* string = {any character without sq, dq and whitespace} | sq, {any character without sq}, sq | dq, {any character without dq}, dq
|
|
|
|
|
* sq = '\''
|
|
|
|
|
* dq = '"'
|
|
|
|
|
*/
|
|
|
|
|
private static void parseQuery(String s, Collection<String> include_string, Collection<String> exclude_string) {
|
|
|
|
|
|
|
|
|
|
while (s.length() > 0) {
|
|
|
|
|
// parse query
|
|
|
|
|
// parse whitespace
|
|
|
|
|
int p = 0;
|
|
|
|
|
while (p < s.length() && s.charAt(p) == space) p++;
|
|
|
|
|
s = s.substring(p);
|
|
|
|
@ -174,11 +174,26 @@ public class QueryGoal {
|
|
|
|
|
stop = s.charAt(0);
|
|
|
|
|
s = s.substring(1);
|
|
|
|
|
}
|
|
|
|
|
p = 0;
|
|
|
|
|
while (p < s.length() && s.charAt(p) != stop) p++;
|
|
|
|
|
String string = s.substring(0, p);
|
|
|
|
|
p++; // go behind the stop character (eats up space, sq and dq)
|
|
|
|
|
|
|
|
|
|
if (stop == space) {
|
|
|
|
|
// For non-quoted strings, just skip to the next token
|
|
|
|
|
while (p < s.length() && s.charAt(p) != stop) p++;
|
|
|
|
|
} else {
|
|
|
|
|
// For quoted strings, find the closing quote
|
|
|
|
|
while (p < s.length() && s.charAt(p) != stop) p++;
|
|
|
|
|
|
|
|
|
|
// Consume the closing quote
|
|
|
|
|
if (p < s.length() && s.charAt(p) == stop) p++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
String string;
|
|
|
|
|
if (stop == space) {
|
|
|
|
|
string = s.substring(0, p);
|
|
|
|
|
} else {
|
|
|
|
|
string = s.substring(0, p - 1); // Exclude the closing quote
|
|
|
|
|
}
|
|
|
|
|
s = p < s.length() ? s.substring(p) : "";
|
|
|
|
|
p++; // go behind the stop character (eats up space, sq and dq)
|
|
|
|
|
if (string.length() > 0) {
|
|
|
|
|
if (inc) {
|
|
|
|
|
if (!include_string.contains(string)) include_string.add(string);
|
|
|
|
@ -187,6 +202,7 @@ public class QueryGoal {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// in case that the include_string contains several entries including 1-char tokens and also more-than-1-char tokens,
|
|
|
|
|
// then remove the 1-char tokens so that we are not too strict. This makes it possible to be a bit more fuzzy
|
|
|
|
|
// in the search where it is appropriate
|
|
|
|
@ -397,12 +413,12 @@ public class QueryGoal {
|
|
|
|
|
|
|
|
|
|
// add filter to prevent that results come from failed urls
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.images_urlstub_sxt.getSolrFieldName())
|
|
|
|
|
.append(AbstractSolrConnector.CATCHALL_DTERM);
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(image/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.images_urlstub_sxt.getSolrFieldName())
|
|
|
|
|
.append(AbstractSolrConnector.CATCHALL_DTERM);
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -416,16 +432,16 @@ public class QueryGoal {
|
|
|
|
|
* @return Solr filter queries for audio content URLs
|
|
|
|
|
*/
|
|
|
|
|
public List<String> collectionAudioFilterQuery(final boolean strict) {
|
|
|
|
|
final ArrayList<String> fqs = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
// add filter to prevent that results come from failed urls
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(audio/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.audiolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
final ArrayList<String> fqs = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
// add filter to prevent that results come from failed urls
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(audio/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.audiolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -441,13 +457,13 @@ public class QueryGoal {
|
|
|
|
|
final ArrayList<String> fqs = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
// add filter to prevent that results come from failed urls
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(video/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.videolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName()).append(":(video/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.videolinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
@ -463,14 +479,14 @@ public class QueryGoal {
|
|
|
|
|
final ArrayList<String> fqs = new ArrayList<>();
|
|
|
|
|
|
|
|
|
|
// add filter to prevent that results come from failed urls
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName())
|
|
|
|
|
.append(":(application/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.applinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
fqs.add(CollectionSchema.httpstatus_i.getSolrFieldName() + ":" + HttpStatus.SC_OK);
|
|
|
|
|
StringBuilder filter = new StringBuilder(CollectionSchema.content_type.getSolrFieldName())
|
|
|
|
|
.append(":(application/*)");
|
|
|
|
|
if (!strict) {
|
|
|
|
|
filter.append(" OR ").append(CollectionSchema.applinkscount_i.getSolrFieldName()).append(":[1 TO *]");
|
|
|
|
|
}
|
|
|
|
|
fqs.add(filter.toString());
|
|
|
|
|
return fqs;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public StringBuilder collectionImageQuery(final QueryModifier modifier) {
|
|
|
|
|