- using a filter query for facet restriction

- calculating the whole search result in at most two sub-queries from
Solr
pull/1/head
Michael Peter Christen 12 years ago
parent 7ad5457db0
commit bab573361f

@ -262,7 +262,7 @@ public class RemoteSearch extends Thread {
public void run() { public void run() {
int tmpoffset = 0; int tmpoffset = 0;
int tmpcount = 10; int tmpcount = 10;
while (tmpoffset + tmpcount <= count) { while (tmpoffset + tmpcount <= count && tmpcount > 0) {
try { try {
event.rankingProcess.oneFeederStarted(); event.rankingProcess.oneFeederStarted();
int urls = Protocol.solrQuery( int urls = Protocol.solrQuery(
@ -287,6 +287,7 @@ public class RemoteSearch extends Thread {
event.rankingProcess.oneFeederTerminated(); event.rankingProcess.oneFeederTerminated();
} }
tmpoffset += tmpcount; tmpoffset += tmpcount;
tmpcount = count - tmpoffset; // increase the tmpcount to get to all results in less time
} }
} }
}; };

@ -403,30 +403,30 @@ public final class QueryParams {
public SolrQuery solrQuery() { public SolrQuery solrQuery() {
if (this.queryGoal.getIncludeStrings().size() == 0) return null; if (this.queryGoal.getIncludeStrings().size() == 0) return null;
// get text query // construct query
final StringBuilder q = this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrScheme()); final SolrQuery params = new SolrQuery();
params.setQuery(this.queryGoal.solrQueryString(this.indexSegment.fulltext().getSolrScheme()).toString());
// add constraints // add constraints
final StringBuilder fq = new StringBuilder();
if (this.nav_sitehash == null && this.nav_sitehost == null) { if (this.nav_sitehash == null && this.nav_sitehost == null) {
if (this.siteexcludes != null) { if (this.siteexcludes != null) {
for (String ex: this.siteexcludes) { for (String ex: this.siteexcludes) {
q.append(" -").append(YaCySchema.host_id_s.getSolrFieldName()).append(':').append(ex); fq.append(" AND -").append(YaCySchema.host_id_s.getSolrFieldName()).append(':').append(ex);
} }
} }
} else { } else {
if (this.nav_sitehost != null) if (this.nav_sitehost != null)
q.append(" AND ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.nav_sitehost).append('\"'); fq.append(" AND ").append(YaCySchema.host_s.getSolrFieldName()).append(":\"").append(this.nav_sitehost).append('\"');
else else
q.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"'); fq.append(" AND ").append(YaCySchema.host_id_s.getSolrFieldName()).append(":\"").append(this.nav_sitehash).append('\"');
} }
// add vocabulary facets // add vocabulary facets
for (Tagging.Metatag tag: this.metatags) { for (Tagging.Metatag tag: this.metatags) {
q.append(" AND ").append(YaCySchema.VOCABULARY_PREFIX).append(tag.getVocabularyName()).append(YaCySchema.VOCABULARY_SUFFIX).append(":\"").append(tag.getObject()).append('\"'); fq.append(" AND ").append(YaCySchema.VOCABULARY_PREFIX).append(tag.getVocabularyName()).append(YaCySchema.VOCABULARY_SUFFIX).append(":\"").append(tag.getObject()).append('\"');
} }
// construct query
final SolrQuery params = new SolrQuery();
params.setParam("defType", "edismax"); params.setParam("defType", "edismax");
params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back params.setParam("bq", Boost.RANKING.getBoostQuery()); // a boost query that moves double content to the back
params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension params.setParam("bf", Boost.RANKING.getBoostFunction()); // a boost function extension
@ -441,27 +441,27 @@ public final class QueryParams {
int extm = urlMaskPattern.indexOf(".*\\."); int extm = urlMaskPattern.indexOf(".*\\.");
if (extm >= 0) { if (extm >= 0) {
String ext = urlMaskPattern.substring(extm + 4); String ext = urlMaskPattern.substring(extm + 4);
q.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext); int k = ext.indexOf('(');
if (k > 0) ext = ext.substring(0, k);
fq.append(" AND ").append(YaCySchema.url_file_ext_s.getSolrFieldName()).append(':').append(ext);
} }
// translate protocol navigation // translate protocol navigation
if (urlMaskPattern.startsWith("http://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("http"); if (urlMaskPattern.startsWith("http://.*")) fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("http");
else if (urlMaskPattern.startsWith("https://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("https"); else if (urlMaskPattern.startsWith("https://.*")) fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("https");
else if (urlMaskPattern.startsWith("ftp://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("ftp"); else if (urlMaskPattern.startsWith("ftp://.*")) fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("ftp");
else if (urlMaskPattern.startsWith("smb://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("smb"); else if (urlMaskPattern.startsWith("smb://.*")) fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("smb");
else if (urlMaskPattern.startsWith("file://.*")) q.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("file"); else if (urlMaskPattern.startsWith("file://.*")) fq.append(" AND ").append(YaCySchema.url_protocol_s.getSolrFieldName()).append(':').append("file");
// add a filter query on urls // add a filter query on urls
// solr doesn't like slashes, backslashes or doublepoints; remove them // solr doesn't like slashes, backslashes or doublepoints; remove them // urlmask = ".*\\." + ft + "(\\?.*)?";
int p; int p;
while ((p = urlMaskPattern.indexOf("\\")) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 2);
while ((p = urlMaskPattern.indexOf(':')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1); while ((p = urlMaskPattern.indexOf(':')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
while ((p = urlMaskPattern.indexOf('/')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1); while ((p = urlMaskPattern.indexOf('/')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 1);
params.setFilterQueries(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/"); while ((p = urlMaskPattern.indexOf('\\')) >= 0) urlMaskPattern = urlMaskPattern.substring(0, p) + "." + urlMaskPattern.substring(p + 2);
fq.append(" AND ").append(YaCySchema.sku.getSolrFieldName() + ":/" + urlMaskPattern + "/");
} }
params.setQuery(q.toString());
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) { if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
// location search, no special ranking // location search, no special ranking
// try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1} // try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1}
@ -470,7 +470,7 @@ public final class QueryParams {
//params.set("sfield", YaCySchema.coordinate_p.name()); //params.set("sfield", YaCySchema.coordinate_p.name());
//params.set("pt", Double.toString(this.lat) + "," + Double.toString(this.lon)); //params.set("pt", Double.toString(this.lat) + "," + Double.toString(this.lon));
//params.set("d", GeoLocation.degreeToKm(this.radius)); //params.set("d", GeoLocation.degreeToKm(this.radius));
params.setFilterQueries("{!bbox sfield=" + YaCySchema.coordinate_p.getSolrFieldName() + " pt=" + Double.toString(this.lat) + "," + Double.toString(this.lon) + " d=" + GeoLocation.degreeToKm(this.radius) + "}"); fq.append(" AND ").append("{!bbox sfield=" + YaCySchema.coordinate_p.getSolrFieldName() + " pt=" + Double.toString(this.lat) + "," + Double.toString(this.lon) + " d=" + GeoLocation.degreeToKm(this.radius) + "}");
//params.setRows(Integer.MAX_VALUE); //params.setRows(Integer.MAX_VALUE);
} else { } else {
// set ranking // set ranking
@ -479,6 +479,7 @@ public final class QueryParams {
params.setSortField(YaCySchema.last_modified.getSolrFieldName(), ORDER.desc); params.setSortField(YaCySchema.last_modified.getSolrFieldName(), ORDER.desc);
} }
} }
if (fq.length() > 0) params.setFilterQueries(fq.substring(5));
// prepare result // prepare result
Log.logInfo("Protocol", "SOLR QUERY: " + params.toString()); Log.logInfo("Protocol", "SOLR QUERY: " + params.toString());

Loading…
Cancel
Save