- redesign of solr query construction

- fix for solr boosts and location search
- fix for number of search results in local search
pull/1/head
Michael Peter Christen 13 years ago
parent 6ab64746d7
commit 584663ae8c

@ -143,5 +143,4 @@ public abstract class AbstractSolrConnector implements SolrConnector {
};
}
}

@ -36,6 +36,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
@ -166,7 +167,7 @@ public class EmbeddedSolrConnector extends SolrServerConnector implements SolrCo
}
@Override
public QueryResponse query(SolrParams params) throws IOException {
public QueryResponse query(ModifiableSolrParams params) throws IOException {
if (this.server == null) throw new IOException("server disconnected");
try {
return this.server.query(params);

@ -33,11 +33,14 @@ import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.kelondro.util.MemoryControl;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
/**
* Implementation of a mirrored solr connector.
@ -328,6 +331,50 @@ public class MirrorSolrConnector extends AbstractSolrConnector implements SolrCo
return list;
}
/**
 * Run a query against the mirrored pair of connectors.
 * If only one connector is attached, the query is passed to that connector unchanged.
 * If both are attached, solr0 is asked first; when it cannot fill the requested page,
 * solr1 is asked for the remaining rows and both partial results are returned together.
 * The START and ROWS parameters of the given query object are modified temporarily
 * and are set back to the effective start/rows values before this method returns.
 * @param query the solr query parameters; ROWS defaults to 10 and START to 0 if absent
 * @return the response of the single attached connector, the response of solr0 if it
 *         filled the page, or the response of solr1 with the documents from solr0
 *         inserted in front of its own documents
 * @throws IOException
 * @throws SolrException
 */
@Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException {
    final Integer count0 = query.getInt(CommonParams.ROWS);
    final int count = count0 == null ? 10 : count0.intValue();
    final Integer start0 = query.getInt(CommonParams.START);
    final int start = start0 == null ? 0 : start0.intValue();
    if (this.solr0 == null && this.solr1 == null) return new QueryResponse();
    if (this.solr1 == null) return this.solr0.query(query);
    if (this.solr0 == null) return this.solr1.query(query);

    // ask the first connector; if it fills the page there is nothing to combine
    final QueryResponse rsp0 = this.solr0.query(query);
    // a response without a result list (i.e. new QueryResponse()) is treated as empty
    final SolrDocumentList l0 = rsp0 == null ? null : rsp0.getResults();
    final int found0 = l0 == null ? 0 : l0.size();
    if (rsp0 != null && found0 >= count) return rsp0;

    // The total number of hits in solr0 is needed to compute the offset within solr1.
    // Use the numFound value of the response instead of fetching all documents again;
    // if numFound is smaller than what was actually seen, fall back to the seen window.
    final long total0 = Math.max(l0 == null ? 0L : l0.getNumFound(), (long) start + found0);

    // now use the size of the first result to do a second query for the remaining rows
    final int offset1 = (int) Math.max(0L, Math.min((long) Integer.MAX_VALUE, (long) start + found0 - total0));
    final QueryResponse rsp1;
    query.set(CommonParams.START, offset1);
    query.set(CommonParams.ROWS, Math.max(0, count - found0));
    try {
        rsp1 = this.solr1.query(query);
    } finally {
        // set the query window back even if the second query fails
        query.set(CommonParams.START, start);
        query.set(CommonParams.ROWS, count);
    }

    // combine both: documents from solr0 come first, followed by those from solr1
    final SolrDocumentList l1 = rsp1 == null ? null : rsp1.getResults();
    if (l1 == null) {
        if (rsp0 != null) return rsp0;
        return rsp1 != null ? rsp1 : new QueryResponse();
    }
    if (found0 > 0) l1.addAll(0, l0);
    l1.setNumFound(total0 + l1.getNumFound());
    l1.setStart(start);
    return rsp1;
}
@Override
public long getQueryCount(final String querystring) throws IOException {
if (this.solr0 == null && this.solr1 == null) return 0;

@ -27,10 +27,12 @@ import java.util.concurrent.ArrayBlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public class MultipleSolrConnector extends AbstractSolrConnector implements SolrConnector {
@ -165,6 +167,11 @@ public class MultipleSolrConnector extends AbstractSolrConnector implements Solr
return this.solr.query(querystring, offset, count);
}
/**
 * Pass the query on to the wrapped connector and hand back its response.
 * @param query the solr query parameters
 * @return the response produced by the wrapped connector
 * @throws IOException
 * @throws SolrException
 */
@Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException {
    final QueryResponse response = this.solr.query(query);
    return response;
}
@Override
public long getQueryCount(final String querystring) throws IOException {
return this.solr.getQueryCount(querystring);

@ -52,7 +52,7 @@ import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
@ -151,7 +151,8 @@ public class RemoteSolrConnector extends SolrServerConnector implements SolrConn
this.terminate();
}
public QueryResponse query(SolrParams params) throws IOException {
@Override
public QueryResponse query(ModifiableSolrParams params) throws IOException {
try {
QueryRequest request = new QueryRequest(params);
ResponseParser responseParser = new XMLResponseParser();

@ -28,10 +28,12 @@ import net.yacy.cora.document.UTF8;
import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public class RetrySolrConnector extends AbstractSolrConnector implements SolrConnector {
@ -187,6 +189,21 @@ public class RetrySolrConnector extends AbstractSolrConnector implements SolrCon
return null;
}
/**
 * Run the query on the wrapped connector and retry on failure until
 * retryMaxTime milliseconds have passed since this method was entered.
 * Between two attempts the thread sleeps for 10 milliseconds. If the thread is
 * interrupted during that pause, the interrupt flag is set again and no further
 * attempt is made.
 * @param query the solr query parameters
 * @return the response of the first successful attempt, or null if no attempt was made
 * @throws IOException the failure of the last attempt; a failure that is not an
 *         IOException is wrapped into one, with the original failure as cause
 * @throws SolrException
 */
@Override
public QueryResponse query(ModifiableSolrParams query) throws IOException, SolrException {
    final long deadline = System.currentTimeMillis() + this.retryMaxTime;
    Throwable lastFailure = null;
    while (System.currentTimeMillis() < deadline) {
        try {
            return this.solrConnector.query(query);
        } catch (final Throwable e) {
            lastFailure = e;
        }
        try {
            Thread.sleep(10);
        } catch (final InterruptedException interrupted) {
            // keep the interrupt visible for the caller and stop retrying;
            // otherwise every following sleep would fail immediately again
            Thread.currentThread().interrupt();
            break;
        }
    }
    if (lastFailure == null) return null;
    if (lastFailure instanceof IOException) throw (IOException) lastFailure;
    // keep the original failure as cause so that the stack trace is not lost
    throw new IOException(lastFailure.getMessage(), lastFailure);
}
@Override
public long getQueryCount(final String querystring) throws IOException {
final long t = System.currentTimeMillis() + this.retryMaxTime;

@ -32,10 +32,12 @@ import net.yacy.cora.sorting.ClusteredScoreMap;
import net.yacy.cora.sorting.ReversibleScoreMap;
import net.yacy.cora.protocol.Domains;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public class ShardSolrConnector extends AbstractSolrConnector implements SolrConnector {
@ -187,6 +189,15 @@ public class ShardSolrConnector extends AbstractSolrConnector implements SolrCon
return list;
}
/**
 * Ask the connectors one after the other and return the first response
 * which contains at least one document.
 * Responses that are null or that carry no result list are skipped.
 * @param query the solr query parameters
 * @return the first non-empty response, or a new empty QueryResponse if no
 *         connector delivered a document
 * @throws IOException
 * @throws SolrException
 */
@Override
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException {
    for (final SolrConnector connector: this.connectors) {
        final QueryResponse rsp = connector.query(query);
        if (rsp == null) continue;
        // a response created with new QueryResponse() has no result list at all
        final SolrDocumentList results = rsp.getResults();
        if (results != null && !results.isEmpty()) return rsp;
    }
    return new QueryResponse();
}
@Override
public long getQueryCount(final String querystring) throws IOException {
final AtomicLong count = new AtomicLong(0);

@ -27,10 +27,12 @@ import java.util.concurrent.BlockingQueue;
import net.yacy.cora.sorting.ReversibleScoreMap;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
public interface SolrConnector extends Iterable<String> /* Iterable of document IDs */ {
@ -103,6 +105,13 @@ public interface SolrConnector extends Iterable<String> /* Iterable of document
*/
public SolrDocument get(final String id) throws IOException;
/**
 * get a query result from solr
 * @param query the solr parameters which describe the query
 * @return the query response
 * @throws IOException
 * @throws SolrException
 */
public QueryResponse query(final ModifiableSolrParams query) throws IOException, SolrException;
/**
* get a query result from solr
* to get all results set the query String to "*:*"

@ -43,7 +43,7 @@ import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector {
@ -281,7 +281,8 @@ public abstract class SolrServerConnector extends AbstractSolrConnector implemen
return result;
}
abstract public QueryResponse query(SolrParams params) throws IOException;
@Override
abstract public QueryResponse query(ModifiableSolrParams params) throws IOException;
private final char[] queryIDTemplate = "id:\" \"".toCharArray();

@ -225,6 +225,8 @@ public class DCEntry extends TreeMap<String, String> {
public String getDescription() {
String t = this.get("body");
if (t == null) t = this.get("dc:description");
if (t == null) t = this.get("dc:subject");
if (t == null) t = this.get("categories");
t = stripCDATA(t);
if (t == null) return "";
return t;

@ -224,7 +224,7 @@ public class RowCollection implements Sortable<Row.Entry>, Iterable<Row.Entry>,
entry.setCol(exp_chunkcount, this.chunkcount);
entry.setCol(exp_last_read, daysSince2000(System.currentTimeMillis()));
entry.setCol(exp_last_wrote, daysSince2000(this.lastTimeWrote));
entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? ASCII.getBytes("__") : UTF8.getBytes(this.rowdef.objectOrder.signature()));
entry.setCol(exp_order_type, (this.rowdef.objectOrder == null) ? ASCII.getBytes("__") : ASCII.getBytes(this.rowdef.objectOrder.signature()));
entry.setCol(exp_order_bound, this.sortBound);
entry.setCol(exp_collection, this.chunkcache);
return entry.bytes();

@ -113,6 +113,8 @@ import net.yacy.server.serverSwitch;
import net.yacy.utils.crypt;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@ -1032,12 +1034,15 @@ public final class Protocol
}
event.rankingProcess.addExpectedRemoteReferences(count);
SolrDocumentList docList = null;
final String solrQuerystring = event.getQuery().solrQueryString();
final SolrQuery solrQuery = event.getQuery().solrQuery();
solrQuery.setStart(offset);
solrQuery.setRows(count);
boolean localsearch = target == null || target.equals(event.peers.mySeed());
if (localsearch) {
// search the local index
try {
docList = event.rankingProcess.getQuery().getSegment().fulltext().getSolr().query(solrQuerystring, offset, count);
QueryResponse rsp = event.rankingProcess.getQuery().getSegment().fulltext().getSolr().query(solrQuery);
docList = rsp.getResults();
} catch (SolrException e) {
Network.log.logInfo("SEARCH failed (solr, 1), localpeer (" + e.getMessage() + ")", e);
return -1;
@ -1049,7 +1054,8 @@ public final class Protocol
final String solrURL = "http://" + target.getPublicAddress() + "/solr";
try {
SolrConnector solrConnector = new RemoteSolrConnector(solrURL);
docList = solrConnector.query(solrQuerystring, offset, count);
QueryResponse rsp = solrConnector.query(solrQuery);
docList = rsp.getResults();
// no need to close this here because that sends a commit to remote solr which is not wanted here
} catch (IOException e) {
Network.log.logInfo("SEARCH failed (solr), Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + ")", e);

@ -39,7 +39,8 @@ import java.util.SortedSet;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.ORDER;
import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.Classification;
@ -497,8 +498,8 @@ public final class QueryParams {
public static final String DF = "df";
*/
public String solrQueryString() {
if (this.solrQueryString != null) return this.solrQueryString;
public SolrQuery solrQuery() {
if (this.query_include_words == null || this.query_include_words.size() == 0) return null;
// get text query
final StringBuilder q = solrQueryString(this.query_include_words, this.query_exclude_words, this.indexSegment.fulltext().getSolrScheme());
@ -520,31 +521,34 @@ public final class QueryParams {
q.append(" AND ").append(YaCySchema.url_file_ext_s.name()).append(':').append(ext);
}
// construct query
final SolrQuery params = new SolrQuery();
params.setQuery(q.toString());
params.setStart(this.offset);
params.setRows(this.resultcount);
params.setFacet(false);
if (this.radius > 0.0d && this.lat != 0.0d && this.lon != 0.0d) {
// location search, no special ranking
// try http://localhost:8090/solr/select?q=*:*&fq={!bbox}&sfield=coordinate_p&pt=50.17,8.65&d=1
q.append('&').append(CommonParams.FQ).append("=!bbox&sfield=").append(YaCySchema.coordinate_p.name()).append("&pt=");
q.append(Double.toString(this.lat)).append(',').append(Double.toString(this.lon)).append("&d=").append(GeoLocation.degreeToKm(this.radius));
// try http://localhost:8090/solr/select?q=*:*&fq={!bbox sfield=coordinate_p pt=50.17,8.65 d=1}
//params.setQuery("!bbox " + q.toString());
//params.set("sfield", YaCySchema.coordinate_p.name());
//params.set("pt", Double.toString(this.lat) + "," + Double.toString(this.lon));
//params.set("d", GeoLocation.degreeToKm(this.radius));
params.setFilterQueries("{!bbox sfield=" + YaCySchema.coordinate_p.name() + " pt=" + Double.toString(this.lat) + "," + Double.toString(this.lon) + " d=" + GeoLocation.degreeToKm(this.radius) + "}");
//params.setRows(Integer.MAX_VALUE);
} else {
// set ranking
if (this.ranking.coeff_date == RankingProfile.COEFF_MAX) {
// set a most-recent ordering
q.append('&').append(CommonParams.SORT).append('=').append(YaCySchema.last_modified.name()).append(" desc");
} else {
// boost fields
q.append("&defType=edismax&qf=");
int c = 0;
for (Map.Entry<YaCySchema, Float> boost: boosts.entrySet()) {
if (c++ > 0) q.append(',');
q.append(boost.getKey().name()).append('^').append(boost.getValue().toString());
}
params.setSortField(YaCySchema.last_modified.name(), ORDER.desc);
}
}
// prepare result
this.solrQueryString = q.toString();
Log.logInfo("Protocol", "SOLR QUERY: " + this.solrQueryString);
return this.solrQueryString;
Log.logInfo("Protocol", "SOLR QUERY: " + params.toString());
return params;
}
public static StringBuilder solrQueryString(Collection<String> include, Collection<String> exclude, SolrConfiguration configuration) {
@ -566,10 +570,15 @@ public final class QueryParams {
// combine these queries for all relevant fields
wc = 0;
Float boost;
for (YaCySchema field: fields) {
if (configuration != null && !configuration.contains(field.name())) continue;
if (wc > 0) q.append(" OR ");
q.append('(').append(field.name()).append(':').append(w).append(')');
q.append('(');
q.append(field.name()).append(':').append(w);
boost = boosts.get(field);
if (boost != null) q.append('^').append(boost.toString());
q.append(')');
wc++;
}
q.insert(0, '(');

@ -196,35 +196,6 @@ public final class RWIProcess extends Thread
@Override
public void run() {
/*
// start a concurrent solr search
if (this.query.query_include_words != null) {
Thread solrSearch = new Thread() {
@Override
public void run() {
Thread.currentThread().setName("SearchEvent.solrSearch");
String solrQuery = RWIProcess.this.query.solrQuery();
try {
ReferenceContainer<WordReference> wr = ReferenceContainer.emptyContainer(Segment.wordReferenceFactory, null);
SolrDocumentList sdl = RWIProcess.this.query.getSegment().fulltext().getSolr().query(solrQuery, 0, 20);
for (SolrDocument d : sdl) {
try {
URIMetadataNode md = new URIMetadataNode(d);
WordReferenceVars v = new WordReferenceVars(md);
wr.add(v);
} catch (SpaceExceededException e) {}
}
Log.logInfo("SearchEvent", "added " + wr.size() + " hits from solr to ranking process");
RWIProcess.this.add(wr, true, "embedded solr", sdl.size(), 60000);
} catch (SolrException e) {
} catch (IOException e) {
}
}
};
solrSearch.start();
}
*/
// do a search
oneFeederStarted();

@ -150,7 +150,7 @@ public final class SearchEvent {
this.rankingProcess = new RWIProcess(this.query, this.order, remote);
// start a local solr search
RemoteSearch.solrRemoteSearch(this, Math.min(300, this.query.itemsPerPage() * 3), 10000, null /*this peer*/, Switchboard.urlBlacklist);
RemoteSearch.solrRemoteSearch(this, 10000, 1000, null /*this peer*/, Switchboard.urlBlacklist);
// start a local RWI search concurrently
this.rankingProcess.start();

@ -113,16 +113,6 @@ public class SearchEventCache {
return alive;
}
/*
private volatile static SearchEvent dummyEvent = null;
private static SearchEvent getDummyEvent(final WorkTables workTables, final LoaderDispatcher loader, final Segment indexSegment) {
Log.logWarning("SearchEventCache", "returning dummy event");
if (dummyEvent != null) return dummyEvent;
final QueryParams query = new QueryParams("", 0, null, indexSegment, new RankingProfile(Classification.ContentDomain.TEXT), "");
dummyEvent = new SearchEvent(query, null, workTables, null, false, loader, 0, 0, 0, 0, false);
return dummyEvent;
}
*/
public static SearchEvent getEvent(
final QueryParams query,
final SeedDB peers,
@ -154,37 +144,6 @@ public class SearchEventCache {
}
if (event == null) {
// throttling in case of too many search requests
int waitcount = 0;
/*
throttling : while (true) {
final int allowedThreads = (int) Math.max(10, MemoryControl.available() / (query.snippetCacheStrategy == null ? 3 : 30) / 1024 / 1024);
// make room if there are too many search events (they need a lot of RAM)
if (lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 1: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(false);
} else break throttling;
// if there are still some then delete just all
if (lastEvents.size() >= allowedThreads) {
Log.logWarning("SearchEventCache", "throttling phase 2: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
cleanupEvents(true);
} else break throttling;
// now there might be still events left that are alive
if (countAliveThreads() < allowedThreads) break throttling;
// finally we just wait some time until we get access
Log.logWarning("SearchEventCache", "throttling phase 3: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive; " + allowedThreads + " allowed");
try { Thread.sleep(200); } catch (final InterruptedException e) { }
waitcount++;
if (waitcount >= 100) return getDummyEvent(workTables, loader, query.getSegment());
}
*/
if (waitcount > 0) {
// do not fetch snippets because that is most time-expensive
query.snippetCacheStrategy = null;
}
// check if there are too many other searches alive now
Log.logInfo("SearchEventCache", "getEvent: " + lastEvents.size() + " in cache; " + countAliveThreads() + " alive");

Loading…
Cancel
Save