From b94bd7f20a1579d5ddf0a481ca675453539a5552 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sun, 2 Aug 2015 14:52:41 +0200 Subject: [PATCH] a collection of search query enhancements: - fixed superfluous space in query field list - fixed filter query logic - removed look-ahead query which caused each new search page to submit two solr queries - fixed random solr result order when the solr score was equal: in that case the results were re-ordered by YaCy using the document hash which came from the solr object, and that appeared to be random. Now the hash of the url is used and the score is additionally modified by the url length to prevent this particular case from appearing at all. --- .../net/yacy/cora/federate/solr/Ranking.java | 2 +- .../kelondro/data/meta/URIMetadataNode.java | 27 ++----------------- source/net/yacy/peers/Protocol.java | 6 +++++ source/net/yacy/peers/RemoteSearch.java | 4 ++- .../net/yacy/search/query/QueryModifier.java | 16 +++++------ source/net/yacy/search/query/QueryParams.java | 10 ++++++- source/net/yacy/search/query/SearchEvent.java | 9 ++++--- 7 files changed, 35 insertions(+), 39 deletions(-) diff --git a/source/net/yacy/cora/federate/solr/Ranking.java b/source/net/yacy/cora/federate/solr/Ranking.java index adcc8fa8f..ee82c887d 100644 --- a/source/net/yacy/cora/federate/solr/Ranking.java +++ b/source/net/yacy/cora/federate/solr/Ranking.java @@ -105,7 +105,7 @@ public class Ranking { if (!this.fieldBoosts.containsKey(CollectionSchema.description_txt)) qf.append(CollectionSchema.description_txt.getSolrFieldName()).append(' '); if (!this.fieldBoosts.containsKey(CollectionSchema.keywords)) qf.append(CollectionSchema.keywords.getSolrFieldName()); - this.queryFields = qf.toString(); // doesn't change often, cache it + this.queryFields = qf.toString().trim(); // doesn't change often, cache it return this.queryFields; } diff --git a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java 
b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java index 0554ecea5..ce296c95f 100644 --- a/source/net/yacy/kelondro/data/meta/URIMetadataNode.java +++ b/source/net/yacy/kelondro/data/meta/URIMetadataNode.java @@ -787,32 +787,9 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable"); solrQuery.setHighlightSnippets(5); for (CollectionSchema field: snippetFields) solrQuery.addHighlightField(field.getSolrFieldName()); + //System.out.println("*** debug-query-highligh ***:" + ConcurrentLog.stackTrace()); } else { solrQuery.setHighlight(false); } @@ -1078,6 +1083,7 @@ public final class Protocol { docs = new ArrayList(docList[0].size()); } else docs = null; for (final SolrDocument doc: docList[0]) { + //System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku"))); if ( term-- <= 0 ) { break; // do not process more that requested (in case that evil peers fill us up with rubbish) } diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index fae3bae5d..cccc08f61 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -328,7 +328,9 @@ public class RemoteSearch extends Thread { final Seed targetPeer, final int partitions, final Blacklist blacklist) { - + + //System.out.println("*** debug-remoteSearch ***:" + ConcurrentLog.stackTrace()); + assert solrQuery != null; // check own peer status if (event.peers.mySeed() == null) { return null; } diff --git a/source/net/yacy/search/query/QueryModifier.java b/source/net/yacy/search/query/QueryModifier.java index 0cb0f6942..a65ae1e31 100644 --- a/source/net/yacy/search/query/QueryModifier.java +++ b/source/net/yacy/search/query/QueryModifier.java @@ -331,20 +331,20 @@ public class QueryModifier { */ public static String parseCollectionExpression(String collectionDescription) { String[] s0 = CommonPattern.VERTICALBAR.split(collectionDescription); - ArrayList sites = new ArrayList(2); + ArrayList collections = 
new ArrayList(2); for (String s: s0) { s = s.trim(); - if (s.length() > 0) sites.add(s); + if (s.length() > 0) collections.add(s); } StringBuilder fq = new StringBuilder(20); - if (sites.size() > 1) { - fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"'); - for (int i = 1; i < sites.size(); i++) { - fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(i)).append('\"'); + if (collections.size() > 1) { + fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"'); + for (int i = 1; i < collections.size(); i++) { + fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(i)).append('\"'); } fq.append(')'); - } else if (sites.size() == 1) { - fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"'); + } else if (collections.size() == 1) { + fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"'); } if (fq.length() > 0) fq.insert(0, "{!tag=" + CollectionSchema.collection_sxt.getSolrFieldName() + "}"); return fq.toString(); diff --git a/source/net/yacy/search/query/QueryParams.java b/source/net/yacy/search/query/QueryParams.java index b08045619..7da6122f7 100644 --- a/source/net/yacy/search/query/QueryParams.java +++ b/source/net/yacy/search/query/QueryParams.java @@ -448,7 +448,15 @@ public final class QueryParams { // add site facets fqs.addAll(getFacetsFilterQueries()); if (fqs.size() > 0) { - params.setFilterQueries(fqs.toArray(new String[fqs.size()])); + StringBuilder fqsb = new StringBuilder(); + for (String f: fqs) { + fqsb.append(" AND "); + //boolean wo = f.indexOf(" OR ") >= 0; + //if (wo) fqsb.append('('); + fqsb.append(f); + //if (wo) fqsb.append(')'); + } + params.setFilterQueries(new 
String[]{fqsb.substring(5)}); } // set facet query attributes diff --git a/source/net/yacy/search/query/SearchEvent.java b/source/net/yacy/search/query/SearchEvent.java index 0d64a7ebe..c4b432551 100644 --- a/source/net/yacy/search/query/SearchEvent.java +++ b/source/net/yacy/search/query/SearchEvent.java @@ -973,7 +973,8 @@ public final class SearchEvent { this.urlhashes.putUnique(iEntry.hash()); rankingtryloop: while (true) { try { - long score = (long) (1000000.0f * iEntry.score()); + long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly + //System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring()); this.nodeStack.put(new ReverseElement(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest) break rankingtryloop; } catch (final ArithmeticException e ) { @@ -1519,13 +1520,15 @@ public final class SearchEvent { final URIMetadataNode re = this.resultList.element(item).getElement(); EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false); - if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) { + /* + if (this.localsolrsearch == null || (!this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0)) { // at the end of a list, trigger a next solr search if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) { - this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, 
this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, 0, Switchboard.urlBlacklist); + this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null, 0, Switchboard.urlBlacklist); } this.localsolroffset += this.query.itemsPerPage; } + */ return re; }