a collection of search query enhancements:

- fixed superfluous space in query field list
- fixed filter query logic
- removed the look-ahead query, which caused each new search page
to submit two solr queries
- fixed the random order of solr results when solr scores were equal:
such results were then re-ordered by YaCy using the document hash which came
from the solr object, and that order appeared to be random. Now the hash of
the url is used, and the score is additionally modified by the url length to
prevent this particular case from appearing at all.
pull/9/merge
Michael Peter Christen 10 years ago
parent ec75959162
commit b94bd7f20a

@ -105,7 +105,7 @@ public class Ranking {
if (!this.fieldBoosts.containsKey(CollectionSchema.description_txt)) qf.append(CollectionSchema.description_txt.getSolrFieldName()).append(' ');
if (!this.fieldBoosts.containsKey(CollectionSchema.keywords)) qf.append(CollectionSchema.keywords.getSolrFieldName());
this.queryFields = qf.toString(); // doesn't change often, cache it
this.queryFields = qf.toString().trim(); // doesn't change often, cache it
return this.queryFields;
}

@ -787,32 +787,9 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
}
return this.toString(this.textSnippet.getLineRaw());
}
/*
taken from ResultEntry (should work without)
private int hashCache = Integer.MIN_VALUE; // if this is used in a compare method many times, a cache is useful
@Override
public int hashCode() {
if (this.hashCache == Integer.MIN_VALUE) {
this.hashCache = ByteArray.hashCode(this.hash());
return this.url().hashCode();
}
return this.hashCache;
}
@Override
public boolean equals(final Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (!(obj instanceof URIMetadataNode)) return false;
URIMetadataNode other = (URIMetadataNode) obj;
return Base64Order.enhancedCoder.equal(this.hash(), other.hash());
}
@Override
public int compareTo(URIMetadataNode o) {
return Base64Order.enhancedCoder.compare(this.hash(), o.hash());
}
@Override
public int compare(URIMetadataNode o1, URIMetadataNode o2) {
return Base64Order.enhancedCoder.compare(o1.hash(), o2.hash());
}*/
}

@ -47,7 +47,9 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.InetAddress;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
@ -937,6 +939,8 @@ public final class Protocol {
final int partitions,
final Blacklist blacklist) {
//try {System.out.println("*** debug-query *** " + URLDecoder.decode(solrQuery.toString(), "UTF-8"));} catch (UnsupportedEncodingException e) {}
if (event.query.getQueryGoal().getQueryString(false) == null || event.query.getQueryGoal().getQueryString(false).length() == 0) {
return -1; // we cannot query solr only with word hashes, there is no clear text string
}
@ -954,6 +958,7 @@ public final class Protocol {
solrQuery.setHighlightSimplePre("<b>");
solrQuery.setHighlightSnippets(5);
for (CollectionSchema field: snippetFields) solrQuery.addHighlightField(field.getSolrFieldName());
//System.out.println("*** debug-query-highligh ***:" + ConcurrentLog.stackTrace());
} else {
solrQuery.setHighlight(false);
}
@ -1078,6 +1083,7 @@ public final class Protocol {
docs = new ArrayList<SolrInputDocument>(docList[0].size());
} else docs = null;
for (final SolrDocument doc: docList[0]) {
//System.out.println("***DEBUG*** " + ((String) doc.getFieldValue("sku")));
if ( term-- <= 0 ) {
break; // do not process more that requested (in case that evil peers fill us up with rubbish)
}

@ -329,6 +329,8 @@ public class RemoteSearch extends Thread {
final int partitions,
final Blacklist blacklist) {
//System.out.println("*** debug-remoteSearch ***:" + ConcurrentLog.stackTrace());
assert solrQuery != null;
// check own peer status
if (event.peers.mySeed() == null) { return null; }

@ -331,20 +331,20 @@ public class QueryModifier {
*/
public static String parseCollectionExpression(String collectionDescription) {
String[] s0 = CommonPattern.VERTICALBAR.split(collectionDescription);
ArrayList<String> sites = new ArrayList<String>(2);
ArrayList<String> collections = new ArrayList<String>(2);
for (String s: s0) {
s = s.trim();
if (s.length() > 0) sites.add(s);
if (s.length() > 0) collections.add(s);
}
StringBuilder fq = new StringBuilder(20);
if (sites.size() > 1) {
fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
for (int i = 1; i < sites.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(i)).append('\"');
if (collections.size() > 1) {
fq.append('(').append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"');
for (int i = 1; i < collections.size(); i++) {
fq.append(" OR ").append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(i)).append('\"');
}
fq.append(')');
} else if (sites.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(sites.get(0)).append('\"');
} else if (collections.size() == 1) {
fq.append(CollectionSchema.collection_sxt.getSolrFieldName()).append(":\"").append(collections.get(0)).append('\"');
}
if (fq.length() > 0) fq.insert(0, "{!tag=" + CollectionSchema.collection_sxt.getSolrFieldName() + "}");
return fq.toString();

@ -448,7 +448,15 @@ public final class QueryParams {
// add site facets
fqs.addAll(getFacetsFilterQueries());
if (fqs.size() > 0) {
params.setFilterQueries(fqs.toArray(new String[fqs.size()]));
StringBuilder fqsb = new StringBuilder();
for (String f: fqs) {
fqsb.append(" AND ");
//boolean wo = f.indexOf(" OR ") >= 0;
//if (wo) fqsb.append('(');
fqsb.append(f);
//if (wo) fqsb.append(')');
}
params.setFilterQueries(new String[]{fqsb.substring(5)});
}
// set facet query attributes

@ -973,7 +973,8 @@ public final class SearchEvent {
this.urlhashes.putUnique(iEntry.hash());
rankingtryloop: while (true) {
try {
long score = (long) (1000000.0f * iEntry.score());
long score = (long) Math.max(0, (1000000.0f * iEntry.score()) - iEntry.urllength()); // we modify the score here since the solr score is equal in many cases and then the order would simply depend on the url hash which would be silly
//System.out.println("*** debug-score *** " + score + " for entry " + iEntry.urlstring());
this.nodeStack.put(new ReverseElement<URIMetadataNode>(iEntry, score == 0 ? this.order.cardinal(iEntry) : score)); // inserts the element and removes the worst (which is smallest)
break rankingtryloop;
} catch (final ArithmeticException e ) {
@ -1519,13 +1520,15 @@ public final class SearchEvent {
final URIMetadataNode re = this.resultList.element(item).getElement();
EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.EventSearch(this.query.id(true), SearchEventType.ONERESULT, "fetched, item = " + item + ", available = " + this.getResultCount() + ": " + re.urlstring(), 0, 0), false);
if (this.localsolrsearch == null || !this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0) {
/*
if (this.localsolrsearch == null || (!this.localsolrsearch.isAlive() && this.local_solr_stored.get() > this.localsolroffset && (item + 1) % this.query.itemsPerPage == 0)) {
// at the end of a list, trigger a next solr search
if (!Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.DEBUG_SEARCH_LOCAL_SOLR_OFF, false)) {
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null /*this peer*/, 0, Switchboard.urlBlacklist);
this.localsolrsearch = RemoteSearch.solrRemoteSearch(this, this.query.solrQuery(this.query.contentdom, false, this.excludeintext_image), this.localsolroffset, this.query.itemsPerPage, null, 0, Switchboard.urlBlacklist);
}
this.localsolroffset += this.query.itemsPerPage;
}
*/
return re;
}

Loading…
Cancel
Save