update to Solr Boost handling

pull/1/head
Michael Peter Christen 12 years ago
parent 908ad2f174
commit 8aa08261a7

@ -1,3 +1,22 @@
/**
* RankingSolr_p
* Copyright 2012 by Michael Peter Christen
* First released 30.11.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.Map;
@ -52,7 +71,7 @@ public class RankingSolr_p {
if (boostString.length() > 0) {
String s = boostString.toString();
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, s);
Boost.RANKING.update(s);
Boost.RANKING.updateBoosts(s);
}
}

@ -102,7 +102,7 @@ public class searchresult {
sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time
// update the boost values
Boost.RANKING.update(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, ""));
Boost.RANKING.updateBoosts(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, ""));
// rename post fields according to result style
//post.put(CommonParams.Q, post.remove("q")); // same as solr

@ -35,6 +35,19 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
private static final long serialVersionUID = 5248172257724571603L;
public final static Boost RANKING = new Boost();
/**
 * Schema fields that are searched for a query goal.
 * QueryGoal loops over this array when it combines the queries for all relevant fields.
 */
public final static YaCySchema[] GOAL_FIELDS = new YaCySchema[]{
YaCySchema.sku,
YaCySchema.url_paths_sxt,
YaCySchema.title,
YaCySchema.h1_txt,
YaCySchema.h2_txt,
YaCySchema.author,
YaCySchema.description,
YaCySchema.keywords,
YaCySchema.text_t,
YaCySchema.synonyms_sxt
};
// for minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLen = 3 the quantRate value must not be below 0.5!
private float quantRate = 0.5f; // to be filled with search.ranking.solr.doubledetection.quantrate
@ -55,6 +68,9 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
put(YaCySchema.description, 5.0f);
put(YaCySchema.keywords, 2.0f);
put(YaCySchema.text_t, 1.0f);
put(YaCySchema.synonyms_sxt, 0.9f);
// boosts on non-goal fields which are used for a special ranking order
put(YaCySchema.fuzzy_signature_unique_b, 100000.0f); // must be very high to move double results to end of list
}
@ -73,7 +89,7 @@ public class Boost extends LinkedHashMap<YaCySchema, Float> {
* This should be called with the field in search.ranking.solr.boost
* @param boostDef the definition string
*/
public void update(String boostDef) {
public void updateBoosts(String boostDef) {
// call e.g. with "sku^20.0f,url_paths_sxt^20.0f,title^15.0f,h1_txt^11.0f,h2_txt^10.0f,author^8.0f,description^5.0f,keywords^2.0f,text_t^1.0f,fuzzy_signature_unique_b^100000.0f"
if (boostDef == null || boostDef.length() == 0) return;
String[] bf = CommonPattern.COMMA.split(boostDef);

@ -402,7 +402,7 @@ public final class Switchboard extends serverSwitch {
ConfigurationSet.Entry entry = solrScheme.get(field.name()); entry.setEnable(true); solrScheme.put(field.name(), entry);
}
solrScheme.commit();
Boost.RANKING.update(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // must be called every time the boosts change
Boost.RANKING.updateBoosts(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // must be called every time the boosts change
Boost.RANKING.setMinTokenLen(this.getConfigInt(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, 3));
Boost.RANKING.setQuantRate(this.getConfigFloat(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, 0.5f));

@ -24,8 +24,6 @@ package net.yacy.search.query;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.SortedSet;
import net.yacy.cora.federate.solr.Boost;
@ -216,11 +214,6 @@ public class QueryGoal {
final HandleSet blues = Word.words2hashesHandles(blueList);
for (final byte[] b: blues) this.include_hashes.remove(b);
}
// schema fields over which the per-field queries are combined (see the loop over 'fields')
private final static YaCySchema[] fields = new YaCySchema[]{
YaCySchema.sku,YaCySchema.title,YaCySchema.h1_txt,YaCySchema.h2_txt,
YaCySchema.author,YaCySchema.description,YaCySchema.keywords,YaCySchema.text_t,YaCySchema.synonyms_sxt
};
public StringBuilder solrQueryString(SolrConfiguration configuration) {
final StringBuilder q = new StringBuilder(80);
@ -249,7 +242,7 @@ public class QueryGoal {
// combine these queries for all relevant fields
wc = 0;
Float boost;
for (YaCySchema field: fields) {
for (YaCySchema field: Boost.GOAL_FIELDS) {
if (configuration != null && !configuration.contains(field.getSolrFieldName())) continue;
if (wc > 0) q.append(" OR ");
q.append('(');

@ -151,7 +151,7 @@ public class SearchEventCache {
// start a new event
Switchboard sb = Switchboard.getSwitchboard();
final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true);
if (sb != null) Boost.RANKING.update(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // update the boost values
if (sb != null) Boost.RANKING.updateBoosts(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // update the boost values
event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, burstRobinsonPercent, burstMultiwordPercent, delete);
MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads
}

Loading…
Cancel
Save