From 8aa08261a775e3ab486719a2fa4489f83348ef84 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Wed, 5 Dec 2012 12:26:42 +0100 Subject: [PATCH] update to Solr Boost handling --- htroot/RankingSolr_p.java | 21 ++++++++++++++++++- htroot/gsa/searchresult.java | 2 +- source/net/yacy/cora/federate/solr/Boost.java | 18 +++++++++++++++- source/net/yacy/search/Switchboard.java | 2 +- source/net/yacy/search/query/QueryGoal.java | 9 +------- .../yacy/search/query/SearchEventCache.java | 2 +- 6 files changed, 41 insertions(+), 13 deletions(-) diff --git a/htroot/RankingSolr_p.java b/htroot/RankingSolr_p.java index 2d6a6746d..119cd1aee 100644 --- a/htroot/RankingSolr_p.java +++ b/htroot/RankingSolr_p.java @@ -1,3 +1,22 @@ +/** + * RankingSolr_p + * Copyright 2012 by Michael Peter Christen + * First released 30.11.2012 at http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>.
+ */ import java.util.Map; @@ -52,7 +71,7 @@ public class RankingSolr_p { if (boostString.length() > 0) { String s = boostString.toString(); sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, s); - Boost.RANKING.update(s); + Boost.RANKING.updateBoosts(s); } } diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index a86694574..6cc14dc36 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -102,7 +102,7 @@ public class searchresult { sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time // update the boost values - Boost.RANKING.update(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); + Boost.RANKING.updateBoosts(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // rename post fields according to result style //post.put(CommonParams.Q, post.remove("q")); // same as solr diff --git a/source/net/yacy/cora/federate/solr/Boost.java b/source/net/yacy/cora/federate/solr/Boost.java index ad1684618..c6a8c3e18 100644 --- a/source/net/yacy/cora/federate/solr/Boost.java +++ b/source/net/yacy/cora/federate/solr/Boost.java @@ -35,6 +35,19 @@ public class Boost extends LinkedHashMap { private static final long serialVersionUID = 5248172257724571603L; public final static Boost RANKING = new Boost(); + + public final static YaCySchema[] GOAL_FIELDS = new YaCySchema[]{ + YaCySchema.sku, + YaCySchema.url_paths_sxt, + YaCySchema.title, + YaCySchema.h1_txt, + YaCySchema.h2_txt, + YaCySchema.author, + YaCySchema.description, + YaCySchema.keywords, + YaCySchema.text_t, + YaCySchema.synonyms_sxt + }; // for minTokenLen = 2 the quantRate value should not be below 0.24; for minTokenLen = 3 the quantRate value must be not below 0.5! 
private float quantRate = 0.5f; // to be filled with search.ranking.solr.doubledetection.quantrate @@ -55,6 +68,9 @@ public class Boost extends LinkedHashMap { put(YaCySchema.description, 5.0f); put(YaCySchema.keywords, 2.0f); put(YaCySchema.text_t, 1.0f); + put(YaCySchema.synonyms_sxt, 0.9f); + + // boosts on non-goal fields which are used for a special ranking order put(YaCySchema.fuzzy_signature_unique_b, 100000.0f); // must be very high to move double results to end of list } @@ -73,7 +89,7 @@ public class Boost extends LinkedHashMap { * This should be called with the field in search.ranking.solr.boost * @param boostDef the definition string */ - public void update(String boostDef) { + public void updateBoosts(String boostDef) { // call i.e. with "sku^20.0f,url_paths_sxt^20.0f,title^15.0f,h1_txt^11.0f,h2_txt^10.0f,author^8.0f,description^5.0f,keywords^2.0f,text_t^1.0f,fuzzy_signature_unique_b^100000.0f" if (boostDef == null || boostDef.length() == 0) return; String[] bf = CommonPattern.COMMA.split(boostDef); diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 3b81afbae..d06c3363c 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -402,7 +402,7 @@ public final class Switchboard extends serverSwitch { ConfigurationSet.Entry entry = solrScheme.get(field.name()); entry.setEnable(true); solrScheme.put(field.name(), entry); } solrScheme.commit(); - Boost.RANKING.update(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // must be called every time the boosts change + Boost.RANKING.updateBoosts(this.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // must be called every time the boosts change Boost.RANKING.setMinTokenLen(this.getConfigInt(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_MINLENGTH, 3)); Boost.RANKING.setQuantRate(this.getConfigFloat(SwitchboardConstants.SEARCH_RANKING_SOLR_DOUBLEDETECTION_QUANTRATE, 0.5f)); diff 
--git a/source/net/yacy/search/query/QueryGoal.java b/source/net/yacy/search/query/QueryGoal.java index dc71f7716..7c1822296 100644 --- a/source/net/yacy/search/query/QueryGoal.java +++ b/source/net/yacy/search/query/QueryGoal.java @@ -24,8 +24,6 @@ package net.yacy.search.query; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.Map; import java.util.SortedSet; import net.yacy.cora.federate.solr.Boost; @@ -216,11 +214,6 @@ public class QueryGoal { final HandleSet blues = Word.words2hashesHandles(blueList); for (final byte[] b: blues) this.include_hashes.remove(b); } - - private final static YaCySchema[] fields = new YaCySchema[]{ - YaCySchema.sku,YaCySchema.title,YaCySchema.h1_txt,YaCySchema.h2_txt, - YaCySchema.author,YaCySchema.description,YaCySchema.keywords,YaCySchema.text_t,YaCySchema.synonyms_sxt - }; public StringBuilder solrQueryString(SolrConfiguration configuration) { final StringBuilder q = new StringBuilder(80); @@ -249,7 +242,7 @@ public class QueryGoal { // combine these queries for all relevant fields wc = 0; Float boost; - for (YaCySchema field: fields) { + for (YaCySchema field: Boost.GOAL_FIELDS) { if (configuration != null && !configuration.contains(field.getSolrFieldName())) continue; if (wc > 0) q.append(" OR "); q.append('('); diff --git a/source/net/yacy/search/query/SearchEventCache.java b/source/net/yacy/search/query/SearchEventCache.java index de2d2bde8..c9cafdc1c 100644 --- a/source/net/yacy/search/query/SearchEventCache.java +++ b/source/net/yacy/search/query/SearchEventCache.java @@ -151,7 +151,7 @@ public class SearchEventCache { // start a new event Switchboard sb = Switchboard.getSwitchboard(); final boolean delete = sb == null || Switchboard.getSwitchboard().getConfigBool(SwitchboardConstants.SEARCH_VERIFY_DELETE, true); - if (sb != null) Boost.RANKING.update(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // 
update the boost values + if (sb != null) Boost.RANKING.updateBoosts(sb.getConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_BOOST, "")); // update the boost values event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, remote_maxcount, remote_maxtime, burstRobinsonPercent, burstMultiwordPercent, delete); MemoryControl.request(100 * 1024 * 1024, false); // this may trigger a short memory status which causes a reducing of cache space of other threads }