Merge remote-tracking branch 'origin/master' into docker

pull/52/head
luc 9 years ago
commit 45a5fde7b1

@ -1036,7 +1036,7 @@ search.ranking.rwi.profile =
search.ranking.solr.collection.boostname.tmpa.0=Default Profile search.ranking.solr.collection.boostname.tmpa.0=Default Profile
search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0 search.ranking.solr.collection.boostfields.tmpa.0=url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^3.0,keywords^2.0,description_txt^1.5,author^1.0
search.ranking.solr.collection.filterquery.tmpa.0= search.ranking.solr.collection.filterquery.tmpa.0=
search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8 crawldepth_i:1^0.4 search.ranking.solr.collection.boostquery.tmpa.0=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.0= search.ranking.solr.collection.boostfunction.tmpb.0=
search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage search.ranking.solr.collection.boostname.tmpa.1=Date Profile: sort by date in descending order for a '/date' usage
search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1 search.ranking.solr.collection.boostfields.tmpa.1=url_paths_sxt^0.1,title^0.1,text_t^0.1
@ -1051,7 +1051,7 @@ search.ranking.solr.collection.boostfunction.tmpb.2=
search.ranking.solr.collection.boostname.tmpa.3=_unused3 search.ranking.solr.collection.boostname.tmpa.3=_unused3
search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0 search.ranking.solr.collection.boostfields.tmpa.3=text_t^1.0
search.ranking.solr.collection.filterquery.tmpa.3= search.ranking.solr.collection.filterquery.tmpa.3=
search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8 crawldepth_i:1^0.4 search.ranking.solr.collection.boostquery.tmpa.3=crawldepth_i:0^0.8\ncrawldepth_i:1^0.4
search.ranking.solr.collection.boostfunction.tmpb.3= search.ranking.solr.collection.boostfunction.tmpb.3=
# the following values are used to identify duplicate content # the following values are used to identify duplicate content

@ -50,7 +50,7 @@
<dl> <dl>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bq" id="bq_label">bq=</label></dt> <dt style="width:260px;margin:0;padding:0;height:1.8em;"><label for="bq" id="bq_label">bq=</label></dt>
<dd style="width:360px;margin:0;padding:0;float:left;display:inline;" id="bq_dd"> <dd style="width:360px;margin:0;padding:0;float:left;display:inline;" id="bq_dd">
<textarea name="bq" id="bq" align="left" cols="96" rows="1"/>#[bq]#</textarea> <textarea name="bq" id="bq" align="left" cols="96" rows="5"/>#[bq]#</textarea>
</dd> </dd>
<dt style="width:260px;margin:0;padding:0;height:1.8em;"></dt> <dt style="width:260px;margin:0;padding:0;height:1.8em;"></dt>
<dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;"> <dd style="width:360px;margin:0;padding:0;height:1.8em;float:left;display:inline;">

@ -97,7 +97,7 @@ public class RankingSolr_p {
} }
} }
if (post != null && post.containsKey("ResetBQ")) { if (post != null && post.containsKey("ResetBQ")) {
String bq = "crawldepth_i:0^0.8 crawldepth_i:1^0.4"; String bq = "crawldepth_i:0^0.8\ncrawldepth_i:1^0.4";
if (bq != null) { if (bq != null) {
sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr, bq); sb.setConfig(SwitchboardConstants.SEARCH_RANKING_SOLR_COLLECTION_BOOSTQUERY_ + profileNr, bq);
sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostQuery(bq); sb.index.fulltext().getDefaultConfiguration().getRanking(profileNr).setBoostQuery(bq);

@ -59,6 +59,7 @@ import net.yacy.search.query.SearchEvent;
import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.CollectionSchema;
import net.yacy.search.schema.WebgraphSchema; import net.yacy.search.schema.WebgraphSchema;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
@ -168,7 +169,7 @@ public class SolrSelectServlet extends HttpServlet {
String bq = ranking.getBoostQuery(); String bq = ranking.getBoostQuery();
String bf = ranking.getBoostFunction(); String bf = ranking.getBoostFunction();
if (fq.length() > 0) mmsp.getMap().put(CommonParams.FQ, new String[]{fq}); if (fq.length() > 0) mmsp.getMap().put(CommonParams.FQ, new String[]{fq});
if (bq.length() > 0) mmsp.getMap().put(DisMaxParams.BQ, new String[]{bq}); if (bq.length() > 0) mmsp.getMap().put(DisMaxParams.BQ, StringUtils.split(bq,"\t\n\r\f")); // bq split into multiple query params, allowing space in single query
if (bf.length() > 0) mmsp.getMap().put("boost", new String[]{bf}); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) mmsp.getMap().put("boost", new String[]{bf}); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
} }

@ -54,13 +54,14 @@ import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.encoding.UTF8; import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.storage.Files; import net.yacy.cora.storage.Files;
import net.yacy.cora.util.ConcurrentLog; import net.yacy.cora.util.ConcurrentLog;
import org.apache.commons.lang.StringUtils;
import org.mozilla.intl.chardet.nsDetector; import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsPSMDetector; import org.mozilla.intl.chardet.nsPSMDetector;
@ -419,11 +420,10 @@ public final class FileUtils {
return mb; return mb;
} }
private final static Pattern backslashbackslash = Pattern.compile("\\\\"); private final static String[] unescaped_strings_in = {"\r\n", "\r", "\n", "=", "\\"};
private final static Pattern unescaped_equal = Pattern.compile("="); private final static String[] escaped_strings_out = {"\\n", "\\n", "\\n", "\\=", "\\\\"};
private final static Pattern escaped_equal = Pattern.compile("\\=", Pattern.LITERAL); private final static String[] escaped_strings_in = {"\\\\", "\\n", "\\="};
private final static Pattern escaped_newline = Pattern.compile("\\n", Pattern.LITERAL); private final static String[] unescaped_strings_out = {"\\", "\n", "="};
private final static Pattern escaped_backslash = Pattern.compile(Pattern.quote("\\"), Pattern.LITERAL);
public static void saveMap(final File file, final Map<String, String> props, final String comment) { public static void saveMap(final File file, final Map<String, String> props, final String comment) {
boolean err = false; boolean err = false;
@ -436,16 +436,13 @@ public final class FileUtils {
for ( final Map.Entry<String, String> entry : props.entrySet() ) { for ( final Map.Entry<String, String> entry : props.entrySet() ) {
key = entry.getKey(); key = entry.getKey();
if ( key != null ) { if ( key != null ) {
key = backslashbackslash.matcher(key).replaceAll("\\\\"); key = StringUtils.replaceEach(key, unescaped_strings_in, escaped_strings_out);
key = escaped_newline.matcher(key).replaceAll("\\n");
key = unescaped_equal.matcher(key).replaceAll("\\=");
} }
if ( entry.getValue() == null ) { if ( entry.getValue() == null ) {
value = ""; value = "";
} else { } else {
value = entry.getValue(); value = entry.getValue();
value = backslashbackslash.matcher(value).replaceAll("\\\\"); value = StringUtils.replaceEach(value, unescaped_strings_in, escaped_strings_out);
value = escaped_newline.matcher(value).replaceAll("\\n");
} }
pw.println(key + "=" + value); pw.println(key + "=" + value);
} }
@ -494,11 +491,8 @@ public final class FileUtils {
pos = line.indexOf('=', pos + 1); pos = line.indexOf('=', pos + 1);
} while ( pos > 0 && line.charAt(pos - 1) == '\\' ); } while ( pos > 0 && line.charAt(pos - 1) == '\\' );
if ( pos > 0 ) try { if ( pos > 0 ) try {
String key = escaped_equal.matcher(line.substring(0, pos).trim()).replaceAll("="); String key = StringUtils.replaceEach(line.substring(0, pos).trim(), escaped_strings_in, unescaped_strings_out);
key = escaped_newline.matcher(key).replaceAll("\n"); String value = StringUtils.replaceEach(line.substring(pos + 1).trim(), escaped_strings_in, unescaped_strings_out);
key = escaped_backslash.matcher(key).replaceAll("\\");
String value = escaped_newline.matcher(line.substring(pos + 1).trim()).replaceAll("\n");
value = value.replace("\\\\", "\\"); // does not work: escaped_backslashbackslash.matcher(value).replaceAll("\\");
//System.out.println("key = " + key + ", value = " + value); //System.out.println("key = " + key + ", value = " + value);
props.put(key, value); props.put(key, value);
} catch (final IndexOutOfBoundsException e) { } catch (final IndexOutOfBoundsException e) {

@ -537,6 +537,7 @@ public final class Switchboard extends serverSwitch {
bf.equals("scale(cr_host_norm_i,1,20)")) bf = ""; bf.equals("scale(cr_host_norm_i,1,20)")) bf = "";
if (bf.equals("recip(rord(last_modified),1,1000,1000))")) bf = "recip(ms(NOW,last_modified),3.16e-11,1,1)"; // that was an outdated date boost that did not work well if (bf.equals("recip(rord(last_modified),1,1000,1000))")) bf = "recip(ms(NOW,last_modified),3.16e-11,1,1)"; // that was an outdated date boost that did not work well
if (i == 0 && bq.equals("fuzzy_signature_unique_b:true^100000.0")) bq = "crawldepth_i:0^0.8 crawldepth_i:1^0.4"; if (i == 0 && bq.equals("fuzzy_signature_unique_b:true^100000.0")) bq = "crawldepth_i:0^0.8 crawldepth_i:1^0.4";
if (bq.equals("crawldepth_i:0^0.8 crawldepth_i:1^0.4")) bq = "crawldepth_i:0^0.8\ncrawldepth_i:1^0.4"; // Fix issue with multiple Boost Queries
if (boosts.equals("url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0")) boosts = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^2.0"; if (boosts.equals("url_paths_sxt^1000.0,synonyms_sxt^1.0,title^10000.0,text_t^2.0,h1_txt^1000.0,h2_txt^100.0,host_organization_s^100000.0")) boosts = "url_paths_sxt^3.0,synonyms_sxt^0.5,title^5.0,text_t^1.0,host_s^6.0,h1_txt^5.0,url_file_name_tokens_t^4.0,h2_txt^2.0";
r.setName(name); r.setName(name);
r.updateBoosts(boosts); r.updateBoosts(boosts);

@ -386,7 +386,7 @@ public final class QueryParams {
if (!qf.isEmpty()) params.setParam(DisMaxParams.QF, qf); if (!qf.isEmpty()) params.setParam(DisMaxParams.QF, qf);
if (this.queryGoal.getIncludeSize() > 1) { if (this.queryGoal.getIncludeSize() > 1) {
// add boost on combined words // add boost on combined words
if (bq.length() > 0) bq += " "; if (bq.length() > 0) bq += "\n";
bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10"; bq += CollectionSchema.text_t.getSolrFieldName() + ":\"" + this.queryGoal.getIncludeString() + "\"^10";
} }
if (fq.length() > 0) { if (fq.length() > 0) {
@ -396,7 +396,7 @@ public final class QueryParams {
newfq.add(fq); newfq.add(fq);
params.setFilterQueries(newfq.toArray(new String[newfq.size()])); params.setFilterQueries(newfq.toArray(new String[newfq.size()]));
} }
if (bq.length() > 0) params.setParam(DisMaxParams.BQ, bq); if (bq.length() > 0) params.setParam(DisMaxParams.BQ, bq.split("[\\r\\n]+")); // split on any sequence consisting of CR and/or LF
if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29 if (bf.length() > 0) params.setParam("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
// prepare result // prepare result

Loading…
Cancel
Save