diff --git a/defaults/solr.keys.list b/defaults/solr.keys.list index 48c9ce1bb..81f0b322b 100644 --- a/defaults/solr.keys.list +++ b/defaults/solr.keys.list @@ -119,8 +119,8 @@ responsetime_i ## all visible text, text text_t -## additional synonyms to the words in the text, text -synonyms_t +## additional synonyms to the words in the text +synonyms_sxt ## h1 header h1_txt diff --git a/defaults/yacy.init b/defaults/yacy.init index 13dd88b93..3bdfc1369 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1113,3 +1113,7 @@ interaction.dontimportbookmarks = interaction.autocrawler.enabled = false interaction.autocrawler.domainfilter = .* interaction.autocrawler.categoryfilter = .* + +# host browser settings +browser.autoload = true +browser.load4everyone = false diff --git a/source/net/yacy/cora/federate/solr/YaCySchema.java b/source/net/yacy/cora/federate/solr/YaCySchema.java index 8c1cf556f..7a749464b 100644 --- a/source/net/yacy/cora/federate/solr/YaCySchema.java +++ b/source/net/yacy/cora/federate/solr/YaCySchema.java @@ -67,7 +67,7 @@ public enum YaCySchema implements Schema { imagescount_i(SolrType.integer, true, true, false, "number of images"), responsetime_i(SolrType.integer, true, true, false, "response time of target server in milliseconds"), text_t(SolrType.text_general, true, true, false, "all visible text"), - synonyms_t(SolrType.text_general, true, true, false, "additional synonyms to the words in the text"), + synonyms_sxt(SolrType.string, true, true, true, "additional synonyms to the words in the text"), h1_txt(SolrType.text_general, true, true, true, "h1 header"), h2_txt(SolrType.text_general, true, true, true, "h2 header"), h3_txt(SolrType.text_general, true, true, true, "h3 header"), diff --git a/source/net/yacy/document/Condenser.java b/source/net/yacy/document/Condenser.java index 67a44da03..f524645e3 100644 --- a/source/net/yacy/document/Condenser.java +++ b/source/net/yacy/document/Condenser.java @@ -24,9 +24,12 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; @@ -81,14 +84,14 @@ public final class Condenser { final boolean indexText, final boolean indexMedia, final WordCache meaningLib, - final SynonymLibrary stemming, + final SynonymLibrary synonyms, final boolean doAutotagging ) { Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging // if addMedia == true, then all the media links are also parsed and added to the words // added media words are flagged with the appropriate media flag this.words = new HashMap(); - this.synonyms = new HashSet(); + this.synonyms = new LinkedHashSet(); this.RESULT_FLAGS = new Bitfield(4); // construct flag set for document @@ -208,9 +211,9 @@ public final class Condenser { } // create the synonyms set - if (stemming != null) { + if (synonyms != null) { for (String word: this.words.keySet()) { - Set syms = stemming.getSynonyms(word); + Set syms = synonyms.getSynonyms(word); if (syms != null) this.synonyms.addAll(syms); } } @@ -268,8 +271,10 @@ public final class Condenser { return this.words; } - public Set synonyms() { - return this.synonyms; + public List synonyms() { + ArrayList l = new ArrayList(this.synonyms.size()); + for (String s: this.synonyms) l.add(s); + return l; } public String language() { diff --git a/source/net/yacy/search/index/Fulltext.java b/source/net/yacy/search/index/Fulltext.java index 5e4fb1a25..828512e75 100644 --- a/source/net/yacy/search/index/Fulltext.java +++ b/source/net/yacy/search/index/Fulltext.java @@ -357,6 +357,14 @@ public final class Fulltext implements Iterable { return false; } + public String failReason(final String urlHash) throws IOException { + if (urlHash == null) return null; + SolrDocument doc = this.solr.get(urlHash); + if (doc == null) return null; + String reason = (String) doc.getFieldValue(YaCySchema.failreason_t.name()); + return reason == null ? null : reason.length() == 0 ? null : reason; + } + @Override public Iterator iterator() { CloneableIterator a = null; diff --git a/source/net/yacy/search/index/SolrConfiguration.java b/source/net/yacy/search/index/SolrConfiguration.java index 33bad5ef3..584b9aa40 100644 --- a/source/net/yacy/search/index/SolrConfiguration.java +++ b/source/net/yacy/search/index/SolrConfiguration.java @@ -416,12 +416,9 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable final int contentwc = content.split(" ").length; add(doc, YaCySchema.wordcount_i, contentwc); } - if (allAttr || contains(YaCySchema.synonyms_t)) { - Set synonyms = condenser.synonyms(); - StringBuilder s = new StringBuilder(synonyms.size() * 8); - for (String o: synonyms) s.append(o).append(' '); - if (s.length() > 0) s.setLength(s.length() - 1); - add(doc, YaCySchema.synonyms_t, s.toString()); + if (allAttr || contains(YaCySchema.synonyms_sxt)) { + List synonyms = condenser.synonyms(); + add(doc, YaCySchema.synonyms_sxt, synonyms); } // path elements of link