turned the synonyms_t Text field into a multi-valued String field

synonyms_sxt
pull/1/head
Michael Peter Christen 13 years ago
parent 41ab2a2279
commit 3d33a5bdf6

@ -119,8 +119,8 @@ responsetime_i
## all visible text, text
text_t
## additional synonyms to the words in the text, text
synonyms_t
## additional synonyms to the words in the text
synonyms_sxt
## h1 header
h1_txt

@ -1113,3 +1113,7 @@ interaction.dontimportbookmarks =
interaction.autocrawler.enabled = false
interaction.autocrawler.domainfilter = .*
interaction.autocrawler.categoryfilter = .*
# host browser settings
browser.autoload = true
browser.load4everyone = false

@ -67,7 +67,7 @@ public enum YaCySchema implements Schema {
imagescount_i(SolrType.integer, true, true, false, "number of images"),
responsetime_i(SolrType.integer, true, true, false, "response time of target server in milliseconds"),
text_t(SolrType.text_general, true, true, false, "all visible text"),
synonyms_t(SolrType.text_general, true, true, false, "additional synonyms to the words in the text"),
synonyms_sxt(SolrType.string, true, true, true, "additional synonyms to the words in the text"),
h1_txt(SolrType.text_general, true, true, true, "h1 header"),
h2_txt(SolrType.text_general, true, true, true, "h2 header"),
h3_txt(SolrType.text_general, true, true, true, "h3 header"),

@ -24,9 +24,12 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
@ -81,14 +84,14 @@ public final class Condenser {
final boolean indexText,
final boolean indexMedia,
final WordCache meaningLib,
final SynonymLibrary stemming,
final SynonymLibrary synonyms,
final boolean doAutotagging
) {
Thread.currentThread().setName("condenser-" + document.dc_identifier()); // for debugging
// if addMedia == true, then all the media links are also parsed and added to the words
// added media words are flagged with the appropriate media flag
this.words = new HashMap<String, Word>();
this.synonyms = new HashSet<String>();
this.synonyms = new LinkedHashSet<String>();
this.RESULT_FLAGS = new Bitfield(4);
// construct flag set for document
@ -208,9 +211,9 @@ public final class Condenser {
}
// create the synonyms set
if (stemming != null) {
if (synonyms != null) {
for (String word: this.words.keySet()) {
Set<String> syms = stemming.getSynonyms(word);
Set<String> syms = synonyms.getSynonyms(word);
if (syms != null) this.synonyms.addAll(syms);
}
}
@ -268,8 +271,10 @@ public final class Condenser {
return this.words;
}
public Set<String> synonyms() {
return this.synonyms;
/**
 * Returns the collected synonyms as a freshly allocated list.
 *
 * The list is a copy of the internal synonym set, filled in the
 * iteration order of that set; changing the returned list does not
 * affect the set held by this object.
 *
 * @return a new list holding every entry of the synonym set
 */
public List<String> synonyms() {
    // the copy constructor walks the set with its iterator, same as an explicit loop
    final List<String> copy = new ArrayList<String>(this.synonyms);
    return copy;
}
public String language() {

@ -357,6 +357,14 @@ public final class Fulltext implements Iterable<byte[]> {
return false;
}
/**
 * Reads the fail reason stored for the document with the given url hash.
 *
 * @param urlHash the hash used to fetch the document from this.solr; may be null
 * @return the value of the failreason_t field, or null when urlHash is null,
 *         no document is found, or the field is missing or empty
 * @throws IOException declared by this method; presumably raised by the
 *         solr lookup (verify against the connector)
 */
public String failReason(final String urlHash) throws IOException {
    if (urlHash == null) {
        return null;
    }
    final SolrDocument entry = this.solr.get(urlHash);
    if (entry == null) {
        return null;
    }
    final String text = (String) entry.getFieldValue(YaCySchema.failreason_t.name());
    // an empty string is reported the same way as a missing field
    if (text == null || text.length() == 0) {
        return null;
    }
    return text;
}
@Override
public Iterator<byte[]> iterator() {
CloneableIterator<byte[]> a = null;

@ -416,12 +416,9 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
final int contentwc = content.split(" ").length;
add(doc, YaCySchema.wordcount_i, contentwc);
}
if (allAttr || contains(YaCySchema.synonyms_t)) {
Set<String> synonyms = condenser.synonyms();
StringBuilder s = new StringBuilder(synonyms.size() * 8);
for (String o: synonyms) s.append(o).append(' ');
if (s.length() > 0) s.setLength(s.length() - 1);
add(doc, YaCySchema.synonyms_t, s.toString());
if (allAttr || contains(YaCySchema.synonyms_sxt)) {
List<String> synonyms = condenser.synonyms();
add(doc, YaCySchema.synonyms_sxt, synonyms);
}
// path elements of link

Loading…
Cancel
Save