diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 8fd2fe3ec..8d56eb999 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -30,6 +30,7 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; +import java.util.regex.Pattern; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; @@ -89,11 +90,17 @@ public class Vocabulary_p { if (discoverFromCSV && discoverFromCSVFile != null && discoverFromCSVFile.exists()) { BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(discoverFromCSVFile), discoverFromCSVCharset)); String line = null; + Pattern semicolon = Pattern.compile(";"); while ((line = r.readLine()) != null) { - String[] l = line.split(";"); + if (line.length() == 0) continue; + String[] l = semicolon.split(line); + if (l.length == 0) l = new String[]{line}; String literal = discovercolumnliteral < 0 || l.length <= discovercolumnliteral ? null : l[discovercolumnliteral].trim(); + if (literal == null) continue; + if (literal.length() > 0 && (literal.charAt(0) == '"' || literal.charAt(0) == '\'')) literal = literal.substring(1); + if (literal.length() > 0 && (literal.charAt(literal.length() - 1) == '"' || literal.charAt(literal.length() - 1) == '\'')) literal = literal.substring(0, literal.length() - 1); String objectlink = discovercolumnobjectlink < 0 || l.length <= discovercolumnobjectlink ? null : l[discovercolumnobjectlink].trim(); - if (literal != null && literal.length() > 0) { + if (literal.length() > 0) { String synonyms = Tagging.normalizeTerm(literal); if (discoverenrichsynonyms) { Set sy = SynonymLibrary.getSynonyms(literal);