diff --git a/yacy.badwords.example b/defaults/yacy.badwords.example similarity index 81% rename from yacy.badwords.example rename to defaults/yacy.badwords.example index f78e151ab..23ac72aaf 100644 --- a/yacy.badwords.example +++ b/defaults/yacy.badwords.example @@ -15,6 +15,7 @@ # $LastChangedRevision$ # $LastChangedBy$ # +# if list exists in DATA/SETTINGS/ it takes precedence, otherwise list is loaded from defaults/ directory aber allen alles diff --git a/defaults/yacy.stopwords b/defaults/yacy.stopwords new file mode 100644 index 000000000..b89f5d4ca --- /dev/null +++ b/defaults/yacy.stopwords @@ -0,0 +1,6 @@ +# Default stopword list (always loaded) +# a configured language specific stopword list is appended (like: yacy.stopwords.de) +# (depending on the language of the yacy user interface) +# Stopwords are excluded from search queries and from the topwords navigation (in addition to yacy.badwords) +# +# if list exists in DATA/SETTINGS/ it takes precedence, otherwise list is loaded from defaults/ directory diff --git a/yacy.stopwords.de b/defaults/yacy.stopwords.de similarity index 100% rename from yacy.stopwords.de rename to defaults/yacy.stopwords.de diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 3166e521c..1202c11d6 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -633,7 +633,10 @@ public final class Switchboard extends serverSwitch { // load badwords (to filter the topwords) if ( badwords == null || badwords.isEmpty() ) { - final File badwordsFile = new File(appPath, SwitchboardConstants.LIST_BADWORDS_DEFAULT); + File badwordsFile = new File(appPath, "/DATA/SETTINGS/" + SwitchboardConstants.LIST_BADWORDS_DEFAULT); + if (!badwordsFile.exists()) { + badwordsFile = new File(appPath, SwitchboardConstants.LIST_BADWORDS_DEFAULT); + } badwords = SetTools.loadList(badwordsFile, NaturalOrder.naturalComparator); // badwordHashes = 
Word.words2hashesHandles(badwords); this.log.config("loaded badwords from file " @@ -644,30 +647,30 @@ public final class Switchboard extends serverSwitch { + ppRamString(badwordsFile.length() / 1024)); } - // load stopwords + // load stopwords (to filter query and topwords) if ( stopwords == null || stopwords.isEmpty() ) { - final File stopwordsFile = new File(appPath, SwitchboardConstants.LIST_STOPWORDS_DEFAULT); + File stopwordsFile = new File(dataPath, "/DATA/SETTINGS/" + SwitchboardConstants.LIST_STOPWORDS_DEFAULT); + if (!stopwordsFile.exists()) { + stopwordsFile = new File(appPath, "defaults/"+SwitchboardConstants.LIST_STOPWORDS_DEFAULT); + } stopwords = SetTools.loadList(stopwordsFile, NaturalOrder.naturalComparator); // append locale language stopwords using setting of interface language (file yacy.stopwords.xx) - //TODO: append / share Solr stopwords.txt - final File stopwordsFilelocale = new File (stopwordsFile.getAbsolutePath()+"."+this.getConfig("locale.language","default")); + String lng = this.getConfig("locale.language", "en"); + if ("default".equals(lng)) lng="en"; // english is stored as default (needed for locale html file overlay) + File stopwordsFilelocale = new File (dataPath, "DATA/SETTINGS/"+stopwordsFile.getName()+"."+lng); + if (!stopwordsFilelocale.exists()) stopwordsFilelocale = new File (appPath, "defaults/"+stopwordsFile.getName()+"."+lng); if (stopwordsFilelocale.exists()) { + // load YaCy locale stopword list stopwords.addAll(SetTools.loadList(stopwordsFilelocale, NaturalOrder.naturalComparator)); - } - - if (!stopwords.isEmpty()) { - stopwordHashes = new TreeSet(NaturalOrder.naturalOrder); - for (final String wordstr : stopwords) { - stopwordHashes.add(Word.word2hash(wordstr)); + this.log.config("append stopwords from file " + stopwordsFilelocale.getName()); + } else { + // alternatively load/append default solr stopword list + stopwordsFilelocale = new File (appPath, "defaults/solr/lang/stopwords_" + lng + ".txt"); + if 
(stopwordsFilelocale.exists()) { + stopwords.addAll(SetTools.loadList(stopwordsFilelocale, NaturalOrder.naturalComparator)); + this.log.config("append stopwords from file " + stopwordsFilelocale.getName()); } } - - this.log.config("loaded stopwords from file " - + stopwordsFile.getName() - + ", " - + stopwords.size() - + " entries, " - + ppRamString(stopwordsFile.length() / 1024)); } // start a cache manager diff --git a/yacy.stopwords b/yacy.stopwords deleted file mode 100644 index 8659daa67..000000000 --- a/yacy.stopwords +++ /dev/null @@ -1,3 +0,0 @@ -# Default stopword list (always loaded) -# a configured language specific stopword list is appended (like: yacy.stopwords.de) -#