From fa8da7f89d3d8df8a40719afc212ceb47355d074 Mon Sep 17 00:00:00 2001 From: Michael Christen Date: Sun, 8 Jan 2012 02:13:52 +0100 Subject: [PATCH] vocabularies are now also used as source for a did-you-mean computation --- source/net/yacy/document/Autotagging.java | 4 ++++ source/net/yacy/document/LibraryProvider.java | 4 ++++ source/net/yacy/document/WordCache.java | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index 28d8b9caa..c3c220cdd 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -89,6 +89,10 @@ public class Autotagging { } } + public Set<String> allTags() { + return this.allTags.keySet(); + } + public void addDictionaries(Map dictionaries) { for (Map.Entry entry: dictionaries.entrySet()) { Vocabulary voc = new Vocabulary(entry.getKey(), entry.getValue()); diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index 7074eeafa..37994fc03 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -32,6 +32,7 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.TreeSet; @@ -111,8 +112,11 @@ public class LibraryProvider integrateOpenGeoDB(); integrateGeonames(); initAutotagging(tagPrefix); + Set<String> allTags = new HashSet<String>() ; + allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity autotagging.addLocalization(geoLoc); autotagging.addDictionaries(dymLib.getDictionaries()); + WordCache.learn(allTags); } public static void integrateOpenGeoDB() { diff --git a/source/net/yacy/document/WordCache.java b/source/net/yacy/document/WordCache.java index a89d316f5..a6998c8d4 100644 --- 
a/source/net/yacy/document/WordCache.java +++ b/source/net/yacy/document/WordCache.java @@ -220,6 +220,12 @@ public class WordCache { } } + public static void learn(Set<String> wordset) { + for (String s: wordset) { + learn(new StringBuilder(s)); + } + } + /** * scan the input directory and load all dictionaries (again) */