changed way to integrate dictionary files:

they must be downloaded manually by the user and placed in DATA/DICTIONARIES/source
for each externally imported dictionary file there will be a translator that converts the input file once
into a YaCy-internal data format.
Files that will be provided together with yacy releases may still be placed in <root>/dictionaries

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6286 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent d656a94f55
commit 67eddaec4b

File diff suppressed because it is too large Load Diff

@ -5,13 +5,12 @@ The dictionaries directory contains files that YaCy can use to generate
These files are once read and translated into a YaCy-internal format. These files are once read and translated into a YaCy-internal format.
The source of the files are: To integrate dictionaries, you must download them separately and store them in DATA/DICTIONARIES/source
You can do that with the following files:
Korpusbasierte Wortgrundformliste DeReWo des Institut für Deutsche Sprache:
derewo-v-30000g-2007-12-31-0.1.txt derewo-v-30000g-2007-12-31-0.1.txt
derewo-v-30000g-2007-12-31-0.1.pdf from
> Korpusbasierte Wortgrundformliste DeReWo des Institut für Deutsche Sprache http://www.ids-mannheim.de/kl/derewo/derewo-v-30000g-2007-12-31-0.1.zip
> http://www.ids-mannheim.de/kl/derewo/derewo-v-30000g-2007-12-31-0.1.zip
> (derewo-v-30000g-2007-12-31-0.1.pdf is not used here,
> but must be replicated because of the document license)
> The target of the one-time translation is:
> DATA/DICTIONARIES/didyoumean/derewo-v-30000g-2007-12-31-0.1.words

@ -42,10 +42,12 @@ import java.util.TreeSet;
public class LibraryProvider { public class LibraryProvider {
private static final String path_to_source_dictionaries = "source";
private static final String path_to_did_you_mean_dictionaries = "didyoumean"; private static final String path_to_did_you_mean_dictionaries = "didyoumean";
private static final String path_to_source = "dictionaries";
public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null); public static DidYouMeanLibrary dymLib = new DidYouMeanLibrary(null);
public static File dictSource = null;
public static File dictRoot = null;
/** /**
* initialize the LibraryProvider as static class. * initialize the LibraryProvider as static class.
@ -56,12 +58,27 @@ public class LibraryProvider {
* @param pathToSource * @param pathToSource
* @param pathToDICTIONARIES * @param pathToDICTIONARIES
*/ */
public static void initialize(File rootPath, File pathToDICTIONARIES) { public static void initialize(File rootPath) {
File dictSource = new File(rootPath, path_to_source_dictionaries);
if (!dictSource.exists()) dictSource.mkdirs();
dictRoot = rootPath;
// initialize libraries
integrateDeReWo();
initDidYouMean();
}
public static void initDidYouMean() {
File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
if (!dymDict.exists()) dymDict.mkdirs();
dymLib = new DidYouMeanLibrary(dymDict);
}
public static void integrateDeReWo() {
// translate input files (once..) // translate input files (once..)
File dymDict = new File(pathToDICTIONARIES, path_to_did_you_mean_dictionaries); File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries);
if (!dymDict.exists()) dymDict.mkdirs(); if (!dymDict.exists()) dymDict.mkdirs();
File pathToSource = new File(rootPath, path_to_source); File pathToSource = new File(dictRoot, path_to_source_dictionaries);
File derewoInput = new File(pathToSource, "derewo-v-30000g-2007-12-31-0.1.txt"); File derewoInput = new File(pathToSource, "derewo-v-30000g-2007-12-31-0.1.txt");
File derewoOutput = new File(dymDict, "derewo-v-30000g-2007-12-31-0.1.words"); File derewoOutput = new File(dymDict, "derewo-v-30000g-2007-12-31-0.1.words");
if (!derewoOutput.exists() && derewoInput.exists()) { if (!derewoOutput.exists() && derewoInput.exists()) {
@ -73,9 +90,6 @@ public class LibraryProvider {
e.printStackTrace(); e.printStackTrace();
} }
} }
// initialize libraries
dymLib = new DidYouMeanLibrary(dymDict);
} }
/* /*
@ -152,7 +166,7 @@ public class LibraryProvider {
public static void main(String[] args) { public static void main(String[] args) {
File here = new File("dummy").getParentFile(); File here = new File("dummy").getParentFile();
initialize(here, new File(new File(here, "DATA"), "DICTIONARIES")); initialize(new File(here, "DATA/DICTIONARIES"));
System.out.println("dymDict-size = " + dymLib.size()); System.out.println("dymDict-size = " + dymLib.size());
Set<String> r = dymLib.recommend("da"); Set<String> r = dymLib.recommend("da");
for (String s: r) { for (String s: r) {

@ -320,7 +320,7 @@ public final class Switchboard extends serverAbstractSwitch implements serverSwi
// init libraries // init libraries
this.log.logConfig("initializing libraries"); this.log.logConfig("initializing libraries");
LibraryProvider.initialize(rootPath, this.dictionariesPath); LibraryProvider.initialize(this.dictionariesPath);
// set a high maximum cache size to current size; this is adopted later automatically // set a high maximum cache size to current size; this is adopted later automatically
final int wordCacheMaxCount = (int) getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 20000); final int wordCacheMaxCount = (int) getConfigLong(SwitchboardConstants.WORDCACHE_MAX_COUNT, 20000);

Loading…
Cancel
Save