diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html index f7bd274a9..fd48f6c6a 100644 --- a/htroot/DictionaryLoader_p.html +++ b/htroot/DictionaryLoader_p.html @@ -1,13 +1,13 @@ - YaCy '#[clientname]#': Dictionary Loader + YaCy '#[clientname]#': Knowledge Loader #%env/templates/metas.template%# #%env/templates/header.template%# #%env/templates/submenuSemantic.template%# -

Dictionary Loader

+

Knowledge Loader

YaCy can use external libraries to enable or enhance some functions. These libraries are not @@ -15,6 +15,50 @@ You can download additional files here.

+ +
+
+ Persons + Person dictionaries will help YaCy to identify person names in input documents. As a result, a person Navigator is presented. + +

PND (Personennamendatei) identifiers from DBpedia - Wikipedia extraction

+

This file provides 150000 person names extracted from Wikipedia articles in which a PND from the Deutsche Nationalbibliothek is mentioned. It is not the 'raw' PND from d-nb.de, which is much larger but not publicly available.

+ +
+
+
#[pnd0URL]#
+
+
#[pnd0Storage]#
+
+
#(pnd0Status)#
not loaded
::
loaded
::deactivated#(/pnd0Status)#
+
Action
+
#(pnd0Status)# + :: + + :: + + + #(/pnd0Status)#
+ #(pnd0ActionLoaded)#:: +
Result
loaded and activated dictionary file
:: +
Result
loading of dictionary file failed: #[error]#
+ #(/pnd0ActionLoaded)# + #(pnd0ActionRemoved)#:: +
Result
deactivated and removed dictionary file
:: +
Result
cannot remove dictionary file: #[error]#
+ #(/pnd0ActionRemoved)# + #(pnd0ActionDeactivated)#:: +
Result
deactivated dictionary file
:: +
Result
cannot deactivate dictionary file: #[error]#
+ #(/pnd0ActionDeactivated)# + #(pnd0ActionActivated)#:: +
Result
activated dictionary file
:: +
Result
cannot activate dictionary file: #[error]#
+ #(/pnd0ActionActivated)# +
+
+
+
Geolocalization diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index f50ac0118..32e60d18c 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -68,7 +68,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0); prop.put("geon0ActionLoaded", 1); @@ -86,19 +86,19 @@ public class DictionaryLoader_p { if (post.containsKey("geon0Remove")) { FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname); prop.put("geon0ActionRemoved", 1); } if (post.containsKey("geon0Deactivate")) { LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname); prop.put("geon0ActionDeactivated", 1); } if (post.containsKey("geon0Activate")) { LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); - 
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0ActionActivated", 1); } @@ -110,8 +110,8 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 
1 : 0); prop.put("geo1ActionLoaded", 1); @@ -129,19 +129,19 @@ public class DictionaryLoader_p { if (post.containsKey("geo1Remove")) { FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionRemoved", 1); } if (post.containsKey("geo1Deactivate")) { LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionDeactivated", 1); } if (post.containsKey("geo1Activate")) { LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1ActionActivated", 1); } @@ -153,7 +153,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file()); - LibraryProvider.integrateDeReWo(); + LibraryProvider.activateDeReWo(); LibraryProvider.initDidYouMean(); prop.put("drw0Status", LibraryProvider.Dictionary.DRW0.file().exists() ? 
1 : 0); prop.put("drw0ActionLoaded", 1); @@ -169,7 +169,7 @@ public class DictionaryLoader_p { } if (post.containsKey("drw0Remove")) { - LibraryProvider.removeDeReWo(); + LibraryProvider.deactivateDeReWo(); LibraryProvider.initDidYouMean(); FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.fileDisabled()); @@ -177,7 +177,7 @@ public class DictionaryLoader_p { } if (post.containsKey("drw0Deactivate")) { - LibraryProvider.removeDeReWo(); + LibraryProvider.deactivateDeReWo(); LibraryProvider.initDidYouMean(); LibraryProvider.Dictionary.DRW0.file().renameTo(LibraryProvider.Dictionary.DRW0.fileDisabled()); prop.put("drw0ActionDeactivated", 1); @@ -185,11 +185,51 @@ public class DictionaryLoader_p { if (post.containsKey("drw0Activate")) { LibraryProvider.Dictionary.DRW0.fileDisabled().renameTo(LibraryProvider.Dictionary.DRW0.file()); - LibraryProvider.integrateDeReWo(); + LibraryProvider.activateDeReWo(); LibraryProvider.initDidYouMean(); prop.put("drw0ActionActivated", 1); } + // PND0 + if (post.containsKey("pnd0Load")) { + // load from the net + try { + final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); + final byte[] b = response.getContent(); + FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file()); + LibraryProvider.activatePND(); + prop.put("pnd0Status", LibraryProvider.Dictionary.PND0.file().exists() ? 
1 : 0); + prop.put("pnd0ActionLoaded", 1); + } catch (final MalformedURLException e) { + Log.logException(e); + prop.put("pnd0ActionLoaded", 2); + prop.put("pnd0ActionLoaded_error", e.getMessage()); + } catch (final IOException e) { + Log.logException(e); + prop.put("pnd0ActionLoaded", 2); + prop.put("pnd0ActionLoaded_error", e.getMessage()); + } + } + + if (post.containsKey("pnd0Remove")) { + LibraryProvider.deactivatePND(); + FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.fileDisabled()); + prop.put("pnd0ActionRemoved", 1); + } + + if (post.containsKey("pnd0Deactivate")) { + LibraryProvider.deactivatePND(); + LibraryProvider.Dictionary.PND0.file().renameTo(LibraryProvider.Dictionary.PND0.fileDisabled()); + prop.put("pnd0ActionDeactivated", 1); + } + + if (post.containsKey("pnd0Activate")) { + LibraryProvider.Dictionary.PND0.fileDisabled().renameTo(LibraryProvider.Dictionary.PND0.file()); + LibraryProvider.activatePND(); + prop.put("pnd0ActionActivated", 1); + } + // check status again for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); diff --git a/htroot/env/templates/submenuSemantic.template b/htroot/env/templates/submenuSemantic.template index e5fe538f8..471c5f365 100644 --- a/htroot/env/templates/submenuSemantic.template +++ b/htroot/env/templates/submenuSemantic.template @@ -6,7 +6,7 @@

Automated Annotation

diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index 0be297f33..2a1fca8be 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -44,12 +44,15 @@ public class JenaTripleStore { model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd"); model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); } + + public static long size() { + return model.size(); + } public static ConcurrentHashMap privatestorage = null; public static String file; - public static void load(String filename) throws IOException { if (filename.endsWith(".nt")) LoadNTriples(filename); else loadRDF(filename); @@ -70,16 +73,20 @@ public class JenaTripleStore { } public static void LoadNTriples(String fileNameOrUri) throws IOException { - Model tmp = ModelFactory.createDefaultModel(); - Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri); + Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri); InputStream is = FileManager.get().open(fileNameOrUri); + LoadNTriples(is); + } + + public static void LoadNTriples(InputStream is) throws IOException { + Model tmp = ModelFactory.createDefaultModel(); if (is != null) { tmp.read(is, null, "N-TRIPLE"); - Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + fileNameOrUri); + Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples"); model = model.union(tmp); //model.write(System.out, "TURTLE"); } else { - throw new IOException("cannot read " + fileNameOrUri); + throw new IOException("cannot read input stream"); } } @@ -174,7 +181,7 @@ public class JenaTripleStore { } public static void deleteObjects(String subject, String predicate) { - Resource r = getResource(subject); + Resource r = subject == null ? 
null : getResource(subject); Property pr = getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index b2df5a3cd..ea7f9d32e 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -40,6 +40,8 @@ import java.util.zip.ZipException; import java.util.zip.ZipFile; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.lod.JenaTripleStore; +import net.yacy.cora.storage.Files; import net.yacy.document.geolocalization.GeonamesLocation; import net.yacy.document.geolocalization.OpenGeoDBLocation; import net.yacy.document.geolocalization.OverarchingLocation; @@ -67,7 +69,8 @@ public class LibraryProvider { "http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ), GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ), GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ), - DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ); + DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ), + PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" ); public String nickname, url, filename; @@ -106,10 +109,11 @@ public class LibraryProvider { dictRoot = rootPath; // initialize libraries - integrateDeReWo(); + activateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); integrateGeonames(); + activatePND(); initAutotagging(tagPrefix); Set allTags = new HashSet() ; allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity @@ -125,11 +129,11 @@ public class LibraryProvider { if ( geo0.exists() ) { geo0.renameTo(Dictionary.GEODB0.fileDisabled()); } - geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); + 
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); return; } if ( geo0.exists() ) { - geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); + geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); return; } } @@ -137,7 +141,7 @@ public class LibraryProvider { public static void integrateGeonames() { final File geon = Dictionary.GEON0.file(); if ( geon.exists() ) { - geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); + geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); return; } } @@ -158,14 +162,7 @@ public class LibraryProvider { autotagging = new Autotagging(autotaggingPath, prefix); } - public static void removeDeReWo() { - final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); - final File derewoInput = LibraryProvider.Dictionary.DRW0.file(); - final File derewoOutput = new File(dymDict, derewoInput.getName() + ".words"); - FileUtils.deletedelete(derewoOutput); - } - - public static void integrateDeReWo() { + public static void activateDeReWo() { // translate input files (once..) final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); if ( !dymDict.exists() ) { @@ -184,6 +181,37 @@ public class LibraryProvider { } } + public static void deactivateDeReWo() { + final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); + final File derewoInput = LibraryProvider.Dictionary.DRW0.file(); + final File derewoOutput = new File(dymDict, derewoInput.getName() + ".words"); + FileUtils.deletedelete(derewoOutput); + } + + public static void activatePND() { + // translate input files (once..) 
+ final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); + if ( !dymDict.exists() ) { + dymDict.mkdirs(); + } + // read the pnd file and store it into the triplestore + final File dictInput = LibraryProvider.Dictionary.PND0.file(); + if ( dictInput.exists() ) { + try { + JenaTripleStore.LoadNTriples(Files.read(dictInput)); + } catch ( final IOException e ) { + Log.logException(e); + } + } + // read the triplestore and generate a vocabulary + + } + + public static void deactivatePND() { + // remove the PND Triples from the triplestore + JenaTripleStore.deleteObjects(null, "http://dbpedia.org/ontology/individualisedPnd"); + } + /* private static ArrayList loadList(final File file, String comment, boolean toLowerCase) { final ArrayList list = new ArrayList(); diff --git a/source/net/yacy/document/geolocalization/OverarchingLocation.java b/source/net/yacy/document/geolocalization/OverarchingLocation.java index 121712601..1367b332e 100644 --- a/source/net/yacy/document/geolocalization/OverarchingLocation.java +++ b/source/net/yacy/document/geolocalization/OverarchingLocation.java @@ -44,7 +44,7 @@ public class OverarchingLocation implements Locations { * @param nickname the nickname of the service * @param service the service */ - public void addLocalization(final String nickname, final Locations service) { + public void activateLocalization(final String nickname, final Locations service) { this.services.put(nickname, service); } @@ -52,7 +52,7 @@ public class OverarchingLocation implements Locations { * remove a localization service * @param nickname */ - public void removeLocalization(final String nickname) { + public void deactivateLocalization(final String nickname) { this.services.remove(nickname); }