From 6d17686258730c14cac0d019e0fb2bc45fac155b Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 15 Jun 2012 19:13:07 +0200 Subject: [PATCH 1/3] made triplestore persistent by default added a size display in triplestore servlet --- defaults/yacy.init | 2 +- htroot/Triplestore_p.html | 5 +++++ htroot/Triplestore_p.java | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/defaults/yacy.init b/defaults/yacy.init index 6cdd6785f..e8f2b78c5 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -1054,7 +1054,7 @@ federated.service.solr.indexing.sharding = MODULO_HOST_MD5 federated.service.solr.indexing.schemefile = solr.keys.default.list # RDF triplestore settings -triplestore.persistent = false +triplestore.persistent = true # Augmentation settings parserAugmentation = false diff --git a/htroot/Triplestore_p.html b/htroot/Triplestore_p.html index fd1e20243..0606841ae 100644 --- a/htroot/Triplestore_p.html +++ b/htroot/Triplestore_p.html @@ -25,6 +25,11 @@ To see a list of all APIs, please visit the Persistent store:
Enabled
diff --git a/htroot/Triplestore_p.java b/htroot/Triplestore_p.java index 58bf82e91..150ac4709 100644 --- a/htroot/Triplestore_p.java +++ b/htroot/Triplestore_p.java @@ -1,3 +1,4 @@ +import net.yacy.cora.lod.JenaTripleStore; import net.yacy.cora.protocol.RequestHeader; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; @@ -29,7 +30,7 @@ public final class Triplestore_p { // prop.put("acceptfeedbackenabled_checked", // env.getConfigBool("interaction.feedback.accept", false) ? "1" : "0"); - + prop.put("size", JenaTripleStore.size()); // return rewrite properties return prop; From a0f1decd827a85f2152f9387020f6185513c456e Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 15 Jun 2012 19:19:18 +0200 Subject: [PATCH 2/3] - added loading of the dbpedia pnd triplestore in the dictionary loader - renamed the dictionary loader to knowledge loader - some refactoring in the library provider method names --- htroot/DictionaryLoader_p.html | 48 +++++++++++++- htroot/DictionaryLoader_p.java | 66 +++++++++++++++---- htroot/env/templates/submenuSemantic.template | 2 +- source/net/yacy/cora/lod/JenaTripleStore.java | 19 ++++-- source/net/yacy/document/LibraryProvider.java | 54 +++++++++++---- .../geolocalization/OverarchingLocation.java | 4 +- 6 files changed, 156 insertions(+), 37 deletions(-) diff --git a/htroot/DictionaryLoader_p.html b/htroot/DictionaryLoader_p.html index f7bd274a9..fd48f6c6a 100644 --- a/htroot/DictionaryLoader_p.html +++ b/htroot/DictionaryLoader_p.html @@ -1,13 +1,13 @@ - YaCy '#[clientname]#': Dictionary Loader + YaCy '#[clientname]#': Knowledge Loader #%env/templates/metas.template%# #%env/templates/header.template%# #%env/templates/submenuSemantic.template%# -

Dictionary Loader

+

Knowledge Loader

YaCy can use external libraries to enable or enhance some functions. These libraries are not @@ -15,6 +15,50 @@ You can download additional files here.

+ +
+
+ Persons + Person dictionaries will help YaCy to identify Person names in input documents. As a result, a person Navigator is presented + +

PND (Personennamendatei) identifiers from DBpedia - Wikipedia extraction

+

This file provides 150000 person names as an extraction from Wikipedia where a PND from the Deutsche Nationalbibliothek is mentioned. It is not the 'raw' PND from d-nb.de, which is much larger but not publicly available.

+ +
+
+
#[pnd0URL]#
+
+
#[pnd0Storage]#
+
+
#(pnd0Status)#
not loaded
::
loaded
::deactivated#(/pnd0Status)#
+
Action
+
#(pnd0Status)# + :: + + :: + + + #(/pnd0Status)#
+ #(pnd0ActionLoaded)#:: +
Result
loaded and activated dictionary file
:: +
Result
loading of dictionary file failed: #[error]#
+ #(/pnd0ActionLoaded)# + #(pnd0ActionRemoved)#:: +
Result
deactivated and removed dictionary file
:: +
Result
cannot remove dictionary file: #[error]#
+ #(/pnd0ActionRemoved)# + #(pnd0ActionDeactivated)#:: +
Result
deactivated dictionary file
:: +
Result
cannot deactivate dictionary file: #[error]#
+ #(/pnd0ActionDeactivated)# + #(pnd0ActionActivated)#:: +
Result
activated dictionary file
:: +
Result
cannot activate dictionary file: #[error]#
+ #(/pnd0ActionActivated)# +
+
+
+
Geolocalization diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java index f50ac0118..32e60d18c 100644 --- a/htroot/DictionaryLoader_p.java +++ b/htroot/DictionaryLoader_p.java @@ -68,7 +68,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file()); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0Status", LibraryProvider.Dictionary.GEON0.file().exists() ? 1 : 0); prop.put("geon0ActionLoaded", 1); @@ -86,19 +86,19 @@ public class DictionaryLoader_p { if (post.containsKey("geon0Remove")) { FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEON0.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname); prop.put("geon0ActionRemoved", 1); } if (post.containsKey("geon0Deactivate")) { LibraryProvider.Dictionary.GEON0.file().renameTo(LibraryProvider.Dictionary.GEON0.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEON0.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEON0.nickname); prop.put("geon0ActionDeactivated", 1); } if (post.containsKey("geon0Activate")) { LibraryProvider.Dictionary.GEON0.fileDisabled().renameTo(LibraryProvider.Dictionary.GEON0.file()); - 
LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocation(LibraryProvider.Dictionary.GEON0.file())); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geon0ActionActivated", 1); } @@ -110,8 +110,8 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB0.nickname); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1Status", LibraryProvider.Dictionary.GEODB1.file().exists() ? 
1 : 0); prop.put("geo1ActionLoaded", 1); @@ -129,19 +129,19 @@ public class DictionaryLoader_p { if (post.containsKey("geo1Remove")) { FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.GEODB1.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionRemoved", 1); } if (post.containsKey("geo1Deactivate")) { LibraryProvider.Dictionary.GEODB1.file().renameTo(LibraryProvider.Dictionary.GEODB1.fileDisabled()); - LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB1.nickname); + LibraryProvider.geoLoc.deactivateLocalization(LibraryProvider.Dictionary.GEODB1.nickname); prop.put("geo1ActionDeactivated", 1); } if (post.containsKey("geo1Activate")) { LibraryProvider.Dictionary.GEODB1.fileDisabled().renameTo(LibraryProvider.Dictionary.GEODB1.file()); - LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); + LibraryProvider.geoLoc.activateLocalization(LibraryProvider.Dictionary.GEODB1.nickname, new OpenGeoDBLocation(LibraryProvider.Dictionary.GEODB1.file(), false)); LibraryProvider.autotagging.addPlaces(LibraryProvider.geoLoc); prop.put("geo1ActionActivated", 1); } @@ -153,7 +153,7 @@ public class DictionaryLoader_p { final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.DRW0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); final byte[] b = response.getContent(); FileUtils.copy(b, LibraryProvider.Dictionary.DRW0.file()); - LibraryProvider.integrateDeReWo(); + LibraryProvider.activateDeReWo(); LibraryProvider.initDidYouMean(); prop.put("drw0Status", LibraryProvider.Dictionary.DRW0.file().exists() ? 
1 : 0); prop.put("drw0ActionLoaded", 1); @@ -169,7 +169,7 @@ public class DictionaryLoader_p { } if (post.containsKey("drw0Remove")) { - LibraryProvider.removeDeReWo(); + LibraryProvider.deactivateDeReWo(); LibraryProvider.initDidYouMean(); FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.file()); FileUtils.deletedelete(LibraryProvider.Dictionary.DRW0.fileDisabled()); @@ -177,7 +177,7 @@ public class DictionaryLoader_p { } if (post.containsKey("drw0Deactivate")) { - LibraryProvider.removeDeReWo(); + LibraryProvider.deactivateDeReWo(); LibraryProvider.initDidYouMean(); LibraryProvider.Dictionary.DRW0.file().renameTo(LibraryProvider.Dictionary.DRW0.fileDisabled()); prop.put("drw0ActionDeactivated", 1); @@ -185,11 +185,51 @@ public class DictionaryLoader_p { if (post.containsKey("drw0Activate")) { LibraryProvider.Dictionary.DRW0.fileDisabled().renameTo(LibraryProvider.Dictionary.DRW0.file()); - LibraryProvider.integrateDeReWo(); + LibraryProvider.activateDeReWo(); LibraryProvider.initDidYouMean(); prop.put("drw0ActionActivated", 1); } + // PND0 + if (post.containsKey("pnd0Load")) { + // load from the net + try { + final Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.PND0.url), false, true), CacheStrategy.NOCACHE, Integer.MAX_VALUE, false); + final byte[] b = response.getContent(); + FileUtils.copy(b, LibraryProvider.Dictionary.PND0.file()); + LibraryProvider.activatePND(); + prop.put("pnd0Status", LibraryProvider.Dictionary.PND0.file().exists() ? 
1 : 0); + prop.put("pnd0ActionLoaded", 1); + } catch (final MalformedURLException e) { + Log.logException(e); + prop.put("pnd0ActionLoaded", 2); + prop.put("pnd0ActionLoaded_error", e.getMessage()); + } catch (final IOException e) { + Log.logException(e); + prop.put("pnd0ActionLoaded", 2); + prop.put("pnd0ActionLoaded_error", e.getMessage()); + } + } + + if (post.containsKey("pnd0Remove")) { + LibraryProvider.deactivatePND(); + FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.file()); + FileUtils.deletedelete(LibraryProvider.Dictionary.PND0.fileDisabled()); + prop.put("pnd0ActionRemoved", 1); + } + + if (post.containsKey("pnd0Deactivate")) { + LibraryProvider.deactivatePND(); + LibraryProvider.Dictionary.PND0.file().renameTo(LibraryProvider.Dictionary.PND0.fileDisabled()); + prop.put("pnd0ActionDeactivated", 1); + } + + if (post.containsKey("pnd0Activate")) { + LibraryProvider.Dictionary.PND0.fileDisabled().renameTo(LibraryProvider.Dictionary.PND0.file()); + LibraryProvider.activatePND(); + prop.put("pnd0ActionActivated", 1); + } + // check status again for (final LibraryProvider.Dictionary dictionary: LibraryProvider.Dictionary.values()) { prop.put(dictionary.nickname + "Status", dictionary.file().exists() ? 1 : dictionary.fileDisabled().exists() ? 2 : 0); diff --git a/htroot/env/templates/submenuSemantic.template b/htroot/env/templates/submenuSemantic.template index e5fe538f8..471c5f365 100644 --- a/htroot/env/templates/submenuSemantic.template +++ b/htroot/env/templates/submenuSemantic.template @@ -6,7 +6,7 @@

Automated Annotation

diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index 0be297f33..2a1fca8be 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -44,12 +44,15 @@ public class JenaTripleStore { model.setNsPrefix("pnd", "http://dbpedia.org/ontology/individualisedPnd"); model.setNsPrefix(DCTerms.PREFIX, DCTerms.NAMESPACE); } + + public static long size() { + return model.size(); + } public static ConcurrentHashMap privatestorage = null; public static String file; - public static void load(String filename) throws IOException { if (filename.endsWith(".nt")) LoadNTriples(filename); else loadRDF(filename); @@ -70,16 +73,20 @@ public class JenaTripleStore { } public static void LoadNTriples(String fileNameOrUri) throws IOException { - Model tmp = ModelFactory.createDefaultModel(); - Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri); + Log.logInfo("TRIPLESTORE", "Loading N-Triples from " + fileNameOrUri); InputStream is = FileManager.get().open(fileNameOrUri); + LoadNTriples(is); + } + + public static void LoadNTriples(InputStream is) throws IOException { + Model tmp = ModelFactory.createDefaultModel(); if (is != null) { tmp.read(is, null, "N-TRIPLE"); - Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples from " + fileNameOrUri); + Log.logInfo("TRIPLESTORE", "loaded " + tmp.size() + " triples"); model = model.union(tmp); //model.write(System.out, "TURTLE"); } else { - throw new IOException("cannot read " + fileNameOrUri); + throw new IOException("cannot read input stream"); } } @@ -174,7 +181,7 @@ public class JenaTripleStore { } public static void deleteObjects(String subject, String predicate) { - Resource r = getResource(subject); + Resource r = subject == null ? 
null : getResource(subject); Property pr = getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index b2df5a3cd..ea7f9d32e 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -40,6 +40,8 @@ import java.util.zip.ZipException; import java.util.zip.ZipFile; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.lod.JenaTripleStore; +import net.yacy.cora.storage.Files; import net.yacy.document.geolocalization.GeonamesLocation; import net.yacy.document.geolocalization.OpenGeoDBLocation; import net.yacy.document.geolocalization.OverarchingLocation; @@ -67,7 +69,8 @@ public class LibraryProvider { "http://downloads.sourceforge.net/project/opengeodb/Data/0.2.5a/opengeodb-0.2.5a-UTF8-sql.gz" ), GEODB1( "geo1", "http://fa-technik.adfc.de/code/opengeodb/dump/opengeodb-02624_2011-10-17.sql.gz" ), GEON0( "geon0", "http://download.geonames.org/export/dump/cities1000.zip" ), - DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ); + DRW0( "drw0", "http://www.ids-mannheim.de/kl/derewo/derewo-v-100000t-2009-04-30-0.1.zip" ), + PND0( "pnd0", "http://downloads.dbpedia.org/3.7-i18n/de/pnd_de.nt.bz2" ); public String nickname, url, filename; @@ -106,10 +109,11 @@ public class LibraryProvider { dictRoot = rootPath; // initialize libraries - integrateDeReWo(); + activateDeReWo(); initDidYouMean(); integrateOpenGeoDB(); integrateGeonames(); + activatePND(); initAutotagging(tagPrefix); Set allTags = new HashSet() ; allTags.addAll(autotagging.allTags()); // we must copy this into a clone to prevent circularity @@ -125,11 +129,11 @@ public class LibraryProvider { if ( geo0.exists() ) { geo0.renameTo(Dictionary.GEODB0.fileDisabled()); } - geoLoc.addLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); + 
geoLoc.activateLocalization(Dictionary.GEODB1.nickname, new OpenGeoDBLocation(geo1, false)); return; } if ( geo0.exists() ) { - geoLoc.addLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); + geoLoc.activateLocalization(Dictionary.GEODB0.nickname, new OpenGeoDBLocation(geo0, false)); return; } } @@ -137,7 +141,7 @@ public class LibraryProvider { public static void integrateGeonames() { final File geon = Dictionary.GEON0.file(); if ( geon.exists() ) { - geoLoc.addLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); + geoLoc.activateLocalization(Dictionary.GEON0.nickname, new GeonamesLocation(geon)); return; } } @@ -158,14 +162,7 @@ public class LibraryProvider { autotagging = new Autotagging(autotaggingPath, prefix); } - public static void removeDeReWo() { - final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); - final File derewoInput = LibraryProvider.Dictionary.DRW0.file(); - final File derewoOutput = new File(dymDict, derewoInput.getName() + ".words"); - FileUtils.deletedelete(derewoOutput); - } - - public static void integrateDeReWo() { + public static void activateDeReWo() { // translate input files (once..) final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); if ( !dymDict.exists() ) { @@ -184,6 +181,37 @@ public class LibraryProvider { } } + public static void deactivateDeReWo() { + final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); + final File derewoInput = LibraryProvider.Dictionary.DRW0.file(); + final File derewoOutput = new File(dymDict, derewoInput.getName() + ".words"); + FileUtils.deletedelete(derewoOutput); + } + + public static void activatePND() { + // translate input files (once..) 
+ final File dymDict = new File(dictRoot, path_to_did_you_mean_dictionaries); + if ( !dymDict.exists() ) { + dymDict.mkdirs(); + } + // read the pnd file and store it into the triplestore + final File dictInput = LibraryProvider.Dictionary.PND0.file(); + if ( dictInput.exists() ) { + try { + JenaTripleStore.LoadNTriples(Files.read(dictInput)); + } catch ( final IOException e ) { + Log.logException(e); + } + } + // read the triplestore and generate a vocabulary + + } + + public static void deactivatePND() { + // remove the PND Triples from the triplestore + JenaTripleStore.deleteObjects(null, "http://dbpedia.org/ontology/individualisedPnd"); + } + /* private static ArrayList loadList(final File file, String comment, boolean toLowerCase) { final ArrayList list = new ArrayList(); diff --git a/source/net/yacy/document/geolocalization/OverarchingLocation.java b/source/net/yacy/document/geolocalization/OverarchingLocation.java index 121712601..1367b332e 100644 --- a/source/net/yacy/document/geolocalization/OverarchingLocation.java +++ b/source/net/yacy/document/geolocalization/OverarchingLocation.java @@ -44,7 +44,7 @@ public class OverarchingLocation implements Locations { * @param nickname the nickname of the service * @param service the service */ - public void addLocalization(final String nickname, final Locations service) { + public void activateLocalization(final String nickname, final Locations service) { this.services.put(nickname, service); } @@ -52,7 +52,7 @@ public class OverarchingLocation implements Locations { * remove a localization service * @param nickname */ - public void removeLocalization(final String nickname) { + public void deactivateLocalization(final String nickname) { this.services.remove(nickname); } From df3531f8d5376ced022748597e042e23d9e0d443 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 16 Jun 2012 12:36:15 +0200 Subject: [PATCH 3/3] added the generation of virtual vocabularies using the pnd --- htroot/Vocabulary_p.java | 
1 + htroot/interaction/Triple.java | 2 +- .../anomic/http/server/HTTPDFileHandler.java | 1 + source/net/yacy/cora/lod/JenaTripleStore.java | 118 +++++++----------- .../net/yacy/cora/lod/vocabulary/Tagging.java | 59 +++++++-- source/net/yacy/document/Autotagging.java | 22 ++-- source/net/yacy/document/LibraryProvider.java | 33 ++++- 7 files changed, 138 insertions(+), 98 deletions(-) diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 656992967..44d98cbc8 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -172,6 +172,7 @@ public class Vocabulary_p { prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue().getSynonymsCSV()); prop.putHTML("edit_terms_" + c + "_editable_objectlink", entry.getValue().getObjectlink()); c++; + if (c > 1000) break; } prop.put("edit_terms", c); } diff --git a/htroot/interaction/Triple.java b/htroot/interaction/Triple.java index 7d94a0490..92f92c841 100644 --- a/htroot/interaction/Triple.java +++ b/htroot/interaction/Triple.java @@ -133,7 +133,7 @@ public class Triple { if (global) { o = JenaTripleStore.getObject(s, p); } else { - o = JenaTripleStore.getObject(s, p, username); + o = JenaTripleStore.getPrivateObject(s, p, username); } diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index c889b483d..0f97cec05 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -330,6 +330,7 @@ public final class HTTPDFileHandler { if (!accessGranted) { final boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost)); accessGranted = accessFromLocalhost && realmValue != null && realmProp.length() > 6 && (adminAccountBase64MD5.equals(realmValue)); + Log.logInfo("HTTPDFileHandler", "access from localhost blocked, clientIP=" + clientIP); } // -5- a password is configured and access 
comes with matching http-authentify diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index 2a1fca8be..6b316c6d1 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -24,6 +24,7 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.ResIterator; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.util.FileManager; @@ -91,21 +92,15 @@ public class JenaTripleStore { } public static void addFile(String rdffile) { - Model tmp = ModelFactory.createDefaultModel(); - - try { InputStream in = new ByteArrayInputStream(UTF8.getBytes(rdffile)); // read the RDF/XML file tmp.read(in, null); + } finally { + model = model.union(tmp); } - finally - { - model = model.union(tmp); - } - } public static void saveFile(String filename) { @@ -133,65 +128,25 @@ public class JenaTripleStore { init(model); } - - public static Resource getResource(String uri, String username) { - - if (privatestorage != null && privatestorage.containsKey(username)) { - - return getResource (uri, privatestorage.get(username)); - } - - return null; - } - /** * Return a Resource instance with the given URI in this model. * @param uri * @return */ public static Resource getResource(String uri) { - return getResource(uri, model); - } - - public static Resource getResource(String uri, Model model) { return model.getResource(uri); } - public static Resource getProperty(String uri, String username) { - - if (privatestorage != null && privatestorage.containsKey(username)) { - - return getProperty (uri, privatestorage.get(username)); - } - - return null; - } - - /** - * Return a Property instance in this model. 
- * @param uri - * @return - */ - public static Property getProperty(String uri) { - return getProperty(uri, model); - } - - public static Property getProperty(String uri, Model model) { - return model.getProperty(uri); - } - public static void deleteObjects(String subject, String predicate) { Resource r = subject == null ? null : getResource(subject); - Property pr = getProperty(predicate); + Property pr = model.getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } public static void addTriple(String subject, String predicate, String object, String username) { if (privatestorage != null && privatestorage.containsKey(username)) { - addTriple (subject, predicate, object, privatestorage.get(username)); } - } public static void addTriple(String subject, String predicate, String object) { @@ -199,55 +154,46 @@ public class JenaTripleStore { } public static void addTriple(String subject, String predicate, String object, Model model) { - Resource r = getResource(subject, model); - Property pr = getProperty(predicate, model); + Resource r = model.getResource(subject); + Property pr = model.getProperty(predicate); r.addProperty(pr, object); Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object); } - public static String getObject (final String subject, final String predicate) { - + public static String getObject(final String subject, final String predicate) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... "); - Iterator ni = JenaTripleStore.getObjects (subject, predicate); + Iterator ni = JenaTripleStore.getObjects(subject, predicate); if (!ni.hasNext()) return ""; return ni.next().toString(); - + } + + public static Iterator getObjects(final String subject, final String predicate) { + final Resource r = subject == null ? 
null : JenaTripleStore.getResource(subject); + return getObjects(r, predicate); } - public static String getObject (final String subject, final String predicate, final String username) { - + public static String getPrivateObject(final String subject, final String predicate, final String username) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")"); - Iterator ni = JenaTripleStore.getObjects (subject, predicate, username); + Iterator ni = JenaTripleStore.getPrivateObjects(subject, predicate, username); if (!ni.hasNext()) return ""; return ni.next().toString(); - } - public static Iterator getObjects(final String subject, final String predicate, final String username) { - - final Resource r = JenaTripleStore.getResource(subject, username); - + private static Iterator getPrivateObjects(final String subject, final String predicate, final String username) { if (privatestorage != null && privatestorage.containsKey(username)) { - - return getObjects(r, predicate, privatestorage.get(username)); + return getObjects(privatestorage.get(username).getResource(subject), predicate, privatestorage.get(username)); } - return null; } - - public static Iterator getObjects(final String subject, final String predicate) { - final Resource r = JenaTripleStore.getResource(subject); - return getObjects(r, predicate); - } public static Iterator getObjects(final Resource r, final String predicate) { - return getObjects (r, predicate, model); + return getObjects(r, predicate, model); } - public static Iterator getObjects(final Resource r, final String predicate, final Model model) { - final Property pr = JenaTripleStore.getProperty(predicate, model); + private static Iterator getObjects(final Resource r, final String predicate, final Model model) { + final Property pr = model.getProperty(predicate); final StmtIterator iter = model.listStatements(r, pr, (Resource) null); return new Iterator() { @@ -265,6 +211,30 @@ public class JenaTripleStore { } }; } + 
+ public static Iterator getSubjects(final String predicate) { + return getSubjects(predicate, model); + } + + private static Iterator getSubjects(final String predicate, final Model model) { + final Property pr = model.getProperty(predicate); + final ResIterator iter = model.listSubjectsWithProperty(pr); + + return new Iterator() { + @Override + public boolean hasNext() { + return iter.hasNext(); + } + @Override + public Resource next() { + return iter.nextResource(); + } + @Override + public void remove() { + iter.remove(); + } + }; + } public static void initPrivateStores() { diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 5f42af68a..96992b8ad 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -82,15 +82,58 @@ public class Tagging { this(name); this.propFile = propFile; this.objectspace = objectspace; - BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); - w.write("#objectspace:" + objectspace + "\n"); - for (Map.Entry e: table.entrySet()) { - String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); - String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); - w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? 
"" : "#" + o) + "\n"); + if (propFile == null) { + this.synonym2term.clear(); + this.term2synonym.clear(); + this.term2objectlink.clear(); + this.synonym2synonyms.clear(); + this.namespace = DEFAULT_NAMESPACE; + this.predicate = this.namespace + this.navigatorName; + + String term, v; + String[] tags; + vocloop: for (Map.Entry e: table.entrySet()) { + if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) { + term = normalizeKey(e.getKey()); + v = normalizeWord(e.getKey()); + this.synonym2term.put(v, term); + this.term2synonym.put(term, v); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + continue vocloop; + } + term = normalizeKey(e.getKey()); + tags = e.getValue().getSynonymsList(); + Set synonyms = new HashSet(); + synonyms.add(term); + tagloop: for (String synonym: tags) { + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + synonym = normalizeWord(synonym); + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + } + String synonym = normalizeWord(term); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + synonyms.add(synonym); + for (String s: synonyms) { + this.synonym2synonyms.put(s, synonyms); + } + } + } else { + BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); + if (objectspace != null && objectspace.length() > 0) w.write("#objectspace:" + objectspace + "\n"); + for (Map.Entry e: table.entrySet()) { + String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); + String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); + w.write(e.getKey() + (s == null || s.length() == 0 ? 
"" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); + } + w.close(); + init(); } - w.close(); - init(); } /** diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index e3559f12a..8b10e2fea 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -49,26 +49,20 @@ public class Autotagging { private final Map vocabularies; // mapping from vocabulary name to the tagging vocabulary private final Map allTags; - public Autotagging(final File autotaggingPath, char prefixChar) { - this.vocabularies = new ConcurrentHashMap(); - this.autotaggingPath = autotaggingPath; - this.prefixChar = prefixChar; - this.allTags = new ConcurrentHashMap(); - init(); - } - - /** - * scan the input directory and load all tag tables (again) - * a tag table is a property file where + * create a Autotagging object: + * scan the input directory and load all tag tables. 
+ * A tag table is a property file where * the key is the tag name * the value is the visible name for the tag (shown in a navigator) * properties without values are allowed (the value is then set to the key) * also the value can be used as a tag */ - public void init() { - this.vocabularies.clear(); - this.allTags.clear(); + public Autotagging(final File autotaggingPath, char prefixChar) { + this.vocabularies = new ConcurrentHashMap(); + this.autotaggingPath = autotaggingPath; + this.prefixChar = prefixChar; + this.allTags = new ConcurrentHashMap(); if (this.autotaggingPath == null || !this.autotaggingPath.exists()) { return; } diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index ea7f9d32e..92d6d352f 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -32,15 +32,22 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.zip.ZipException; import java.util.zip.ZipFile; +import com.hp.hpl.jena.rdf.model.Resource; + import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.lod.JenaTripleStore; +import net.yacy.cora.lod.vocabulary.Tagging; +import net.yacy.cora.lod.vocabulary.Tagging.SOTuple; import net.yacy.cora.storage.Files; import net.yacy.document.geolocalization.GeonamesLocation; import net.yacy.document.geolocalization.OpenGeoDBLocation; @@ -204,12 +211,36 @@ public class LibraryProvider { } } // read the triplestore and generate a vocabulary - + Map map = new HashMap(); + Iterator i = JenaTripleStore.getSubjects("http://dbpedia.org/ontology/individualisedPnd"); + while (i.hasNext()) { + Resource resource = i.next(); + String subject = resource.toString(); + + // 
prepare a propert term from the subject uri + int p = subject.lastIndexOf('/'); + if (p < 0) continue; + String term = subject.substring(p + 1); + //String objectspace = subject.substring(0, p); + p = term.indexOf('('); + if (p >= 0) term = term.substring(0, p); + term = term.replaceAll("_", " ").trim(); + if (term.length() == 0) continue; + + // store the term into the vocabulary map + map.put(term, new SOTuple("", subject)); + } + try { + Tagging pndVoc = new Tagging("Persons", null, "", map); + autotagging.addVocabulary(pndVoc); + } catch (IOException e) { + } } public static void deactivatePND() { // remove the PND Triples from the triplestore JenaTripleStore.deleteObjects(null, "http://dbpedia.org/ontology/individualisedPnd"); + autotagging.deleteVocabulary("Persons"); } /*