From df3531f8d5376ced022748597e042e23d9e0d443 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Sat, 16 Jun 2012 12:36:15 +0200 Subject: [PATCH] added the generation of virtual vocabularies using the pnd --- htroot/Vocabulary_p.java | 1 + htroot/interaction/Triple.java | 2 +- .../anomic/http/server/HTTPDFileHandler.java | 1 + source/net/yacy/cora/lod/JenaTripleStore.java | 118 +++++++----------- .../net/yacy/cora/lod/vocabulary/Tagging.java | 59 +++++++-- source/net/yacy/document/Autotagging.java | 22 ++-- source/net/yacy/document/LibraryProvider.java | 33 ++++- 7 files changed, 138 insertions(+), 98 deletions(-) diff --git a/htroot/Vocabulary_p.java b/htroot/Vocabulary_p.java index 656992967..44d98cbc8 100644 --- a/htroot/Vocabulary_p.java +++ b/htroot/Vocabulary_p.java @@ -172,6 +172,7 @@ public class Vocabulary_p { prop.putHTML("edit_terms_" + c + "_editable_synonyms", entry.getValue().getSynonymsCSV()); prop.putHTML("edit_terms_" + c + "_editable_objectlink", entry.getValue().getObjectlink()); c++; + if (c > 1000) break; } prop.put("edit_terms", c); } diff --git a/htroot/interaction/Triple.java b/htroot/interaction/Triple.java index 7d94a0490..92f92c841 100644 --- a/htroot/interaction/Triple.java +++ b/htroot/interaction/Triple.java @@ -133,7 +133,7 @@ public class Triple { if (global) { o = JenaTripleStore.getObject(s, p); } else { - o = JenaTripleStore.getObject(s, p, username); + o = JenaTripleStore.getPrivateObject(s, p, username); } diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index c889b483d..0f97cec05 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -330,6 +330,7 @@ public final class HTTPDFileHandler { if (!accessGranted) { final boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost)); accessGranted = 
accessFromLocalhost && realmValue != null && realmProp.length() > 6 && (adminAccountBase64MD5.equals(realmValue)); + Log.logInfo("HTTPDFileHandler", "access from localhost blocked, clientIP=" + clientIP); } // -5- a password is configured and access comes with matching http-authentify diff --git a/source/net/yacy/cora/lod/JenaTripleStore.java b/source/net/yacy/cora/lod/JenaTripleStore.java index 2a1fca8be..6b316c6d1 100644 --- a/source/net/yacy/cora/lod/JenaTripleStore.java +++ b/source/net/yacy/cora/lod/JenaTripleStore.java @@ -24,6 +24,7 @@ import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.ResIterator; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.util.FileManager; @@ -91,21 +92,15 @@ public class JenaTripleStore { } public static void addFile(String rdffile) { - Model tmp = ModelFactory.createDefaultModel(); - - try { InputStream in = new ByteArrayInputStream(UTF8.getBytes(rdffile)); // read the RDF/XML file tmp.read(in, null); + } finally { + model = model.union(tmp); } - finally - { - model = model.union(tmp); - } - } public static void saveFile(String filename) { @@ -133,65 +128,25 @@ public class JenaTripleStore { init(model); } - - public static Resource getResource(String uri, String username) { - - if (privatestorage != null && privatestorage.containsKey(username)) { - - return getResource (uri, privatestorage.get(username)); - } - - return null; - } - /** * Return a Resource instance with the given URI in this model. 
* @param uri * @return */ public static Resource getResource(String uri) { - return getResource(uri, model); - } - - public static Resource getResource(String uri, Model model) { return model.getResource(uri); } - public static Resource getProperty(String uri, String username) { - - if (privatestorage != null && privatestorage.containsKey(username)) { - - return getProperty (uri, privatestorage.get(username)); - } - - return null; - } - - /** - * Return a Property instance in this model. - * @param uri - * @return - */ - public static Property getProperty(String uri) { - return getProperty(uri, model); - } - - public static Property getProperty(String uri, Model model) { - return model.getProperty(uri); - } - public static void deleteObjects(String subject, String predicate) { Resource r = subject == null ? null : getResource(subject); - Property pr = getProperty(predicate); + Property pr = model.getProperty(predicate); JenaTripleStore.model.removeAll(r, pr, (Resource) null); } public static void addTriple(String subject, String predicate, String object, String username) { if (privatestorage != null && privatestorage.containsKey(username)) { - addTriple (subject, predicate, object, privatestorage.get(username)); } - } public static void addTriple(String subject, String predicate, String object) { @@ -199,55 +154,46 @@ public class JenaTripleStore { } public static void addTriple(String subject, String predicate, String object, Model model) { - Resource r = getResource(subject, model); - Property pr = getProperty(predicate, model); + Resource r = model.getResource(subject); + Property pr = model.getProperty(predicate); r.addProperty(pr, object); Log.logInfo("TRIPLESTORE", "ADD " + subject + " - " + predicate + " - " + object); } - public static String getObject (final String subject, final String predicate) { - + public static String getObject(final String subject, final String predicate) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... 
"); - Iterator ni = JenaTripleStore.getObjects (subject, predicate); + Iterator ni = JenaTripleStore.getObjects(subject, predicate); if (!ni.hasNext()) return ""; return ni.next().toString(); - + } + + public static Iterator getObjects(final String subject, final String predicate) { + final Resource r = subject == null ? null : JenaTripleStore.getResource(subject); + return getObjects(r, predicate); } - public static String getObject (final String subject, final String predicate, final String username) { - + public static String getPrivateObject(final String subject, final String predicate, final String username) { Log.logInfo("TRIPLESTORE", "GET " + subject + " - " + predicate + " ... ("+username+")"); - Iterator ni = JenaTripleStore.getObjects (subject, predicate, username); + Iterator ni = JenaTripleStore.getPrivateObjects(subject, predicate, username); if (!ni.hasNext()) return ""; return ni.next().toString(); - } - public static Iterator getObjects(final String subject, final String predicate, final String username) { - - final Resource r = JenaTripleStore.getResource(subject, username); - + private static Iterator getPrivateObjects(final String subject, final String predicate, final String username) { if (privatestorage != null && privatestorage.containsKey(username)) { - - return getObjects(r, predicate, privatestorage.get(username)); + return getObjects(privatestorage.get(username).getResource(subject), predicate, privatestorage.get(username)); } - return null; } - - public static Iterator getObjects(final String subject, final String predicate) { - final Resource r = JenaTripleStore.getResource(subject); - return getObjects(r, predicate); - } public static Iterator getObjects(final Resource r, final String predicate) { - return getObjects (r, predicate, model); + return getObjects(r, predicate, model); } - public static Iterator getObjects(final Resource r, final String predicate, final Model model) { - final Property pr = 
JenaTripleStore.getProperty(predicate, model); + private static Iterator getObjects(final Resource r, final String predicate, final Model model) { + final Property pr = model.getProperty(predicate); final StmtIterator iter = model.listStatements(r, pr, (Resource) null); return new Iterator() { @@ -265,6 +211,30 @@ public class JenaTripleStore { } }; } + + public static Iterator getSubjects(final String predicate) { + return getSubjects(predicate, model); + } + + private static Iterator getSubjects(final String predicate, final Model model) { + final Property pr = model.getProperty(predicate); + final ResIterator iter = model.listSubjectsWithProperty(pr); + + return new Iterator() { + @Override + public boolean hasNext() { + return iter.hasNext(); + } + @Override + public Resource next() { + return iter.nextResource(); + } + @Override + public void remove() { + iter.remove(); + } + }; + } public static void initPrivateStores() { diff --git a/source/net/yacy/cora/lod/vocabulary/Tagging.java b/source/net/yacy/cora/lod/vocabulary/Tagging.java index 5f42af68a..96992b8ad 100644 --- a/source/net/yacy/cora/lod/vocabulary/Tagging.java +++ b/source/net/yacy/cora/lod/vocabulary/Tagging.java @@ -82,15 +82,58 @@ public class Tagging { this(name); this.propFile = propFile; this.objectspace = objectspace; - BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); - w.write("#objectspace:" + objectspace + "\n"); - for (Map.Entry e: table.entrySet()) { - String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); - String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); - w.write(e.getKey() + (s == null || s.length() == 0 ? "" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? 
"" : "#" + o) + "\n"); + if (propFile == null) { + this.synonym2term.clear(); + this.term2synonym.clear(); + this.term2objectlink.clear(); + this.synonym2synonyms.clear(); + this.namespace = DEFAULT_NAMESPACE; + this.predicate = this.namespace + this.navigatorName; + + String term, v; + String[] tags; + vocloop: for (Map.Entry e: table.entrySet()) { + if (e.getValue().getSynonymsCSV() == null || e.getValue().getSynonymsCSV().length() == 0) { + term = normalizeKey(e.getKey()); + v = normalizeWord(e.getKey()); + this.synonym2term.put(v, term); + this.term2synonym.put(term, v); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + continue vocloop; + } + term = normalizeKey(e.getKey()); + tags = e.getValue().getSynonymsList(); + Set synonyms = new HashSet(); + synonyms.add(term); + tagloop: for (String synonym: tags) { + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + synonym = normalizeWord(synonym); + if (synonym.length() == 0) continue tagloop; + synonyms.add(synonym); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + } + String synonym = normalizeWord(term); + this.synonym2term.put(synonym, term); + this.term2synonym.put(term, synonym); + if (e.getValue().getObjectlink() != null && e.getValue().getObjectlink().length() > 0) this.term2objectlink.put(term, e.getValue().getObjectlink()); + synonyms.add(synonym); + for (String s: synonyms) { + this.synonym2synonyms.put(s, synonyms); + } + } + } else { + BufferedWriter w = new BufferedWriter(new FileWriter(propFile)); + if (objectspace != null && objectspace.length() > 0) w.write("#objectspace:" + objectspace + "\n"); + for (Map.Entry e: table.entrySet()) { + String s = e.getValue() == null ? "" : e.getValue().getSynonymsCSV(); + String o = e.getValue() == null ? "" : e.getValue().getObjectlink(); + w.write(e.getKey() + (s == null || s.length() == 0 ? 
"" : ":" + e.getValue()) + (o == null || o.length() == 0 || o.equals(objectspace + e.getKey()) ? "" : "#" + o) + "\n"); + } + w.close(); + init(); } - w.close(); - init(); } /** diff --git a/source/net/yacy/document/Autotagging.java b/source/net/yacy/document/Autotagging.java index e3559f12a..8b10e2fea 100644 --- a/source/net/yacy/document/Autotagging.java +++ b/source/net/yacy/document/Autotagging.java @@ -49,26 +49,20 @@ public class Autotagging { private final Map vocabularies; // mapping from vocabulary name to the tagging vocabulary private final Map allTags; - public Autotagging(final File autotaggingPath, char prefixChar) { - this.vocabularies = new ConcurrentHashMap(); - this.autotaggingPath = autotaggingPath; - this.prefixChar = prefixChar; - this.allTags = new ConcurrentHashMap(); - init(); - } - - /** - * scan the input directory and load all tag tables (again) - * a tag table is a property file where + * create a Autotagging object: + * scan the input directory and load all tag tables. 
+ * A tag table is a property file where * the key is the tag name * the value is the visible name for the tag (shown in a navigator) * properties without values are allowed (the value is then set to the key) * also the value can be used as a tag */ - public void init() { - this.vocabularies.clear(); - this.allTags.clear(); + public Autotagging(final File autotaggingPath, char prefixChar) { + this.vocabularies = new ConcurrentHashMap(); + this.autotaggingPath = autotaggingPath; + this.prefixChar = prefixChar; + this.allTags = new ConcurrentHashMap(); if (this.autotaggingPath == null || !this.autotaggingPath.exists()) { return; } diff --git a/source/net/yacy/document/LibraryProvider.java b/source/net/yacy/document/LibraryProvider.java index ea7f9d32e..92d6d352f 100644 --- a/source/net/yacy/document/LibraryProvider.java +++ b/source/net/yacy/document/LibraryProvider.java @@ -32,15 +32,22 @@ import java.io.InputStreamReader; import java.io.PrintWriter; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.zip.ZipException; import java.util.zip.ZipFile; +import com.hp.hpl.jena.rdf.model.Resource; + import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.lod.JenaTripleStore; +import net.yacy.cora.lod.vocabulary.Tagging; +import net.yacy.cora.lod.vocabulary.Tagging.SOTuple; import net.yacy.cora.storage.Files; import net.yacy.document.geolocalization.GeonamesLocation; import net.yacy.document.geolocalization.OpenGeoDBLocation; @@ -204,12 +211,36 @@ public class LibraryProvider { } } // read the triplestore and generate a vocabulary - + Map map = new HashMap(); + Iterator i = JenaTripleStore.getSubjects("http://dbpedia.org/ontology/individualisedPnd"); + while (i.hasNext()) { + Resource resource = i.next(); + String subject = resource.toString(); + + // 
prepare a proper term from the subject uri + int p = subject.lastIndexOf('/'); + if (p < 0) continue; + String term = subject.substring(p + 1); + //String objectspace = subject.substring(0, p); + p = term.indexOf('('); + if (p >= 0) term = term.substring(0, p); + term = term.replaceAll("_", " ").trim(); + if (term.length() == 0) continue; + + // store the term into the vocabulary map + map.put(term, new SOTuple("", subject)); + } + try { + Tagging pndVoc = new Tagging("Persons", null, "", map); + autotagging.addVocabulary(pndVoc); + } catch (IOException e) { + } + } public static void deactivatePND() { // remove the PND Triples from the triplestore JenaTripleStore.deleteObjects(null, "http://dbpedia.org/ontology/individualisedPnd"); + autotagging.deleteVocabulary("Persons"); } /*