diff --git a/source/net/yacy/cora/lod/Literal.java b/source/net/yacy/cora/lod/Literal.java new file mode 100644 index 000000000..c5794c5c2 --- /dev/null +++ b/source/net/yacy/cora/lod/Literal.java @@ -0,0 +1,74 @@ +/** + * Literal + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 18.12.2011 at http://yacy.net + * + * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ + * $LastChangedRevision: 7653 $ + * $LastChangedBy: orbiter $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.cora.lod; + +import java.util.regex.Pattern; + +import net.yacy.cora.document.MultiProtocolURI; + +/** + * A literal is the possible value for a predicate. + * A set of literals is the norm of a predicate. + * Each literal can have an attached explanation which we express + * as a link to the resource that explains the literal. + */ +public interface Literal { + + /** + * the terminal is the actual content of the property and also + * the visual representation of the content of a property if the + * literal is assigned to that property. + * @return a string representing the literal + */ + public String getTerminal(); + + /** + * the subject of a literal is a reference to a resource that + * explains the literal. 
If an object has attached properties + * from different vocabularies and properties assigned to the + * object have actual literal instances assigned, then the set + * of subjects of these literals explains the object as a co-notation + * to knowledge. Subjects of literals shall therefore be + * knowledge authorities for the predicates where the literal is + * assigned. + * @return a URL to a knowledge authority for the literal + */ + public MultiProtocolURI getSubject(); + + /** + * if a resource is poorly annotated with metadata and it shall + * be automatically annotated, then the terminal of a literal + * may be too weak to discover literals in the resource. An additional + * discovery pattern may help to reduce the set of literals that can + * be discovered automatically. A discovery pattern is then not + * a replacement of the terminal itself, it is an additional pattern + * that must occur also in the resource where also the terminal of + * the literal appears. If the terminal itself is sufficient to discover + * the literal, then the discovery pattern may be a catch-all '.*' pattern. + * @return the discovery pattern to identify the literal in the resource. + */ + public Pattern getDiscoveryPattern(); + +} diff --git a/source/net/yacy/cora/lod/Node.java b/source/net/yacy/cora/lod/Node.java index 60f2026e2..d1b0fc34a 100644 --- a/source/net/yacy/cora/lod/Node.java +++ b/source/net/yacy/cora/lod/Node.java @@ -28,20 +28,27 @@ import java.util.HashMap; import java.util.Map; import net.yacy.cora.document.UTF8; -import net.yacy.cora.lod.vocabulary.Vocabulary; +import net.yacy.cora.lod.vocabulary.Rdf; +/** + * class for an RDF node element. 
For a short primer see + * http://www.w3.org/TR/REC-rdf-syntax/ + */ public class Node extends HashMap implements Map { private static final long serialVersionUID = -6715118942251224832L; public static final String SUBJECT = "rdf:about"; - public Node() { + private final Rdf type; /* RDF element kind of this node; assigned once by the Node(Rdf) constructor */ + + public Node(Rdf type) { super(); + this.type = type; } - public Node(byte[] subject) { - super(); + public Node(Rdf type, byte[] subject) { + this(type); this.put(SUBJECT, subject); } @@ -51,11 +58,15 @@ public class Node extends HashMap implements Map * for a blank node the SUBJECT can be omitted * @param set */ - public Node(Map set) { - super(); + public Node(Rdf type, Map set) { + this(type); this.putAll(set); } + public Rdf getType() { /* exposes the element kind given at construction */ + return this.type; + } + public boolean isBlank() { return !this.containsKey(SUBJECT); } @@ -82,7 +93,8 @@ public class Node extends HashMap implements Map public byte[] toObject() { StringBuilder sb = new StringBuilder(this.size() * 50); - sb.append("\n"); @@ -92,7 +104,9 @@ public class Node extends HashMap implements Map sb.append(UTF8.String(entry.getValue())); sb.append("\n"); } - sb.append("\n"); + sb.append("\n"); return UTF8.getBytes(sb); } diff --git a/source/net/yacy/cora/lod/Syntax.java b/source/net/yacy/cora/lod/Syntax.java new file mode 100644 index 000000000..b34a78e87 --- /dev/null +++ b/source/net/yacy/cora/lod/Syntax.java @@ -0,0 +1,78 @@ +/** + * Syntax + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 17.12.2011 at http://yacy.net + * + * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ + * $LastChangedRevision: 7653 $ + * $LastChangedBy: orbiter $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.cora.lod; + +import java.util.HashMap; +import java.util.Map; + +import net.yacy.cora.lod.vocabulary.CreativeCommons; +import net.yacy.cora.lod.vocabulary.DublinCore; +import net.yacy.cora.lod.vocabulary.Foaf; +import net.yacy.cora.lod.vocabulary.Geo; +import net.yacy.cora.lod.vocabulary.HttpHeader; +import net.yacy.cora.lod.vocabulary.Rdf; +import net.yacy.cora.lod.vocabulary.YaCyMetadata; + +/** + * helper class to understand xml tags and vocabularies: maps predicate names to their vocabulary constants + */ +public class Syntax { + + private final static Class<?>[] vocabularies = new Class<?>[]{ + CreativeCommons.class, + DublinCore.class, + Foaf.class, + Geo.class, + HttpHeader.class, + Rdf.class, + YaCyMetadata.class + }; + + private final static Map<String, Vocabulary> tagMap = new HashMap<String, Vocabulary>(); + + static { /* index every enum constant of every listed vocabulary by its predicate */ + Vocabulary voc; + for (Class<?> v: vocabularies) { + Object[] cs = v.getEnumConstants(); + for (Object c: cs) { + voc = (Vocabulary) c; + tagMap.put(voc.getPredicate(), voc); + } + } + } + + /** + * recognizer for vocabulary tag names + * @param tag a predicate name as returned by Vocabulary.getPredicate() + * @return the vocabulary object for the given tag, or null if the tag is not registered + */ + public static Vocabulary getVocabulary(String tag) { + return tagMap.get(tag); + } + + public static void main(String[] args) { + System.out.println(tagMap); + } +} diff --git a/source/net/yacy/cora/lod/TripleStore.java b/source/net/yacy/cora/lod/TripleStore.java index 43b22f0bf..f39bcd4c1 100644 --- a/source/net/yacy/cora/lod/TripleStore.java +++ b/source/net/yacy/cora/lod/TripleStore.java @@ -29,6 +29,7 @@ import java.util.AbstractMap; import java.util.Iterator; import java.util.Map; +import 
net.yacy.cora.lod.vocabulary.Rdf; import net.yacy.cora.order.ByteOrder; import net.yacy.cora.order.CloneableIterator; import net.yacy.cora.storage.MapStore; @@ -52,7 +53,7 @@ public class TripleStore { public Node get(byte[] id) { Map n = this.store.get(id); if (n == null) return null; - return new Node(n); + return new Node(Rdf.Description, n); /* the stored Map is wrapped as an rdf:Description node */ } public boolean isEmpty() { @@ -62,7 +63,7 @@ public class TripleStore { public Node put(byte[] id, Node node) { Map n = this.store.put(id, node); if (n == null) return null; - return new Node(n); + return new Node(Rdf.Description, n); } public void putAll(TripleStore entries) { @@ -77,7 +78,7 @@ public class TripleStore { public Node remove(byte[] id) { Map n = this.store.remove(id); if (n == null) return null; - return new Node(n); + return new Node(Rdf.Description, n); } public int size() { diff --git a/source/net/yacy/cora/lod/vocabulary/Vocabulary.java b/source/net/yacy/cora/lod/Vocabulary.java similarity index 61% rename from source/net/yacy/cora/lod/vocabulary/Vocabulary.java rename to source/net/yacy/cora/lod/Vocabulary.java index 93a432bf0..cc1c0d658 100644 --- a/source/net/yacy/cora/lod/vocabulary/Vocabulary.java +++ b/source/net/yacy/cora/lod/Vocabulary.java @@ -22,9 +22,19 @@ * If not, see <http://www.gnu.org/licenses/>. */ +package net.yacy.cora.lod; -package net.yacy.cora.lod.vocabulary; +import java.util.Set; + +/* + * A Vocabulary is an interface to an 'extensible enum pattern'. + * We want to have a kind of extensible enum for vocabularies. + * Since enum classes cannot be extended we use a hack as explained in + * http://blogs.oracle.com/darcy/entry/enums_and_mixins . 
+ * For an example for 'extensible enum pattern' see + * http://stackoverflow.com/questions/1414755/java-extend-enum + */ public interface Vocabulary { /** @@ -45,4 +55,18 @@ public interface Vocabulary { */ public String getPredicate(); + /** + * get a set of literals that are allowed for the predicate as values + * @return the allowed literals; may be null when the vocabulary declares none for this predicate + */ + public Set<Literal> getLiterals(); + + /** + * the name method is identical to the java.lang.Enum method. + * If an Enum class for vocabularies + * implements this interface, the name() method is automatically implemented + * + * @return Returns the name of the enum constant as declared in the enum declaration. + */ + public String name(); } diff --git a/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java b/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java new file mode 100644 index 000000000..e4c3d5fe9 --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java @@ -0,0 +1,138 @@ +/** + * CreativeCommons + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 17.12.2011 at http://yacy.net + * + * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ + * $LastChangedRevision: 7653 $ + * $LastChangedBy: orbiter $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package net.yacy.cora.lod.vocabulary; + +import java.net.MalformedURLException; +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; + +import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + +/** + * a vocabulary for creative commons license declarations. see: + * http://creativecommons.org/ns# + */ +public enum CreativeCommons implements Vocabulary { + + // License Properties + permits(new Literal[]{ + PermitLiteral.Reproduction, + PermitLiteral.Distribution, + PermitLiteral.DerivativeWorks, + PermitLiteral.Sharing}), + requires, + prohibits, + jurisdiction, + legalcode, + deprecatedOn, + + // Work Properties + license, + morePermissions, + attributionName, + attributionURL, + useGuidelines; + + + enum PermitLiteral implements Literal { /* the literals registered as values of the permits predicate */ + + Reproduction("Reproduction", null, ".*"), + Distribution("Distribution", null, ".*"), + DerivativeWorks("Derivative Works",null, ".*"), + Sharing("Sharing", null, ".*"); + + String terminal; + MultiProtocolURI subject; + Pattern discoveryPattern; + + private PermitLiteral( + String terminal, + String subject, + String discoveryPattern) { + this.terminal = terminal; + try { + this.subject = subject == null ? null : new MultiProtocolURI(subject); + } catch (MalformedURLException e) { /* a malformed subject URL leaves the literal without an explanation link */ + this.subject = null; + } + this.discoveryPattern = Pattern.compile(discoveryPattern == null ? 
".*" : discoveryPattern); + } + + @Override + public String getTerminal() { + return this.terminal; + } + + @Override + public MultiProtocolURI getSubject() { + return this.subject; + } + + @Override + public Pattern getDiscoveryPattern() { + return this.discoveryPattern; + } + } + + public final static String IDENTIFIER = "http://creativecommons.org/ns#"; /* the namespace named in the class comment, not the Dublin Core one */ + public final static String PREFIX = "cc"; + + private final String predicate; + private final Set<Literal> literals; /* null for predicates declared without literals */ + + private CreativeCommons() { + this.predicate = PREFIX + ":" + this.name(); + this.literals = null; + } + + private CreativeCommons(Literal[] literals) { + this.predicate = PREFIX + ":" + this.name(); + this.literals = new HashSet<Literal>(); + for (Literal l: literals) this.literals.add(l); + } + + @Override + public String getIdentifier() { + return IDENTIFIER; + } + + @Override + public String getPrefix() { + return PREFIX; + } + + @Override + public Set<Literal> getLiterals() { + return this.literals; /* the set built in the constructor; null when the constant was declared without literals */ + } + + @Override + public String getPredicate() { + return this.predicate; + } +} diff --git a/source/net/yacy/cora/lod/vocabulary/DublinCore.java b/source/net/yacy/cora/lod/vocabulary/DublinCore.java index 31868c72c..cd461be8c 100644 --- a/source/net/yacy/cora/lod/vocabulary/DublinCore.java +++ b/source/net/yacy/cora/lod/vocabulary/DublinCore.java @@ -25,6 +25,11 @@ package net.yacy.cora.lod.vocabulary; +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + public enum DublinCore implements Vocabulary { Contributor, @@ -61,6 +66,11 @@ public enum DublinCore implements Vocabulary { public String getPrefix() { return PREFIX; } + + @Override + public Set<Literal> getLiterals() { + return null; + } @Override public String getPredicate() { diff --git a/source/net/yacy/cora/lod/vocabulary/Foaf.java b/source/net/yacy/cora/lod/vocabulary/Foaf.java new file mode 100644 index 000000000..aaa92482d --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/Foaf.java @@ -0,0 +1,62 @@ +/** 
+ * Foaf + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 17.12.2011 at http://yacy.net + * + * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ + * $LastChangedRevision: 7653 $ + * $LastChangedBy: orbiter $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. + */ + +package net.yacy.cora.lod.vocabulary; + +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + +/** + * The friend of a friend vocabulary. 
see: + * http://xmlns.com/foaf/spec/ + */ +public enum Foaf implements Vocabulary { + ; /* no constants are declared yet, so the stub methods below cannot be invoked on any instance */ + + @Override + public String getIdentifier() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPrefix() { + // TODO Auto-generated method stub + return null; + } + + @Override + public Set<Literal> getLiterals() { + return null; + } + + @Override + public String getPredicate() { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/source/net/yacy/cora/lod/vocabulary/Geo.java b/source/net/yacy/cora/lod/vocabulary/Geo.java index 020b4a92b..60558e405 100644 --- a/source/net/yacy/cora/lod/vocabulary/Geo.java +++ b/source/net/yacy/cora/lod/vocabulary/Geo.java @@ -24,6 +24,11 @@ package net.yacy.cora.lod.vocabulary; +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + public enum Geo implements Vocabulary { Long, @@ -47,6 +52,11 @@ public enum Geo implements Vocabulary { public String getPrefix() { return PREFIX; } + + @Override + public Set<Literal> getLiterals() { + return null; + } @Override public String getPredicate() { diff --git a/source/net/yacy/cora/lod/vocabulary/HttpHeader.java b/source/net/yacy/cora/lod/vocabulary/HttpHeader.java index 5458c133e..34ae898d3 100644 --- a/source/net/yacy/cora/lod/vocabulary/HttpHeader.java +++ b/source/net/yacy/cora/lod/vocabulary/HttpHeader.java @@ -25,6 +25,11 @@ package net.yacy.cora.lod.vocabulary; +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + public enum HttpHeader implements Vocabulary { //The following properties may appear in nodes of type Request: @@ -99,6 +104,11 @@ public enum HttpHeader implements Vocabulary { public String getPrefix() { return PREFIX; } + + @Override + public Set<Literal> getLiterals() { + return null; + } @Override public String getPredicate() { diff --git a/source/net/yacy/cora/lod/vocabulary/Rdf.java b/source/net/yacy/cora/lod/vocabulary/Rdf.java new file mode 100644 index 
000000000..0081d2fee --- /dev/null +++ b/source/net/yacy/cora/lod/vocabulary/Rdf.java @@ -0,0 +1,69 @@ +/** + * Rdf + * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany + * First released 17.12.2011 at http://yacy.net + * + * $LastChangedDate: 2011-04-14 00:04:23 +0200 (Do, 14 Apr 2011) $ + * $LastChangedRevision: 7653 $ + * $LastChangedBy: orbiter $ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see <http://www.gnu.org/licenses/>. 
+ */ + +package net.yacy.cora.lod.vocabulary; + +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + +public enum Rdf implements Vocabulary { /* element names of the RDF syntax namespace */ + + RDF, + Description, + Bag, + Seq, + Alt; + + public final static String IDENTIFIER = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + public final static String PREFIX = "rdf"; + + private final String predicate; /* PREFIX, a colon and the constant name, e.g. rdf:Description */ + + private Rdf() { + this.predicate = PREFIX + ":" + this.name(); + } + + @Override + public String getIdentifier() { + return IDENTIFIER; + } + + @Override + public String getPrefix() { + return PREFIX; + } + + @Override + public Set<Literal> getLiterals() { + return null; /* no literals are declared for this vocabulary */ + } + + @Override + public String getPredicate() { + return this.predicate; + } + +} diff --git a/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java b/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java index 40623a2a5..34d526521 100644 --- a/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java +++ b/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java @@ -25,6 +25,11 @@ package net.yacy.cora.lod.vocabulary; +import java.util.Set; + +import net.yacy.cora.lod.Literal; +import net.yacy.cora.lod.Vocabulary; + /** * this is the vocabulary of the 'classic' YaCy metadata database */ @@ -86,6 +91,11 @@ public enum YaCyMetadata implements Vocabulary { public String getPrefix() { return PREFIX; } + + @Override + public Set<Literal> getLiterals() { + return null; + } @Override public String getPredicate() {