From dcc72799c4c4a301c837c1eda489c15e222fb074 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Fri, 10 Aug 2012 07:45:43 +0200 Subject: [PATCH] better abstraction for result writers using controlled vocabularies and URIRefs --- source/net/yacy/cora/document/RSSMessage.java | 39 +++++----- source/net/yacy/cora/lod/Vocabulary.java | 8 ++ .../cora/lod/vocabulary/CreativeCommons.java | 5 ++ .../net/yacy/cora/lod/vocabulary/DCTerms.java | 5 ++ .../yacy/cora/lod/vocabulary/DublinCore.java | 8 +- source/net/yacy/cora/lod/vocabulary/Foaf.java | 12 ++- source/net/yacy/cora/lod/vocabulary/Geo.java | 8 +- .../yacy/cora/lod/vocabulary/HttpHeader.java | 5 ++ source/net/yacy/cora/lod/vocabulary/Owl.java | 5 ++ source/net/yacy/cora/lod/vocabulary/Rdf.java | 5 ++ .../cora/lod/vocabulary/YaCyMetadata.java | 4 + .../solr/EnhancedXMLResponseWriter.java | 16 ++-- .../solr/OpensearchResponseWriter.java | 76 +++++++++++++++---- .../services/federated/solr/SolrType.java | 6 +- .../data/meta/MetadataVocabulary.java | 15 ++-- 15 files changed, 164 insertions(+), 53 deletions(-) diff --git a/source/net/yacy/cora/document/RSSMessage.java b/source/net/yacy/cora/document/RSSMessage.java index cb643e7be..1e10a1480 100644 --- a/source/net/yacy/cora/document/RSSMessage.java +++ b/source/net/yacy/cora/document/RSSMessage.java @@ -35,6 +35,8 @@ import java.util.Set; import net.yacy.cora.date.GenericFormatter; import net.yacy.cora.date.ISO8601Formatter; +import net.yacy.cora.lod.vocabulary.DublinCore; +import net.yacy.cora.lod.vocabulary.Geo; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.kelondro.data.meta.DigestURI; @@ -42,30 +44,29 @@ public class RSSMessage implements Hit, Comparable, Comparator keys; - private Token(final String keylist) { - final String[] k = keylist.split(","); + private Token(final String[] keylist) { this.keys = new HashSet(); - this.keys.addAll(Arrays.asList(k)); + this.keys.addAll(Arrays.asList(keylist)); } public String valueFrom(final Map map, 
final String dflt) { diff --git a/source/net/yacy/cora/lod/Vocabulary.java b/source/net/yacy/cora/lod/Vocabulary.java index 0bf05a3cf..3566128e3 100644 --- a/source/net/yacy/cora/lod/Vocabulary.java +++ b/source/net/yacy/cora/lod/Vocabulary.java @@ -55,6 +55,14 @@ public interface Vocabulary { */ public String getPredicate(); + /** + * The URI Reference as defined in http://www.w3.org/TR/rdf-concepts/ 2.2.3 + * This is a combination of the namespace prefix and the constant name, + * concatenated with ':'. + * @return + */ + public String getURIref(); + /** * get a set of literals that are allowed for the predicate as values * @return diff --git a/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java b/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java index 344ed9b22..8d94ff1fb 100644 --- a/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java +++ b/source/net/yacy/cora/lod/vocabulary/CreativeCommons.java @@ -204,4 +204,9 @@ public enum CreativeCommons implements Vocabulary { public String getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } } diff --git a/source/net/yacy/cora/lod/vocabulary/DCTerms.java b/source/net/yacy/cora/lod/vocabulary/DCTerms.java index 52be7ddfe..ea9fea78c 100644 --- a/source/net/yacy/cora/lod/vocabulary/DCTerms.java +++ b/source/net/yacy/cora/lod/vocabulary/DCTerms.java @@ -41,4 +41,9 @@ public enum DCTerms implements Vocabulary { public String getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } } diff --git a/source/net/yacy/cora/lod/vocabulary/DublinCore.java b/source/net/yacy/cora/lod/vocabulary/DublinCore.java index cd870942d..0c29f4515 100644 --- a/source/net/yacy/cora/lod/vocabulary/DublinCore.java +++ b/source/net/yacy/cora/lod/vocabulary/DublinCore.java @@ -51,10 +51,11 @@ public enum DublinCore implements Vocabulary { public final static String IDENTIFIER = 
"http://dublincore.org/documents/2010/10/11/dces/"; public final static String PREFIX = "dc"; - private final String predicate; + private final String predicate, uriref; private DublinCore() { this.predicate = IDENTIFIER + this.name().toLowerCase(); + this.uriref = PREFIX + ':' + this.name().toLowerCase(); } @Override @@ -76,4 +77,9 @@ public enum DublinCore implements Vocabulary { public String getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return this.uriref; + } } diff --git a/source/net/yacy/cora/lod/vocabulary/Foaf.java b/source/net/yacy/cora/lod/vocabulary/Foaf.java index 20c9a9458..760c969e1 100644 --- a/source/net/yacy/cora/lod/vocabulary/Foaf.java +++ b/source/net/yacy/cora/lod/vocabulary/Foaf.java @@ -11,12 +11,12 @@ * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see . 
@@ -47,7 +47,7 @@ public enum Foaf implements Vocabulary { // TODO Auto-generated method stub return null; } - + @Override public Set getLiterals() { return null; @@ -59,4 +59,10 @@ public enum Foaf implements Vocabulary { return null; } + @Override + public String getURIref() { + // TODO Auto-generated method stub + return null; + } + } diff --git a/source/net/yacy/cora/lod/vocabulary/Geo.java b/source/net/yacy/cora/lod/vocabulary/Geo.java index a820e3052..1b6084bbb 100644 --- a/source/net/yacy/cora/lod/vocabulary/Geo.java +++ b/source/net/yacy/cora/lod/vocabulary/Geo.java @@ -37,10 +37,11 @@ public enum Geo implements Vocabulary { public final static String NAMESPACE = "http://www.w3.org/2003/01/geo/wgs84_pos#"; public final static String PREFIX = "geo"; - private final String predicate; + private final String predicate, uriref; private Geo() { this.predicate = NAMESPACE + this.name().toLowerCase(); + this.uriref = PREFIX + ':' + this.name().toLowerCase(); } @Override @@ -62,4 +63,9 @@ public enum Geo implements Vocabulary { public String getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return this.uriref; + } } \ No newline at end of file diff --git a/source/net/yacy/cora/lod/vocabulary/HttpHeader.java b/source/net/yacy/cora/lod/vocabulary/HttpHeader.java index b50f05c56..ae6934295 100644 --- a/source/net/yacy/cora/lod/vocabulary/HttpHeader.java +++ b/source/net/yacy/cora/lod/vocabulary/HttpHeader.java @@ -114,4 +114,9 @@ public enum HttpHeader implements Vocabulary { public String getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } } diff --git a/source/net/yacy/cora/lod/vocabulary/Owl.java b/source/net/yacy/cora/lod/vocabulary/Owl.java index c85003804..a4c56e5e0 100644 --- a/source/net/yacy/cora/lod/vocabulary/Owl.java +++ b/source/net/yacy/cora/lod/vocabulary/Owl.java @@ -61,4 +61,9 @@ public enum Owl implements Vocabulary { public String 
getPredicate() { return this.predicate; } + + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } } diff --git a/source/net/yacy/cora/lod/vocabulary/Rdf.java b/source/net/yacy/cora/lod/vocabulary/Rdf.java index 812fa2507..a7a74935a 100644 --- a/source/net/yacy/cora/lod/vocabulary/Rdf.java +++ b/source/net/yacy/cora/lod/vocabulary/Rdf.java @@ -66,4 +66,9 @@ public enum Rdf implements Vocabulary { return this.predicate; } + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } + } diff --git a/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java b/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java index df9d8ac3c..afe185b3b 100644 --- a/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java +++ b/source/net/yacy/cora/lod/vocabulary/YaCyMetadata.java @@ -112,4 +112,8 @@ public enum YaCyMetadata implements Vocabulary { return this.predicate; } + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } } diff --git a/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java b/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java index a287445d7..6cbc65ad9 100644 --- a/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java @@ -171,19 +171,21 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter { private static void writeField(final Writer writer, final FieldType type, final String name, final String value) throws IOException { String typeName = type.getTypeName(); - if (typeName.equals("text_general") || typeName.equals("string") || typeName.equals("text_en_splitting_tight")) { + if (typeName.equals(SolrType.text_general.printName()) || + typeName.equals(SolrType.string.printName()) || + typeName.equals(SolrType.text_en_splitting_tight.printName())) { writeTag(writer, "str", name, value, true); - } else if 
(typeName.equals("boolean")) { + } else if (typeName.equals(SolrType.bool.printName())) { writeTag(writer, "bool", name, "F".equals(value) ? "false" : "true", true); - } else if (typeName.equals("int")) { + } else if (typeName.equals(SolrType.integer.printName())) { writeTag(writer, "int", name, value, true); - } else if (typeName.equals("long")) { + } else if (typeName.equals(SolrType.tlong.printName())) { writeTag(writer, "long", name, value, true); - } else if (typeName.equals("date")) { + } else if (typeName.equals(SolrType.date.printName())) { writeTag(writer, "date", name, DateField.formatExternal(new Date(Long.parseLong(value))), true); - } else if (typeName.equals("float")) { + } else if (typeName.equals(SolrType.tfloat.printName())) { writeTag(writer, "float", name, value, true); - } else if (typeName.equals("double")) { + } else if (typeName.equals(SolrType.tdouble.printName())) { writeTag(writer, "double", name, value, true); } } diff --git a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java index ce203c930..0a711aec1 100644 --- a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java @@ -26,6 +26,10 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; +import net.yacy.cora.document.RSSMessage; +import net.yacy.cora.lod.vocabulary.DublinCore; +import net.yacy.search.index.YaCySchema; + import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.solr.common.util.NamedList; @@ -102,11 +106,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter { //resHead.maxScore = response.maxScore(); // write header - startTagOpen(writer, "channel"); + openTag(writer, "channel"); solitaireTag(writer, "opensearch:totalResults", Integer.toString(resHead.numFound)); 
solitaireTag(writer, "opensearch:startIndex", Integer.toString(resHead.offset)); solitaireTag(writer, "opensearch:itemsPerPage", Integer.toString(resHead.rows)); - solitaireTag(writer, "title", this.title); + solitaireTag(writer, RSSMessage.Token.title.name(), this.title); //solitaireTag(writer, "description", ""); //solitaireTag(writer, "link", ""); //solitaireTag(writer, "image", ""); @@ -116,7 +120,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter { SolrIndexSearcher searcher = request.getSearcher(); DocIterator iterator = response.iterator(); for (int i = 0; i < responseCount; i++) { - startTagOpen(writer, "item"); + openTag(writer, "item"); int id = iterator.nextDoc(); Document doc = searcher.doc(id, DEFAULT_FIELD_LIST); List fields = doc.getFields(); @@ -126,19 +130,50 @@ public class OpensearchResponseWriter implements QueryResponseWriter { for (int j = 0; j < fieldc; j++) { Fieldable f1 = fields.get(j); String fieldName = f1.name(); - if ("id".equals(fieldName)) {writer.write(""); writer.write(f1.stringValue()); writer.write(""); writer.write(lb); continue;} - if ("sku".equals(fieldName)) {solitaireTag(writer, "link", f1.stringValue()); continue;} - if ("title".equals(fieldName)) {solitaireTag(writer, "title", f1.stringValue()); texts.add(f1.stringValue()); continue;} - if ("last_modified".equals(fieldName)) {solitaireTag(writer, "pubDate", f1.stringValue()); continue;} - if ("".equals(fieldName)) {solitaireTag(writer, "dc:publisher", f1.stringValue()); continue;} - if ("".equals(fieldName)) {solitaireTag(writer, "dc:creator", f1.stringValue()); continue;} - if ("description".equals(fieldName)) {description = f1.stringValue(); solitaireTag(writer, "dc:subject", description); texts.add(description); continue;} - if ("text_t".equals(fieldName)) {texts.add(f1.stringValue()); continue;} - if ("h1_txt".equals(fieldName) || "h2_txt".equals(fieldName) || "h3_txt".equals(fieldName) || - "h4_txt".equals(fieldName) || 
"h5_txt".equals(fieldName) || "h6_txt".equals(fieldName)) {texts.add(f1.stringValue()); continue;} + if (YaCySchema.id.name().equals(fieldName)) { + solitaireTag(writer, RSSMessage.Token.guid.name(), f1.stringValue(), "isPermaLink=\"false\""); + continue; + } + if (YaCySchema.sku.name().equals(fieldName)) { + solitaireTag(writer, RSSMessage.Token.link.name(), f1.stringValue()); + continue; + } + if (YaCySchema.title.name().equals(fieldName)) { + solitaireTag(writer, RSSMessage.Token.title.name(), f1.stringValue()); + texts.add(f1.stringValue()); + continue; + } + if (YaCySchema.last_modified.name().equals(fieldName)) { + solitaireTag(writer, RSSMessage.Token.pubDate.name(), f1.stringValue()); + continue; + } + if (YaCySchema.publisher_t.name().equals(fieldName)) { + solitaireTag(writer, DublinCore.Publisher.getURIref(), f1.stringValue()); + continue; + } + if (YaCySchema.author.name().equals(fieldName)) { + solitaireTag(writer, DublinCore.Creator.getURIref(), f1.stringValue()); + continue; + } + if (YaCySchema.description.name().equals(fieldName)) { + description = f1.stringValue(); + solitaireTag(writer, DublinCore.Description.getURIref(), description); + texts.add(description); + continue; + } + if (YaCySchema.text_t.name().equals(fieldName)) { + texts.add(f1.stringValue()); + continue; + } + if (YaCySchema.h1_txt.name().equals(fieldName) || YaCySchema.h2_txt.name().equals(fieldName) || + YaCySchema.h3_txt.name().equals(fieldName) || YaCySchema.h4_txt.name().equals(fieldName) || + YaCySchema.h5_txt.name().equals(fieldName) || YaCySchema.h6_txt.name().equals(fieldName)) { + texts.add(f1.stringValue()); + continue; + } } // compute snippet from texts - solitaireTag(writer, "description", description); + solitaireTag(writer, RSSMessage.Token.description.name(), description); closeTag(writer, "item"); } @@ -146,7 +181,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter { writer.write(XML_STOP); } - public static void startTagOpen(final Writer 
writer, final String tag) throws IOException { + public static void openTag(final Writer writer, final String tag) throws IOException { writer.write('<'); writer.write(tag); writer.write('>'); writer.write(lb); } @@ -155,11 +190,22 @@ public class OpensearchResponseWriter implements QueryResponseWriter { } public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException { + if (value == null || value.length() == 0) return; writer.write("<"); writer.write(tagname); writer.write('>'); writer.write(value); writer.write("'); writer.write(lb); } + public static void solitaireTag(final Writer writer, final String tagname, String value, String attr) throws IOException { + if (value == null || value.length() == 0) return; + writer.write("<"); writer.write(tagname); + if (attr.charAt(0) != ' ') writer.write(' '); + writer.write(attr); + writer.write('>'); + writer.write(value); + writer.write("'); writer.write(lb); + } + } /* diff --git a/source/net/yacy/cora/services/federated/solr/SolrType.java b/source/net/yacy/cora/services/federated/solr/SolrType.java index d35b0418e..36320494e 100644 --- a/source/net/yacy/cora/services/federated/solr/SolrType.java +++ b/source/net/yacy/cora/services/federated/solr/SolrType.java @@ -31,8 +31,10 @@ public enum SolrType { text_en_splitting_tight, date, integer("int"), - tdouble, - bool("boolean"); + bool("boolean"), + tlong("long"), + tfloat("float"), + tdouble("double"); private String printName; private SolrType() { diff --git a/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java b/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java index 546eba1d6..d278b7063 100644 --- a/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java +++ b/source/net/yacy/kelondro/data/meta/MetadataVocabulary.java @@ -9,12 +9,12 @@ * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your 
option) any later version. - * + * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. - * + * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see . @@ -28,18 +28,18 @@ import net.yacy.cora.lod.Literal; import net.yacy.cora.lod.Vocabulary; public enum MetadataVocabulary implements Vocabulary { - + moddate, url; public final static String IDENTIFIER = "http://yacy.net/metadata"; public final static String PREFIX = "ym"; private final String predicate; - + private MetadataVocabulary() { this.predicate = PREFIX + ":" + this.name().toLowerCase(); } - + @Override public String getNamespace() { return IDENTIFIER; @@ -59,4 +59,9 @@ public enum MetadataVocabulary implements Vocabulary { public Set getLiterals() { return null; } + + @Override + public String getURIref() { + return PREFIX + ':' + this.name(); + } }