From 89fe13e73d64da77170d06ad8874c5bfe5b612d0 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 14 Aug 2012 13:19:29 +0200 Subject: [PATCH] enhanced GSA and RSS output format: corrected date, added some missing fields, added xml encoding for utf8 --- .../federated/solr/GSAResponseWriter.java | 28 +++++++++++++------ .../solr/OpensearchResponseWriter.java | 25 +++++++++++++---- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java index 564936f9b..aff9377fe 100644 --- a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java @@ -23,6 +23,7 @@ package net.yacy.cora.services.federated.solr; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -31,6 +32,8 @@ import java.util.Set; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.lod.vocabulary.DublinCore; +import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.document.parser.html.CharacterCoding; import net.yacy.search.index.YaCySchema; import org.apache.lucene.document.Document; @@ -81,8 +84,6 @@ public class GSAResponseWriter implements QueryResponseWriter { }; private static final Set SOLR_FIELDS = new HashSet(); static { - field2tag.put(YaCySchema.last_modified.name(), GSAToken.CACHE_LAST_MODIFIED.name()); - field2tag.put(YaCySchema.load_date_dt.name(), GSAToken.CRAWLDATE.name()); field2tag.put(YaCySchema.language_txt.name(), GSAToken.LANG.name()); SOLR_FIELDS.addAll(field2tag.keySet()); for (YaCySchema field: extrafields) SOLR_FIELDS.add(field.name()); @@ -167,22 +168,33 @@ public class GSAResponseWriter implements QueryResponseWriter { // if the rule is not generic, use the specific here if (YaCySchema.sku.name().equals(fieldName)) { - String U = value.stringValue(); - OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), U); - OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), U); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), CharacterCoding.unicode2xml(value.stringValue(), true)); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), CharacterCoding.unicode2html(value.stringValue(), true)); continue; } if (YaCySchema.title.name().equals(fieldName)) { - OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), value.stringValue()); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), CharacterCoding.unicode2xml(value.stringValue(), true)); texts.add(value.stringValue()); continue; } if (YaCySchema.description.name().equals(fieldName)) { description = value.stringValue(); - OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), description); + OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), CharacterCoding.unicode2xml(description, true)); texts.add(description); continue; } + if (YaCySchema.last_modified.name().equals(fieldName)) { + Date d = new Date(Long.parseLong(value.stringValue())); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.CACHE_LAST_MODIFIED.name(), HeaderFramework.formatRFC1123(d)); + texts.add(value.stringValue()); + continue; + } + if (YaCySchema.load_date_dt.name().equals(fieldName)) { + Date d = new Date(Long.parseLong(value.stringValue())); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123(d)); + texts.add(value.stringValue()); + continue; + } if (YaCySchema.text_t.name().equals(fieldName)) { texts.add(value.stringValue()); continue; @@ -196,7 +208,7 @@ public class GSAResponseWriter implements QueryResponseWriter { } } // compute snippet from texts - OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), description); + OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), CharacterCoding.unicode2xml(description, true)); OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy"); OpensearchResponseWriter.closeTag(writer, "R"); } diff --git a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java index a82bc3d65..20bab1fd0 100644 --- a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java @@ -23,6 +23,7 @@ package net.yacy.cora.services.federated.solr; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -31,6 +32,8 @@ import java.util.Set; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.lod.vocabulary.DublinCore; +import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.document.parser.html.CharacterCoding; import net.yacy.search.index.YaCySchema; import org.apache.lucene.document.Document; @@ -71,7 +74,6 @@ public class OpensearchResponseWriter implements QueryResponseWriter { private static final Set SOLR_FIELDS = new HashSet(); static { field2tag.put(YaCySchema.sku.name(), RSSMessage.Token.link.name()); - field2tag.put(YaCySchema.last_modified.name(), RSSMessage.Token.pubDate.name()); field2tag.put(YaCySchema.publisher_t.name(), DublinCore.Publisher.getURIref()); field2tag.put(YaCySchema.author.name(), DublinCore.Creator.getURIref()); SOLR_FIELDS.addAll(field2tag.keySet()); @@ -133,7 +135,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter { solitaireTag(writer, "opensearch:startIndex", Integer.toString(resHead.offset)); solitaireTag(writer, "opensearch:itemsPerPage", Integer.toString(resHead.rows)); solitaireTag(writer, RSSMessage.Token.title.name(), this.title); - //solitaireTag(writer, "description", ""); + writer.write(""); + solitaireTag(writer, "description", "Search Result"); //solitaireTag(writer, "link", ""); //solitaireTag(writer, "image", ""); @@ -156,7 +159,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter { // apply generic matching rule String stag = field2tag.get(fieldName); if (stag != null) { - solitaireTag(writer, stag, value.stringValue()); + solitaireTag(writer, stag, CharacterCoding.unicode2xml(value.stringValue(), true)); continue; } @@ -166,13 +169,19 @@ public class OpensearchResponseWriter implements QueryResponseWriter { continue; } if (YaCySchema.title.name().equals(fieldName)) { - solitaireTag(writer, RSSMessage.Token.title.name(), value.stringValue()); + solitaireTag(writer, RSSMessage.Token.title.name(), CharacterCoding.unicode2xml(value.stringValue(), true)); + texts.add(value.stringValue()); + continue; + } + if (YaCySchema.last_modified.name().equals(fieldName)) { + Date d = new Date(Long.parseLong(value.stringValue())); + solitaireTag(writer, RSSMessage.Token.pubDate.name(), HeaderFramework.formatRFC1123(d)); texts.add(value.stringValue()); continue; } if (YaCySchema.description.name().equals(fieldName)) { description = value.stringValue(); - solitaireTag(writer, DublinCore.Description.getURIref(), description); + solitaireTag(writer, DublinCore.Description.getURIref(), CharacterCoding.unicode2xml(description, true)); texts.add(description); continue; } @@ -189,7 +198,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter { } } // compute snippet from texts - solitaireTag(writer, RSSMessage.Token.description.name(), description); + solitaireTagNocheck(writer, RSSMessage.Token.description.name(), CharacterCoding.unicode2xml(description, true)); closeTag(writer, "item"); } @@ -207,6 +216,10 @@ public class OpensearchResponseWriter implements QueryResponseWriter { public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException { if (value == null || value.length() == 0) return; + solitaireTagNocheck(writer, tagname, value); + } + + public static void solitaireTagNocheck(final Writer writer, final String tagname, String value) throws IOException { writer.write("<"); writer.write(tagname); writer.write('>'); writer.write(value); writer.write("'); writer.write(lb);