enhanced GSA and RSS output format: corrected date, added some missing

fields, added xml encoding for utf8
pull/1/head
Michael Peter Christen 13 years ago
parent ea49a8aa8c
commit 89fe13e73d

@ -23,6 +23,7 @@ package net.yacy.cora.services.federated.solr;
import java.io.IOException; import java.io.IOException;
import java.io.Writer; import java.io.Writer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -31,6 +32,8 @@ import java.util.Set;
import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.lod.vocabulary.DublinCore; import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.search.index.YaCySchema; import net.yacy.search.index.YaCySchema;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -81,8 +84,6 @@ public class GSAResponseWriter implements QueryResponseWriter {
}; };
private static final Set<String> SOLR_FIELDS = new HashSet<String>(); private static final Set<String> SOLR_FIELDS = new HashSet<String>();
static { static {
field2tag.put(YaCySchema.last_modified.name(), GSAToken.CACHE_LAST_MODIFIED.name());
field2tag.put(YaCySchema.load_date_dt.name(), GSAToken.CRAWLDATE.name());
field2tag.put(YaCySchema.language_txt.name(), GSAToken.LANG.name()); field2tag.put(YaCySchema.language_txt.name(), GSAToken.LANG.name());
SOLR_FIELDS.addAll(field2tag.keySet()); SOLR_FIELDS.addAll(field2tag.keySet());
for (YaCySchema field: extrafields) SOLR_FIELDS.add(field.name()); for (YaCySchema field: extrafields) SOLR_FIELDS.add(field.name());
@ -167,22 +168,33 @@ public class GSAResponseWriter implements QueryResponseWriter {
// if the rule is not generic, use the specific here // if the rule is not generic, use the specific here
if (YaCySchema.sku.name().equals(fieldName)) { if (YaCySchema.sku.name().equals(fieldName)) {
String U = value.stringValue(); OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), CharacterCoding.unicode2xml(value.stringValue(), true));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), U); OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), CharacterCoding.unicode2html(value.stringValue(), true));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), U);
continue; continue;
} }
if (YaCySchema.title.name().equals(fieldName)) { if (YaCySchema.title.name().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), value.stringValue()); OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), CharacterCoding.unicode2xml(value.stringValue(), true));
texts.add(value.stringValue()); texts.add(value.stringValue());
continue; continue;
} }
if (YaCySchema.description.name().equals(fieldName)) { if (YaCySchema.description.name().equals(fieldName)) {
description = value.stringValue(); description = value.stringValue();
OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), description); OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), CharacterCoding.unicode2xml(description, true));
texts.add(description); texts.add(description);
continue; continue;
} }
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.CACHE_LAST_MODIFIED.name(), HeaderFramework.formatRFC1123(d));
texts.add(value.stringValue());
continue;
}
if (YaCySchema.load_date_dt.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123(d));
texts.add(value.stringValue());
continue;
}
if (YaCySchema.text_t.name().equals(fieldName)) { if (YaCySchema.text_t.name().equals(fieldName)) {
texts.add(value.stringValue()); texts.add(value.stringValue());
continue; continue;
@ -196,7 +208,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
} }
} }
// compute snippet from texts // compute snippet from texts
OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), description); OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), CharacterCoding.unicode2xml(description, true));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy"); OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy");
OpensearchResponseWriter.closeTag(writer, "R"); OpensearchResponseWriter.closeTag(writer, "R");
} }

@ -23,6 +23,7 @@ package net.yacy.cora.services.federated.solr;
import java.io.IOException; import java.io.IOException;
import java.io.Writer; import java.io.Writer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -31,6 +32,8 @@ import java.util.Set;
import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.lod.vocabulary.DublinCore; import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.search.index.YaCySchema; import net.yacy.search.index.YaCySchema;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -71,7 +74,6 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
private static final Set<String> SOLR_FIELDS = new HashSet<String>(); private static final Set<String> SOLR_FIELDS = new HashSet<String>();
static { static {
field2tag.put(YaCySchema.sku.name(), RSSMessage.Token.link.name()); field2tag.put(YaCySchema.sku.name(), RSSMessage.Token.link.name());
field2tag.put(YaCySchema.last_modified.name(), RSSMessage.Token.pubDate.name());
field2tag.put(YaCySchema.publisher_t.name(), DublinCore.Publisher.getURIref()); field2tag.put(YaCySchema.publisher_t.name(), DublinCore.Publisher.getURIref());
field2tag.put(YaCySchema.author.name(), DublinCore.Creator.getURIref()); field2tag.put(YaCySchema.author.name(), DublinCore.Creator.getURIref());
SOLR_FIELDS.addAll(field2tag.keySet()); SOLR_FIELDS.addAll(field2tag.keySet());
@ -133,7 +135,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
solitaireTag(writer, "opensearch:startIndex", Integer.toString(resHead.offset)); solitaireTag(writer, "opensearch:startIndex", Integer.toString(resHead.offset));
solitaireTag(writer, "opensearch:itemsPerPage", Integer.toString(resHead.rows)); solitaireTag(writer, "opensearch:itemsPerPage", Integer.toString(resHead.rows));
solitaireTag(writer, RSSMessage.Token.title.name(), this.title); solitaireTag(writer, RSSMessage.Token.title.name(), this.title);
//solitaireTag(writer, "description", ""); writer.write("<atom:link rel=\"search\" href=\"http://localhost:8090/opensearchdescription.xml\" type=\"application/opensearchdescription+xml\"/>");
solitaireTag(writer, "description", "Search Result");
//solitaireTag(writer, "link", ""); //solitaireTag(writer, "link", "");
//solitaireTag(writer, "image", ""); //solitaireTag(writer, "image", "");
@ -156,7 +159,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
// apply generic matching rule // apply generic matching rule
String stag = field2tag.get(fieldName); String stag = field2tag.get(fieldName);
if (stag != null) { if (stag != null) {
solitaireTag(writer, stag, value.stringValue()); solitaireTag(writer, stag, CharacterCoding.unicode2xml(value.stringValue(), true));
continue; continue;
} }
@ -166,13 +169,19 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
continue; continue;
} }
if (YaCySchema.title.name().equals(fieldName)) { if (YaCySchema.title.name().equals(fieldName)) {
solitaireTag(writer, RSSMessage.Token.title.name(), value.stringValue()); solitaireTag(writer, RSSMessage.Token.title.name(), CharacterCoding.unicode2xml(value.stringValue(), true));
texts.add(value.stringValue());
continue;
}
if (YaCySchema.last_modified.name().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, RSSMessage.Token.pubDate.name(), HeaderFramework.formatRFC1123(d));
texts.add(value.stringValue()); texts.add(value.stringValue());
continue; continue;
} }
if (YaCySchema.description.name().equals(fieldName)) { if (YaCySchema.description.name().equals(fieldName)) {
description = value.stringValue(); description = value.stringValue();
solitaireTag(writer, DublinCore.Description.getURIref(), description); solitaireTag(writer, DublinCore.Description.getURIref(), CharacterCoding.unicode2xml(description, true));
texts.add(description); texts.add(description);
continue; continue;
} }
@ -189,7 +198,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
} }
} }
// compute snippet from texts // compute snippet from texts
solitaireTag(writer, RSSMessage.Token.description.name(), description); solitaireTagNocheck(writer, RSSMessage.Token.description.name(), CharacterCoding.unicode2xml(description, true));
closeTag(writer, "item"); closeTag(writer, "item");
} }
@ -207,6 +216,10 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException { public static void solitaireTag(final Writer writer, final String tagname, String value) throws IOException {
if (value == null || value.length() == 0) return; if (value == null || value.length() == 0) return;
solitaireTagNocheck(writer, tagname, value);
}
public static void solitaireTagNocheck(final Writer writer, final String tagname, String value) throws IOException {
writer.write("<"); writer.write(tagname); writer.write('>'); writer.write("<"); writer.write(tagname); writer.write('>');
writer.write(value); writer.write(value);
writer.write("</"); writer.write(tagname); writer.write('>'); writer.write(lb); writer.write("</"); writer.write(tagname); writer.write('>'); writer.write(lb);

Loading…
Cancel
Save