From 3ce4c2f9373c9ec841a5200d07764a3c9a383908 Mon Sep 17 00:00:00 2001 From: Michael Peter Christen Date: Tue, 21 Aug 2012 01:57:46 +0200 Subject: [PATCH] fixes for gsa result format --- htroot/gsa/searchresult.java | 39 +++++++++ .../federated/solr/GSAResponseWriter.java | 85 +++++++------------ 2 files changed, 71 insertions(+), 53 deletions(-) diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index a445bd7ba..546214e17 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.util.Map; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.HeaderFramework; @@ -29,6 +30,7 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.services.federated.solr.GSAResponseWriter; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; +import net.yacy.search.index.YaCySchema; import net.yacy.search.solr.EmbeddedSolrConnector; import org.apache.solr.common.SolrException; @@ -63,6 +65,24 @@ public class searchresult { return "text/xml"; } + public static class Sort { + public String sort, action, direction, mode, format; + public Sort(String d) { + this.sort = d; + String[] s = d.split(":"); + this.action = s[0]; // date + this.direction = s[1]; // A or D + this.mode = s[2]; // S, R, L + this.format = s[3]; // d1 + } + public String toSolr() { + if ("date".equals(this.action)) { + return YaCySchema.last_modified.name() + " " + (("D".equals(this.direction) ? "desc" : "asc")); + } + return null; + } + } + /** * @param header * @param post @@ -98,6 +118,16 @@ public class searchresult { post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, Math.min(post.getInt("num", 10), (authenticated) ? 5000 : 100)); post.remove("num"); + Sort sort = new Sort(post.get(CommonParams.SORT, "")); + String sorts = sort.toSolr(); + if (sorts == null) { + post.remove(CommonParams.SORT); + } else { + post.put(CommonParams.SORT, sorts); + } + String site = post.remove("site"); + String access = post.remove("access"); + String entqr = post.remove("entqr"); // get the embedded connector EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr(); @@ -114,6 +144,15 @@ public class searchresult { return null; } + // set some context for the writer + Map context = req.getContext(); + context.put("ip", header.get("CLIENTIP", "")); + context.put("client", header.get("User-Agent", "")); + context.put("sort", sort.sort); + context.put("site", site == null ? "" : site); + context.put("access", access == null ? "p" : access); + context.put("entqr", entqr == null ? "3" : entqr); + // write the result directly to the output stream Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset)); try { diff --git a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java index 4400553e1..a6f219456 100644 --- a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java @@ -31,6 +31,8 @@ import java.util.Map; import java.util.Set; import net.yacy.cora.protocol.HeaderFramework; +import net.yacy.peers.operation.yacyVersion; +import net.yacy.search.Switchboard; import net.yacy.search.index.YaCySchema; import org.apache.lucene.document.Document; @@ -52,6 +54,7 @@ import org.apache.solr.search.SolrIndexSearcher; */ public class GSAResponseWriter implements QueryResponseWriter { + private static String YaCyVer = null; private static final char lb = '\n'; private enum GSAToken { CACHE_LAST_MODIFIED, // Date that the document was crawled, as specified in the Date HTTP header when the document was crawled for this index. @@ -113,9 +116,12 @@ public class GSAResponseWriter implements QueryResponseWriter { assert rsp.getValues().get("responseHeader") != null; assert rsp.getValues().get("response") != null; + long start = System.currentTimeMillis(); + @SuppressWarnings("unchecked") SimpleOrderedMap responseHeader = (SimpleOrderedMap) rsp.getResponseHeader(); DocSlice response = (DocSlice) rsp.getValues().get("response"); + Map context = request.getContext(); // parse response header ResHead resHead = new ResHead(); @@ -132,15 +138,35 @@ public class GSAResponseWriter implements QueryResponseWriter { // write header writer.write(XML_START); + OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - start)); + OpensearchResponseWriter.solitaireTag(writer, "Q", request.getParams().get("q")); + paramTag(writer, "sort", (String) context.get("sort")); + paramTag(writer, "output", "xml_no_dtd"); + paramTag(writer, "ie", "UTF-8"); + paramTag(writer, "oe", "UTF-8"); + paramTag(writer, "client", (String) context.get("client")); + paramTag(writer, "q", request.getParams().get("q")); + paramTag(writer, "site", (String) context.get("site")); paramTag(writer, "start", Integer.toString(resHead.offset)); paramTag(writer, "num", Integer.toString(resHead.rows)); + paramTag(writer, "ip", (String) context.get("ip")); + paramTag(writer, "access", (String) context.get("access")); // p - search only public content, s - search only secure content, a - search all content, both public and secure + paramTag(writer, "entqr", (String) context.get("entqr")); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files. - // parse body + // body introduction final int responseCount = response.size(); + writer.write(""); writer.write(lb); // The index (1-based) of the first and last search result returned in this result set. + writer.write("" + response.matches() + ""); writer.write(lb); // The estimated total number of results for the search. + writer.write(""); writer.write(lb); // Indicates that document filtering was performed during this search. + XML.escapeCharData("/search?q=" + request.getParams().get("q") + "&site=" + (String) context.get("site") + + "&lr=&ie=UTF-8&oe=UTF-8&output=xml_no_dtd&client=" + (String) context.get("client") + "&access=" + (String) context.get("access") + + "&sort=" + (String) context.get("sort") + "&start=" + resHead.offset + responseCount + "&sa=N", writer); writer.write(lb); // a relative URL pointing to the NEXT results page. + + // parse body SolrIndexSearcher searcher = request.getSearcher(); DocIterator iterator = response.iterator(); for (int i = 0; i < responseCount; i++) { - OpensearchResponseWriter.openTag(writer, "R"); + writer.write(""); writer.write(lb); int id = iterator.nextDoc(); Document doc = searcher.doc(id, SOLR_FIELDS); List fields = doc.getFields(); @@ -158,13 +184,6 @@ public class GSAResponseWriter implements QueryResponseWriter { continue; } -/* - - - - -*/ - // if the rule is not generic, use the specific here if (YaCySchema.sku.name().equals(fieldName)) { OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.stringValue()); @@ -207,14 +226,14 @@ public class GSAResponseWriter implements QueryResponseWriter { } // compute snippet from texts OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), description); - OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy"); + if (YaCyVer == null) YaCyVer = yacyVersion.thisVersion().getName() + "/" + Switchboard.getSwitchboard().peers.mySeed().hash; + OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer); OpensearchResponseWriter.closeTag(writer, "R"); } - + writer.write(""); writer.write(lb); writer.write(XML_STOP); } - public static void paramTag(final Writer writer, final String tagname, String value) throws IOException { if (value == null || value.length() == 0) return; writer.write(""); writer.write(lb); } -} - -/* - - -0.053898 -pdf - - - - - - - - - - - - - - -296 - - - - - - - - - - - - -de - - - - - -*/ \ No newline at end of file +} \ No newline at end of file