diff --git a/htroot/gsa/searchresult.java b/htroot/gsa/searchresult.java index 548cae36e..f58afdc0a 100644 --- a/htroot/gsa/searchresult.java +++ b/htroot/gsa/searchresult.java @@ -30,6 +30,7 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.services.federated.solr.GSAResponseWriter; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; +import net.yacy.search.query.SnippetProcess; import net.yacy.search.solr.EmbeddedSolrConnector; import org.apache.solr.common.SolrException; @@ -42,7 +43,7 @@ import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; // try -// http://localhost:8090/gsa/search?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1 +// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1 /** * This is a gsa result formatter for solr search results. @@ -100,6 +101,11 @@ public class searchresult { post.put(CommonParams.ROWS, post.remove("num")); post.put(CommonParams.ROWS, Math.min(post.getInt("num", 10), (authenticated) ? 5000 : 100)); post.remove("num"); + post.put("hl", "true"); + post.put("hl.fl", "text_t,h1,h2"); + post.put("hl.simple.pre", ""); + post.put("hl.simple.post", ""); + post.put("hl.fragsize", Integer.toString(SnippetProcess.SNIPPET_MAX_LENGTH)); GSAResponseWriter.Sort sort = new GSAResponseWriter.Sort(post.get(CommonParams.SORT, "")); String sorts = sort.toSolr(); if (sorts == null) { diff --git a/htroot/solr/select.java b/htroot/solr/select.java index 6191e0dff..074af46a7 100644 --- a/htroot/solr/select.java +++ b/htroot/solr/select.java @@ -35,6 +35,7 @@ import net.yacy.cora.services.federated.solr.OpensearchResponseWriter; import net.yacy.kelondro.logging.Log; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; +import net.yacy.search.query.SnippetProcess; import net.yacy.search.solr.EmbeddedSolrConnector; import net.yacy.search.solr.SolrServlet; @@ -71,7 +72,7 @@ public class select { xsltWriter.init(initArgs); RESPONSE_WRITER.put("xslt", xsltWriter); // try i.e. http://localhost:8090/solr/select?q=*:*&start=0&rows=10&wt=xslt&tr=json.xsl RESPONSE_WRITER.put("exml", new EnhancedXMLResponseWriter()); - RESPONSE_WRITER.put("rss", new OpensearchResponseWriter()); //try http://localhost:8090/solr/select?wt=rss&q=olympia + RESPONSE_WRITER.put("rss", new OpensearchResponseWriter()); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2 } /** @@ -147,6 +148,14 @@ public class select { "") : env.getConfig(SwitchboardConstants.GREETING, ""); ((OpensearchResponseWriter) responseWriter).setTitle(promoteSearchPageGreeting); } + if (responseWriter instanceof OpensearchResponseWriter) { + // add options for snippet generation + post.put("hl", "true"); + post.put("hl.fl", "text_t,h1,h2"); + post.put("hl.simple.pre", ""); + post.put("hl.simple.post", ""); + post.put("hl.fragsize", Integer.toString(SnippetProcess.SNIPPET_MAX_LENGTH)); + } // get the embedded connector EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr(); diff --git a/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java b/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java index bec20cc44..9b05321f4 100644 --- a/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/EnhancedXMLResponseWriter.java @@ -74,9 +74,11 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter { @SuppressWarnings("unchecked") SimpleOrderedMap responseHeader = (SimpleOrderedMap) rsp.getResponseHeader(); DocSlice response = (DocSlice) rsp.getValues().get("response"); + @SuppressWarnings("unchecked") + SimpleOrderedMap highlighting = (SimpleOrderedMap) rsp.getValues().get("highlighting"); writeProps(writer, "responseHeader", responseHeader); // this.writeVal("responseHeader", responseHeader); writeDocs(writer, request, response); // this.writeVal("response", response); - + writeProps(writer, "highlighting", highlighting); writer.write(XML_STOP); } @@ -89,6 +91,7 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter { v = val.getVal(i); if (v instanceof Integer) writeTag(writer, "int", n, ((Integer) v).toString(), false); else if (v instanceof String) writeTag(writer, "str", n, (String) v, true); + else if (v instanceof String[]) writeTag(writer, "str", n, (String[]) v, true); else if (v instanceof NamedList) writeProps(writer, n, (NamedList) v); } if (sz > 0) { @@ -205,6 +208,14 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter { writer.write("'); writer.write(lb); } + private static void writeTag(final Writer writer, final String tag, final String nameAttr, final String[] vals, final boolean escape) throws IOException { + startTagOpen(writer, "arr", nameAttr); + for (String val: vals) { + writeTag(writer, tag, null, val, escape); + } + writer.write(""); writer.write(lb); + } + private static void startTagOpen(final Writer writer, final String tag, final String nameAttr) throws IOException { writer.write('<'); writer.write(tag); if (nameAttr != null) writeAttr(writer, "name", nameAttr); diff --git a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java index 1987c83d2..c2065f18a 100644 --- a/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/GSAResponseWriter.java @@ -140,6 +140,9 @@ public class GSAResponseWriter implements QueryResponseWriter { @SuppressWarnings("unchecked") SimpleOrderedMap responseHeader = (SimpleOrderedMap) rsp.getResponseHeader(); DocSlice response = (DocSlice) rsp.getValues().get("response"); + @SuppressWarnings("unchecked") + SimpleOrderedMap highlighting = (SimpleOrderedMap) rsp.getValues().get("highlighting"); + Map> snippets = OpensearchResponseWriter.highlighting(highlighting); Map context = request.getContext(); // parse response header @@ -187,6 +190,7 @@ public class GSAResponseWriter implements QueryResponseWriter { // parse body SolrIndexSearcher searcher = request.getSearcher(); DocIterator iterator = response.iterator(); + String urlhash = null; for (int i = 0; i < responseCount; i++) { writer.write(""); writer.write(lb); int id = iterator.nextDoc(); @@ -207,6 +211,10 @@ public class GSAResponseWriter implements QueryResponseWriter { } // if the rule is not generic, use the specific here + if (YaCySchema.id.name().equals(fieldName)) { + urlhash = value.stringValue(); + continue; + } if (YaCySchema.sku.name().equals(fieldName)) { OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.stringValue()); OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.stringValue()); @@ -247,6 +255,8 @@ public class GSAResponseWriter implements QueryResponseWriter { } } // compute snippet from texts + List snippet = urlhash == null ? null : snippets.get(urlhash); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? description : snippet.get(0)); OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), description); if (YaCyVer == null) YaCyVer = yacyVersion.thisVersion().getName() + "/" + Switchboard.getSwitchboard().peers.mySeed().hash; OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer); diff --git a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java index 9e252ca6e..de0c52714 100644 --- a/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/services/federated/solr/OpensearchResponseWriter.java @@ -114,6 +114,9 @@ public class OpensearchResponseWriter implements QueryResponseWriter { @SuppressWarnings("unchecked") SimpleOrderedMap responseHeader = (SimpleOrderedMap) rsp.getResponseHeader(); DocSlice response = (DocSlice) rsp.getValues().get("response"); + @SuppressWarnings("unchecked") + SimpleOrderedMap highlighting = (SimpleOrderedMap) rsp.getValues().get("highlighting"); + Map> snippets = highlighting(highlighting); // parse response header ResHead resHead = new ResHead(); @@ -144,6 +147,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter { final int responseCount = response.size(); SolrIndexSearcher searcher = request.getSearcher(); DocIterator iterator = response.iterator(); + String urlhash = null; for (int i = 0; i < responseCount; i++) { openTag(writer, "item"); int id = iterator.nextDoc(); @@ -165,7 +169,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter { // if the rule is not generic, use the specific here if (YaCySchema.id.name().equals(fieldName)) { - solitaireTag(writer, RSSMessage.Token.guid.name(), value.stringValue(), "isPermaLink=\"false\""); + urlhash = value.stringValue(); + solitaireTag(writer, RSSMessage.Token.guid.name(), urlhash, "isPermaLink=\"false\""); continue; } if (YaCySchema.title.name().equals(fieldName)) { @@ -198,7 +203,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter { } } // compute snippet from texts - solitaireTagNocheck(writer, RSSMessage.Token.description.name(), description); + List snippet = urlhash == null ? null : snippets.get(urlhash); + solitaireTagNocheck(writer, RSSMessage.Token.description.name(), snippet == null || snippet.size() == 0 ? description : snippet.get(0)); closeTag(writer, "item"); } @@ -206,6 +212,30 @@ public class OpensearchResponseWriter implements QueryResponseWriter { writer.write(XML_STOP); } + @SuppressWarnings("unchecked") + public static Map> highlighting(final SimpleOrderedMap val) { + Map> snippets = new HashMap>(); + if (val == null) return snippets; + int sz = val.size(); + Object v, vv; + for (int i = 0; i < sz; i++) { + String n = val.getName(i); + v = val.getVal(i); + if (v instanceof SimpleOrderedMap) { + int sz1 = ((SimpleOrderedMap) v).size(); + List t = new ArrayList(sz1); + for (int j = 0; j < sz1; j++) { + vv = ((SimpleOrderedMap) v).getVal(j); + if (vv instanceof String[]) { + for (String t0: ((String[]) vv)) t.add(t0); + } + } + snippets.put(n, t); + } + } + return snippets; + } + public static void openTag(final Writer writer, final String tag) throws IOException { writer.write('<'); writer.write(tag); writer.write('>'); writer.write(lb); } diff --git a/source/net/yacy/search/query/SnippetProcess.java b/source/net/yacy/search/query/SnippetProcess.java index cac3386f6..8693fb83b 100644 --- a/source/net/yacy/search/query/SnippetProcess.java +++ b/source/net/yacy/search/query/SnippetProcess.java @@ -69,6 +69,7 @@ public class SnippetProcess { public static Log log = new Log("SEARCH"); + public static final int SNIPPET_MAX_LENGTH = 220; private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2); // input values @@ -579,7 +580,7 @@ public class SnippetProcess { //this.query.queryString, null, ((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))), - 220, + SNIPPET_MAX_LENGTH, !this.query.isLocal()); return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, 0); // result without snippet }