From 74e6d6e984225a5218a5dce5547b08e29b60eba5 Mon Sep 17 00:00:00 2001 From: luccioman Date: Wed, 20 Mar 2019 18:24:16 +0100 Subject: [PATCH] Added Solr GrepHTML writer support for responses from remote instances --- .../GrepHTMLResponseWriter.java | 257 ++++++++++++++---- .../responsewriter/HTMLResponseWriter.java | 10 +- 2 files changed, 202 insertions(+), 65 deletions(-) diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java index 6e95523cd..8a4f9082f 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java @@ -28,10 +28,10 @@ import java.util.LinkedHashMap; import java.util.Set; import java.util.regex.Pattern; -import net.yacy.document.SentenceReader; -import net.yacy.search.schema.CollectionSchema; - import org.apache.lucene.document.Document; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -43,7 +43,12 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; +import org.apache.solr.search.ReturnFields; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; + +import net.yacy.document.SentenceReader; +import net.yacy.search.schema.CollectionSchema; /** * this response writer shows a list of documents with the lines containing matches @@ -52,9 +57,9 @@ import org.apache.solr.search.SolrIndexSearcher; * text part and each sentence is shown as separate line. grep attributes can be used to * show leading and trainling lines. */ -public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { +public class GrepHTMLResponseWriter implements QueryResponseWriter, SolrjResponseWriter { - private static final Set DEFAULT_FIELD_LIST = new HashSet(); + private static final Set DEFAULT_FIELD_LIST = new HashSet<>(); private static final Pattern dqp = Pattern.compile("\""); static { DEFAULT_FIELD_LIST.add(CollectionSchema.id.getSolrFieldName()); @@ -63,10 +68,6 @@ public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolr DEFAULT_FIELD_LIST.add(CollectionSchema.text_t.getSolrFieldName()); } - public GrepHTMLResponseWriter() { - super(); - } - @Override public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) { return "text/html"; @@ -78,70 +79,214 @@ public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolr @Override public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException { - NamedList values = rsp.getValues(); - assert values.get("responseHeader") != null; - assert values.get("response") != null; + writeHtmlHead(writer); + + final SolrParams params = request.getOriginalParams(); + + final String query = getQueryParam(params); + final String grep = getGrepParam(params, query); + + + final Object responseObj = rsp.getResponse(); + + if(responseObj instanceof SolrDocumentList) { + /* + * The response object can be a SolrDocumentList when the response is partial, + * for example when the allowed processing time has been exceeded + */ + final SolrDocumentList docList = ((SolrDocumentList)responseObj); + + writeSolrDocumentList(writer, params, query, grep, docList); + + } else if(responseObj instanceof ResultContext) { + /* Regular response object */ + final DocList documents = ((ResultContext)responseObj).getDocList(); + + final int sz = documents.size(); + if (sz > 0) { + final SolrIndexSearcher searcher = request.getSearcher(); + final DocIterator iterator = documents.iterator(); + final IndexSchema schema = request.getSchema(); + writeTitleAndHeadeing(writer, grep, query); + writeApiLink(writer, params); + for (int i = 0; i < sz; i++) { + int id = iterator.nextDoc(); + final Document doc = searcher.doc(id, DEFAULT_FIELD_LIST); + final LinkedHashMap tdoc = HTMLResponseWriter.translateDoc(schema, doc); + final String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName()); + final String title = tdoc.get(CollectionSchema.title.getSolrFieldName()); + final String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName()); + + final ArrayList sentences = extractSentences(title, text); + writeDoc(writer, sku, sentences, grep); + } + } else { + writer.write("No Document Found\n\n"); + } + } else { + writer.write("Unable to process Solr response\n\n"); + } + + writer.write("\n"); + } - writer.write("\n\n\n"); + /** + * Process the solr documents list and append a representation to the output writer. + * @param writer an open output writer. Must not be null. + * @param params the original Solr parameters + * @param query the query parameter value + * @param grep the grep parameter value + * @param docList the solr documents list + * @throws IOException when a write error occurred + */ + private void writeSolrDocumentList(final Writer writer, final SolrParams params, final String query, + final String grep, final SolrDocumentList docList) throws IOException { + if (docList == null || docList.isEmpty()) { + writer.write("No Document Found\n\n"); + } else { + writeTitleAndHeadeing(writer, grep, query); + writeApiLink(writer, params); + + final ReturnFields fieldsToReturn = new SolrReturnFields(); + for (final SolrDocument doc : docList) { + final LinkedHashMap tdoc = HTMLResponseWriter.translateDoc(doc, fieldsToReturn); + final String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName()); + final String title = tdoc.get(CollectionSchema.title.getSolrFieldName()); + final String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName()); + + final ArrayList sentences = extractSentences(title, text); + writeDoc(writer, sku, sentences, grep); + } + } + } + + /** + * Write the html header beginning + * @param writer an open output writer + * @throws IOException when a write error occurred + */ + private void writeHtmlHead(final Writer writer) throws IOException { + writer.write("\n\n\n"); writer.write("\n"); writer.write("\n"); writer.write("\n"); - SolrParams params = request.getOriginalParams(); - String grep = params.get("grep"); + } + + /** + * @param params the original request parameters. Must not be null. + * @param query the query parameter value + * @return the grep parameter value + */ + private String getGrepParam(final SolrParams params, String query) { + String grep = params.get("grep"); + if (grep == null) { + if(query.length() > 0) { + grep = query; + } else { + grep = ""; + } + } + if (grep.length() > 0) { + if (grep.charAt(0) == '"') { + grep = grep.substring(1); + } + if (grep.charAt(grep.length() - 1) == '"') { + grep = grep.substring(0, grep.length() - 1); + } + } + return grep; + } + + /** + * @param params the original request parameters. Must not be null. + * @return the query parameter value + */ + private String getQueryParam(final SolrParams params) { + final String q = params.get(CommonParams.Q, ""); String query = ""; - String q = params.get(CommonParams.Q); if (q == null) q = ""; int p = q.indexOf(':'); if (p >= 0) { int r = q.charAt(p + 1) == '"' ? q.indexOf(p + 2, '"') : q.indexOf(' '); - if (r < 0) r = q.length(); + if (r < 0) { + r = q.length(); + } query = q.substring(p + 1, r); if (query.length() > 0) { - if (query.charAt(0) == '"') query = query.substring(1); - if (query.charAt(query.length() - 1) == '"') query = query.substring(0, query.length() - 1); + if (query.charAt(0) == '"') { + query = query.substring(1); + } + if (query.charAt(query.length() - 1) == '"') { + query = query.substring(0, query.length() - 1); + } } } - if (grep == null && query.length() > 0) grep = query; - if (grep.length() > 0) { - if (grep.charAt(0) == '"') grep = grep.substring(1); - if (grep.charAt(grep.length() - 1) == '"') grep = grep.substring(0, grep.length() - 1); - } - NamedList paramsList = params.toNamedList(); + return query; + } + + /** + * Append the response title and level 1 html heading + * @param writer an open output writer. Must not be null. + * @param grep the grep phrase + * @param query the search query + * @throws IOException when a write error occurred + */ + private void writeTitleAndHeadeing(final Writer writer, final String grep, final String query) throws IOException { + final String h1 = "Document Grep for query \"" + query + "\" and grep phrase \"" + grep + "\""; + writer.write("" + h1 + "\n\n

" + h1 + "

\n"); + } + + /** + * Append a link to the related Solr api + * @param writer an open output writer. Must not be null. + * @param solrParams the original request parameters. Must not be null. + * @throws IOException when a write error occurred + */ + private void writeApiLink(final Writer writer, final SolrParams solrParams) throws IOException { + final NamedList paramsList = solrParams.toNamedList(); paramsList.remove("wt"); String xmlquery = dqp.matcher("select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22"); - DocList response = ((ResultContext) values.get("response")).getDocList(); - final int sz = response.size(); - if (sz > 0) { - SolrIndexSearcher searcher = request.getSearcher(); - DocIterator iterator = response.iterator(); - IndexSchema schema = request.getSchema(); - String h1 = "Document Grep for query \"" + query + "\" and grep phrase \"" + grep + "\""; - writer.write("" + h1 + "\n\n

" + h1 + "

\n"); - writer.write("
\"API\"\n"); - writer.write("This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.
\n"); - for (int i = 0; i < sz; i++) { - int id = iterator.nextDoc(); - Document doc = searcher.doc(id, DEFAULT_FIELD_LIST); - LinkedHashMap tdoc = HTMLResponseWriter.translateDoc(schema, doc); - String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName()); - String title = tdoc.get(CollectionSchema.title.getSolrFieldName()); - String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName()); - - ArrayList sentences = new ArrayList(); - if (title != null) sentences.add(title); - SentenceReader sr = new SentenceReader(text); - StringBuilder line; - while (sr.hasNext()) { - line = sr.next(); - if (line.length() > 0) sentences.add(line.toString()); - } - writeDoc(writer, sku, sentences, grep); - } - } else { - writer.write("No Document Found\n\n"); - } + writer.write("
\"API\"\n"); + writer.write("This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.
\n"); + } + + /** + * @param title + * @param text + * @return a list of sentences extracted from the given document text and title + */ + private ArrayList extractSentences(final String title, final String text) { + final ArrayList sentences = new ArrayList<>(); + if (title != null) { + sentences.add(title); + } + if(text != null) { + final SentenceReader sr = new SentenceReader(text); + StringBuilder line; + while (sr.hasNext()) { + line = sr.next(); + if (line.length() > 0) { + sentences.add(line.toString()); + } + } + } + return sentences; + } + + @Override + public void write(Writer writer, SolrQueryRequest request, String coreName, QueryResponse rsp) throws IOException { + writeHtmlHead(writer); + + final SolrParams params = request.getOriginalParams(); + + final String query = getQueryParam(params); + final String grep = getGrepParam(params, query); + + writeSolrDocumentList(writer, params, query, grep, rsp.getResults()); + writer.write("\n"); + } private static final void writeDoc(Writer writer, String url, ArrayList sentences, String grep) throws IOException { diff --git a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java index 79abef90e..b5b50e4ff 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java @@ -72,10 +72,6 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri public static final Pattern dqp = Pattern.compile("\""); - public HTMLResponseWriter() { - super(); - } - @Override public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) { return "text/html"; @@ -288,10 +284,6 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri @Override public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException { - NamedList values = rsp.getValues(); - assert values.get("responseHeader") != null; - assert values.get("response") != null; - writeHtmlHead(writer, request); final String coreName = request.getCore().getName(); @@ -643,7 +635,7 @@ public class HTMLResponseWriter implements QueryResponseWriter, SolrjResponseWri * restrict the actually returned fields. May be null. * @return a map of field names to field values */ - private static final LinkedHashMap translateDoc(final SolrDocument doc, + public static final LinkedHashMap translateDoc(final SolrDocument doc, final ReturnFields returnFields) { LinkedHashMap kv = new LinkedHashMap(); for (final Entry entry : doc) {