From 26d8ad591c91c4ab49ccbcca316874cd7ffd494f Mon Sep 17 00:00:00 2001 From: luccioman Date: Fri, 6 Apr 2018 15:16:54 +0200 Subject: [PATCH] Adjusted Solr select servlet output when using an external Solr only - Use the EnhancedXMLResponseWriter only when requested output is "exml" - Use the Standard Solr writers when possible, for example for json, xml or javabin output formats - Return an error when the requested format cannot be rendered with an external Solr server only Important: this modification is necessary for peers using exclusively an external Solr server to be reachable as robinson targets in p2p search, as the binary format ("javabin") is the default Solr exchange format for peers. Before this, when a peer requested a remote one attached only to an external Solr (no embedded one), it ended with an "Invalid type" error, as the remote peer answered with xml although binary format was requested. --- .../EmbeddedSolrResponseWriter.java | 32 ++++++++ .../FlatJSONResponseWriter.java | 2 +- .../responsewriter/GSAResponseWriter.java | 2 +- .../GrepHTMLResponseWriter.java | 2 +- .../responsewriter/HTMLResponseWriter.java | 2 +- .../OpensearchResponseWriter.java | 2 +- .../SnapshotImagesReponseWriter.java | 2 +- .../responsewriter/YJsonResponseWriter.java | 2 +- .../yacy/http/servlets/SolrSelectServlet.java | 75 ++++++++++++++++--- .../net/yacy/search/query/AccessTracker.java | 6 +- 10 files changed, 106 insertions(+), 21 deletions(-) create mode 100644 source/net/yacy/cora/federate/solr/responsewriter/EmbeddedSolrResponseWriter.java diff --git a/source/net/yacy/cora/federate/solr/responsewriter/EmbeddedSolrResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/EmbeddedSolrResponseWriter.java new file mode 100644 index 000000000..0b8cfabe3 --- /dev/null +++ b/source/net/yacy/cora/federate/solr/responsewriter/EmbeddedSolrResponseWriter.java @@ -0,0 +1,32 @@ +// EmbeddedSolrResponseWriter.java +// --------------------------- +// Copyright 2018 by 
luccioman; https://github.com/luccioman +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package net.yacy.cora.federate.solr.responsewriter; + +import org.apache.solr.response.QueryResponseWriter; + +/** + * Interface used only to mark that a {@link QueryResponseWriter} implementation + * class is only compatible with results from an embedded Solr server + */ +public interface EmbeddedSolrResponseWriter { +} diff --git a/source/net/yacy/cora/federate/solr/responsewriter/FlatJSONResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/FlatJSONResponseWriter.java index 9d05e00de..9358261d1 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/FlatJSONResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/FlatJSONResponseWriter.java @@ -47,7 +47,7 @@ import org.json.simple.JSONArray; import net.yacy.cora.federate.solr.SolrType; import net.yacy.cora.util.JSONObject; -public class FlatJSONResponseWriter implements QueryResponseWriter { +public class FlatJSONResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { private static final char lb = '\n'; diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java 
b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index d5c7cc6da..7d4ac1842 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -59,7 +59,7 @@ import org.apache.solr.search.SolrIndexSearcher; * example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1 * for a xml reference, see https://developers.google.com/search-appliance/documentation/614/xml_reference */ -public class GSAResponseWriter implements QueryResponseWriter { +public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { private static String YaCyVer = null; private static final char lb = '\n'; diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java index cdea36dad..6e95523cd 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java @@ -52,7 +52,7 @@ import org.apache.solr.search.SolrIndexSearcher; * text part and each sentence is shown as separate line. grep attributes can be used to * show leading and trainling lines. 
*/ -public class GrepHTMLResponseWriter implements QueryResponseWriter { +public class GrepHTMLResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { private static final Set DEFAULT_FIELD_LIST = new HashSet(); private static final Pattern dqp = Pattern.compile("\""); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java index 639685cbb..db5d08ebe 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/HTMLResponseWriter.java @@ -54,7 +54,7 @@ import net.yacy.cora.lod.vocabulary.DublinCore; import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.WebgraphSchema; -public class HTMLResponseWriter implements QueryResponseWriter { +public class HTMLResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { public static final Pattern dqp = Pattern.compile("\""); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java index fd17e8f6a..83f0a1ab8 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java @@ -58,7 +58,7 @@ import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.SolrIndexSearcher; -public class OpensearchResponseWriter implements QueryResponseWriter { +public class OpensearchResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { // define a list of simple YaCySchema -> RSS Token matchings private static final Map field2tag = new HashMap(); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/SnapshotImagesReponseWriter.java 
b/source/net/yacy/cora/federate/solr/responsewriter/SnapshotImagesReponseWriter.java index 0ee342571..f40466ebf 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/SnapshotImagesReponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/SnapshotImagesReponseWriter.java @@ -20,7 +20,7 @@ import org.apache.solr.search.SolrIndexSearcher; /** * this writer is supposed to be used to generate iframes. It generates links for the /api/snapshot.jpg servlet. */ -public class SnapshotImagesReponseWriter implements QueryResponseWriter { +public class SnapshotImagesReponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { private static final Set DEFAULT_FIELD_LIST = new HashSet<>(); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java index 47dbb2e65..3380f537b 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java @@ -59,7 +59,7 @@ import org.apache.solr.search.SolrIndexSearcher; * example: * http://localhost:8090/solr/select?hl=false&wt=yjson&facet=true&facet.mincount=1&facet.field=host_s&facet.field=url_file_ext_s&facet.field=url_protocol_s&facet.field=author_sxt&facet.field=collection_sxt&start=0&rows=10&query=www */ -public class YJsonResponseWriter implements QueryResponseWriter { +public class YJsonResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { // define a list of simple YaCySchema -> json Token matchings private static final Map field2tag = new HashMap(); diff --git a/source/net/yacy/http/servlets/SolrSelectServlet.java b/source/net/yacy/http/servlets/SolrSelectServlet.java index 7371816a2..fbfa84012 100644 --- a/source/net/yacy/http/servlets/SolrSelectServlet.java +++ b/source/net/yacy/http/servlets/SolrSelectServlet.java @@ -40,6 +40,7 @@ import 
javax.servlet.http.HttpServletResponse; import net.yacy.cora.federate.solr.Ranking; import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector; import net.yacy.cora.federate.solr.connector.SolrConnector; +import net.yacy.cora.federate.solr.responsewriter.EmbeddedSolrResponseWriter; import net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter; import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter; import net.yacy.cora.federate.solr.responsewriter.GrepHTMLResponseWriter; @@ -60,17 +61,23 @@ import net.yacy.search.schema.CollectionSchema; import net.yacy.search.schema.WebgraphSchema; import org.apache.commons.lang.StringUtils; +import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.DisMaxParams; +import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.core.SolrCore; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.request.SolrQueryRequestBase; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.response.BinaryResponseWriter; +import org.apache.solr.response.CSVResponseWriter; import org.apache.solr.response.QueryResponseWriter; +import org.apache.solr.response.RawResponseWriter; import org.apache.solr.response.ResultContext; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.XSLTResponseWriter; @@ -287,16 +294,55 @@ public class SolrSelectServlet extends HttpServlet { out.flush(); } } else { - // write a 'faked' response using a call to the backend - SolrDocumentList sdl = connector.getDocumentListByQuery( - mmsp.getMap().get(CommonParams.Q)[0], - mmsp.getMap().get(CommonParams.SORT) == null ? 
null : mmsp.getMap().get(CommonParams.SORT)[0], - Integer.parseInt(mmsp.getMap().get(CommonParams.START)[0]), - Integer.parseInt(mmsp.getMap().get(CommonParams.ROWS)[0]), - mmsp.getMap().get(CommonParams.FL)); - OutputStreamWriter osw = new OutputStreamWriter(response.getOutputStream()); - EnhancedXMLResponseWriter.write(osw, req, sdl); - osw.close(); + if (responseWriter instanceof EmbeddedSolrResponseWriter || responseWriter instanceof CSVResponseWriter + || responseWriter instanceof XSLTResponseWriter || responseWriter instanceof RawResponseWriter) { + /* These writers need a non null req.getSearcher(), req.getSchema() and/or req.getCore() */ + throw new ServletException("The writer " + responseWriter.getClass().getSimpleName() + " can only process responses from an embedded Solr server."); + } + + QueryResponse queryRsp = connector.getResponseByParams(ModifiableSolrParams.of(mmsp)); + + /* Create SolrQueryRequestBase and SolrQueryResponse instances as these types are required by Solr standard writers. + * WARNING : the SolrQueryRequestBase instance will return null for the getSearcher(), getCore() and getSchema() functions. + * Be sure that the responseWriter instance can handle this properly. 
*/ + req = new SolrQueryRequestBase(null, mmsp) {}; + + rsp = new SolrQueryResponse(); + NamedList responseHeader = new SimpleOrderedMap(); + responseHeader.add("params", mmsp.toNamedList()); + rsp.add("responseHeader", responseHeader); + rsp.setHttpCaching(false); + rsp.getValues().addAll(queryRsp.getResponse()); + + + // prepare response + hresponse.setHeader("Cache-Control", "no-cache, no-store"); + + final SolrDocumentList documentsList = queryRsp.getResults(); + long numFound = documentsList.getNumFound(); + AccessTracker.addToDump(querystring, numFound, new Date(), "sq"); + + // write response header + final String contentType = responseWriter.getContentType(req, rsp); + if (null != contentType) { + response.setContentType(contentType); + } + + if (Method.HEAD == reqMethod) { + return; + } + + // write response body + if (responseWriter instanceof EnhancedXMLResponseWriter) { + out = new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8); + EnhancedXMLResponseWriter.write(out, req, documentsList); + } else if(responseWriter instanceof BinaryResponseWriter) { + ((BinaryResponseWriter) responseWriter).write(response.getOutputStream(), req, rsp); + } else { + out = new FastWriter(new OutputStreamWriter(response.getOutputStream(), StandardCharsets.UTF_8)); + responseWriter.write(out, req, rsp); + out.flush(); + } } } catch (final Throwable ex) { sendError(hresponse, ex); @@ -305,7 +351,14 @@ public class SolrSelectServlet extends HttpServlet { req.close(); } SolrRequestInfo.clearRequestInfo(); - if (out != null) try {out.close();} catch (final IOException e1) {} + if (out != null) { + try { + out.close(); + } catch (final IOException e1) { + ConcurrentLog.info("SolrSelect", "Could not close output writer." + + (e1.getMessage() != null ? 
"Cause : " + e1.getMessage() : "")); + } + } } } diff --git a/source/net/yacy/search/query/AccessTracker.java b/source/net/yacy/search/query/AccessTracker.java index 46b017626..d4ec3ba27 100644 --- a/source/net/yacy/search/query/AccessTracker.java +++ b/source/net/yacy/search/query/AccessTracker.java @@ -151,13 +151,13 @@ public class AccessTracker { return 0; } - private static void addToDump(final QueryParams query, int resultCount) { + private static void addToDump(final QueryParams query, long resultCount) { String queryString = query.getQueryGoal().getQueryString(false); if (queryString == null || queryString.isEmpty()) return; addToDump(queryString, resultCount, new Date(query.starttime), "qs"); } - public static void addToDump(String querystring, int resultCount) { + public static void addToDump(String querystring, long resultCount) { addToDump(querystring, resultCount, new Date(), "qs"); } @@ -169,7 +169,7 @@ public class AccessTracker { * @param d start time * @param querySyntax used syntax (qs=normal querstring, sq=solr querystring, */ - public static void addToDump(String querystring, int resultcount, Date d, String querySyntax) { + public static void addToDump(String querystring, long resultcount, Date d, String querySyntax) { //if (query.resultcount == 0) return; if (querystring == null || querystring.isEmpty()) return; final StringBuilder sb = new StringBuilder(40);