diff --git a/source/net/yacy/http/GSAsearchServlet.java b/source/net/yacy/http/GSAsearchServlet.java
new file mode 100644
index 000000000..4b88e948a
--- /dev/null
+++ b/source/net/yacy/http/GSAsearchServlet.java
@@ -0,0 +1,271 @@
+/**
+ * search
+ * Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
+ * First released 30.10.2013 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+package net.yacy.http;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.security.Principal;
+import java.util.Date;
+import java.util.Enumeration;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import net.yacy.cora.date.ISO8601Formatter;
+import net.yacy.cora.document.encoding.UTF8;
+import net.yacy.cora.federate.solr.Ranking;
+import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
+import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
+import net.yacy.cora.protocol.HeaderFramework;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.search.Switchboard;
+import net.yacy.search.query.AccessTracker;
+import net.yacy.search.query.QueryGoal;
+import net.yacy.search.query.QueryModifier;
+import net.yacy.search.query.SearchEvent;
+import net.yacy.search.schema.CollectionSchema;
+import net.yacy.server.serverObjects;
+
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.response.ResultContext;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.util.FastWriter;
+
+/**
+ * This is a gsa result formatter for solr search results.
+ * The result format is implemented according to
+ * https://developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
+ */
+public class GSAsearchServlet extends HttpServlet {
+
+    private final static GSAResponseWriter responseWriter = new GSAResponseWriter();
+
+    /**
+     * Handles POST requests exactly like GET requests.
+     */
+    @Override
+    protected void doPost(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+        doGet(request, response);
+    }
+
+    /**
+     * Copies the request parameters into a {@link serverObjects} instance and
+     * lets {@link #respond} write the search result to the response output stream.
+     */
+    @Override
+    protected void doGet(HttpServletRequest request, HttpServletResponse response)
+            throws ServletException, IOException {
+        // hand the request parameters (q, num, sort, site, daterange, ...) over to respond();
+        // an empty serverObjects would always produce an empty query
+        final serverObjects post = new serverObjects();
+        final Enumeration<?> names = request.getParameterNames();
+        while (names.hasMoreElements()) {
+            final String name = (String) names.nextElement();
+            post.put(name, request.getParameterValues(name));
+        }
+        respond(request, post, Switchboard.getSwitchboard(), response.getOutputStream());
+    }
+
+    // ------------------------------------------
+    /**
+     * from here copy of htroot/gsa/gsasearchresult.java
+     * with modification to use HttpServletRequest instead of (yacy) RequestHeader
+     */
+
+    /**
+     * Translates a GSA style request into a Solr query, runs it against the
+     * embedded Solr connector and writes the result to {@code out}.
+     *
+     * @param header the servlet request; used for client ip, user agent and the admin role check
+     * @param post the request parameters; if null, an empty query with zero rows is used
+     * @param sb the switchboard giving access to the index and the configuration
+     * @param out the stream the result is written to; it is closed after writing
+     * @return always null, the result is written directly to {@code out}
+     */
+    public static serverObjects respond(final HttpServletRequest header, serverObjects post, final Switchboard sb, final OutputStream out) {
+
+        // remember the peer contact for peer statistics
+        String clientip = header.getHeader(HeaderFramework.CONNECTION_PROP_CLIENTIP);
+        if (clientip == null) clientip = ""; // read an artificial header addendum
+        String userAgent = header.getHeader(HeaderFramework.USER_AGENT);
+        if (userAgent == null) userAgent = "";
+        sb.peers.peerActions.setUserAgent(clientip, userAgent);
+
+        // --- handled by Servlet securityHandler
+        // check if user is allowed to search (can be switched in /ConfigPortal.html)
+        boolean authenticated = header.isUserInRole("admin"); //sb.adminAuthenticated(header) >= 2;
+        // final boolean searchAllowed = authenticated || sb.getConfigBool("publicSearchpage", true);
+        // if (!searchAllowed) return null;
+
+        // check post
+        if (post == null) {post = new serverObjects(); post.put("q", ""); post.put("num", "0");}
+        ConcurrentLog.info("GSA Query", post.toString());
+        sb.intermissionAllThreads(3000); // tell all threads to do nothing for a specific time
+
+        // rename post fields according to result style
+        //post.put(CommonParams.Q, post.remove("q")); // same as solr
+        //post.put(CommonParams.START, post.remove("start")); // same as solr
+        //post.put(, post.remove("client"));//required, example: myfrontend
+        //post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
+        String originalQuery = post.get(CommonParams.Q, "");
+        post.put("originalQuery", originalQuery);
+
+        // get a solr query string
+        QueryGoal qg = new QueryGoal(originalQuery, originalQuery);
+        StringBuilder solrQ = qg.collectionTextQueryString(sb.index.fulltext().getDefaultConfiguration(), 0, false);
+        post.put("defType", "edismax");
+        post.put(CommonParams.Q, solrQ.toString());
+        post.put(CommonParams.ROWS, post.remove("num"));
+        // requests without the admin role are limited to 100 rows
+        post.put(CommonParams.ROWS, Math.min(post.getInt(CommonParams.ROWS, 10), (authenticated) ? 100000000 : 100));
+
+        // set ranking
+        if (post.containsKey("sort")) {
+            // if a gsa-style sort attribute is given, use this to set the solr sort attribute
+            GSAResponseWriter.Sort sort = new GSAResponseWriter.Sort(post.get(CommonParams.SORT, ""));
+            String sorts = sort.toSolr();
+            if (sorts == null) {
+                post.remove(CommonParams.SORT);
+            } else {
+                post.put(CommonParams.SORT, sorts);
+            }
+        } else {
+            // if no such sort attribute is given, use the ranking as configured for YaCy
+            Ranking ranking = sb.index.fulltext().getDefaultConfiguration().getRanking(0);
+            String bq = ranking.getBoostQuery();
+            String bf = ranking.getBoostFunction();
+            if (bq.length() > 0) post.put("bq", bq);
+            if (bf.length() > 0) post.put("boost", bf); // a boost function extension, see http://wiki.apache.org/solr/ExtendedDisMax#bf_.28Boost_Function.2C_additive.29
+        }
+
+        // translate gsa date ranges ("from..to", "from..", "..to") into a filter query
+        String daterange[] = post.remove("daterange");
+        if (daterange != null) {
+            String origfq = post.get(CommonParams.FQ);
+            String datefq = "";
+            for (String dr: daterange) {
+                String from_to[] = dr.endsWith("..") ? new String[]{dr.substring(0, dr.length() - 2), ""} : dr.startsWith("..") ? new String[]{"", dr.substring(2)} : dr.split("\\.\\.");
+                if (from_to.length != 2) continue;
+                Date from = HeaderFramework.parseGSAFS(from_to[0]);
+                if (from == null) from = new Date(0);
+                Date to = HeaderFramework.parseGSAFS(from_to[1]);
+                if (to == null) to = new Date();
+                to.setTime(to.getTime() + 24L * 60L * 60L * 1000L); // we add a day because the day is inclusive
+                String z = CollectionSchema.last_modified.getSolrFieldName() + ":[" + ISO8601Formatter.FORMATTER.format(from) + " TO " + ISO8601Formatter.FORMATTER.format(to) + "]";
+                // append to the ranges collected so far instead of replacing them
+                datefq = datefq.length() == 0 ? z : datefq + " OR " + z;
+            }
+            if (datefq.length() > 0) post.put(CommonParams.FQ, origfq == null || origfq.length() == 0 ? datefq : "(" + origfq + ") AND (" + datefq + ")");
+        }
+        post.put(CommonParams.FL,
+                CollectionSchema.content_type.getSolrFieldName() + ',' +
+                CollectionSchema.id.getSolrFieldName() + ',' +
+                CollectionSchema.sku.getSolrFieldName() + ',' +
+                CollectionSchema.title.getSolrFieldName() + ',' +
+                CollectionSchema.description_txt.getSolrFieldName() + ',' +
+                CollectionSchema.load_date_dt.getSolrFieldName() + ',' +
+                CollectionSchema.last_modified.getSolrFieldName() + ',' +
+                CollectionSchema.size_i.getSolrFieldName());
+        post.put("hl", "true");
+        post.put("hl.q", originalQuery);
+        post.put("hl.fl", CollectionSchema.h1_txt.getSolrFieldName() + "," + CollectionSchema.h2_txt.getSolrFieldName() + "," + CollectionSchema.text_t.getSolrFieldName());
+        post.put("hl.alternateField", CollectionSchema.description_txt.getSolrFieldName());
+        // NOTE(review): both highlight markers are empty strings here - confirm that this is intended
+        post.put("hl.simple.pre", "");
+        post.put("hl.simple.post", "");
+        post.put("hl.fragsize", Integer.toString(SearchEvent.SNIPPET_MAX_LENGTH));
+
+        //String[] access = post.remove("access");
+        //String[] entqr = post.remove("entqr");
+
+        // add sites operator
+        String[] site = post.remove("site"); // example: col1|col2
+        if (site != null && site.length > 0 && site[0].length() > 0) {
+            String origfq = post.get(CommonParams.FQ);
+            String sitefq = QueryModifier.parseCollectionExpression(site[0]);
+            post.put(CommonParams.FQ, origfq == null || origfq.length() == 0 ? sitefq : "(" + origfq + ") AND (" + sitefq + ")");
+        }
+
+        // get the embedded connector
+        EmbeddedSolrConnector connector = sb.index.fulltext().getDefaultEmbeddedConnector();
+        if (connector == null) return null;
+
+        // do the solr request
+        SolrQueryRequest req = connector.request(post.toSolrParams(null));
+        SolrQueryResponse response = null;
+        Exception e = null;
+        try {response = connector.query(req);} catch (final SolrException ee) {e = ee;}
+        if (response != null) e = response.getException();
+        if (e != null || response == null) {
+            // without a usable response there is nothing to write; release the request and give up
+            if (e != null) ConcurrentLog.logException(e);
+            if (req != null) req.close();
+            SolrRequestInfo.clearRequestInfo();
+            return null;
+        }
+
+        // set some context for the writer
+        /*
+        Map context = req.getContext();
+        context.put("ip", header.get("CLIENTIP", ""));
+        context.put("client", "vsm_frontent");
+        context.put("sort", sort.sort);
+        context.put("site", site == null ? "" : site);
+        context.put("access", access == null ? "p" : access[0]);
+        context.put("entqr", entqr == null ? "3" : entqr[0]);
+        */
+
+        // write the result directly to the output stream
+        Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
+        try {
+            responseWriter.write(ow, req, response);
+            ow.flush();
+        } catch (final IOException e1) {
+            // the result could not be written to the output stream; keep a trace instead of hiding it
+            ConcurrentLog.logException(e1);
+        } finally {
+            req.close();
+            SolrRequestInfo.clearRequestInfo();
+            try {ow.close();} catch (final IOException ignored) {} // nothing left to do if closing fails
+        }
+
+        // log result
+        Object rv = response.getValues().get("response");
+        int matches = 0;
+        if (rv != null && rv instanceof ResultContext) {
+            matches = ((ResultContext) rv).docs.matches();
+        } else if (rv != null && rv instanceof SolrDocumentList) {
+            matches = (int) ((SolrDocumentList) rv).getNumFound();
+        }
+        AccessTracker.addToDump(originalQuery, Integer.toString(matches));
+        ConcurrentLog.info("GSA Query", "results: " + matches + ", for query:" + post.toString());
+
+        return null;
+    }
+}
diff --git a/source/net/yacy/http/Jetty8HttpServerImpl.java b/source/net/yacy/http/Jetty8HttpServerImpl.java
index ef331902e..442ac1419 100644
--- a/source/net/yacy/http/Jetty8HttpServerImpl.java
+++ b/source/net/yacy/http/Jetty8HttpServerImpl.java
@@ -91,9 +91,14 @@ public class Jetty8HttpServerImpl implements YaCyHttpServer {
         sholder.setInitParameter("gzip","false");
         htrootContext.addServlet(sholder,"/*");
 
+        // add proxy?url= servlet
         ServletHolder proxyholder= new ServletHolder(YaCyProxyServlet.class);
         htrootContext.addServlet(proxyholder,"/proxy.html");
 
+        // add GSA servlet
+        ServletHolder gsaholder = new ServletHolder (GSAsearchServlet.class);
+        htrootContext.addServlet(gsaholder,"/gsa/search");
+
         // assemble the servlet handlers
         ContextHandlerCollection servletContext = new ContextHandlerCollection();
         servletContext.setHandlers(new Handler[] { solrContext, htrootContext });
diff --git a/source/net/yacy/http/Jetty8YaCyDefaultServlet.java b/source/net/yacy/http/Jetty8YaCyDefaultServlet.java
index ee66d4054..065bb5ed2 100644
--- a/source/net/yacy/http/Jetty8YaCyDefaultServlet.java
+++ b/source/net/yacy/http/Jetty8YaCyDefaultServlet.java
@@ -46,7 +46,6 @@ import org.eclipse.jetty.io.Buffer;
 import org.eclipse.jetty.io.WriterOutputStream;
 import org.eclipse.jetty.server.AbstractHttpConnection;
 import org.eclipse.jetty.server.Connector;
-import org.eclipse.jetty.server.Dispatcher;
 import org.eclipse.jetty.server.HttpOutput;
 import org.eclipse.jetty.server.InclusiveByteRange;
 import org.eclipse.jetty.server.Response;
@@ -112,10 +111,10 @@ public class Jetty8YaCyDefaultServlet extends YaCyDefaultServlet implements Reso
         String servletPath = null;
         String pathInfo = null;
         Enumeration reqRanges = null;
-        Boolean included = request.getAttribute(Dispatcher.INCLUDE_REQUEST_URI) != null;
+        Boolean included = request.getAttribute(RequestDispatcher.INCLUDE_REQUEST_URI) != null;
         if (included != null && included.booleanValue()) {
-            servletPath = (String) request.getAttribute(Dispatcher.INCLUDE_SERVLET_PATH);
-            pathInfo = (String) request.getAttribute(Dispatcher.INCLUDE_PATH_INFO);
+            servletPath = (String) request.getAttribute(RequestDispatcher.INCLUDE_SERVLET_PATH);
+            pathInfo = (String) request.getAttribute(RequestDispatcher.INCLUDE_PATH_INFO);
             if (servletPath == null) {
                 servletPath = request.getServletPath();
                 pathInfo = request.getPathInfo();
diff --git a/source/net/yacy/http/Jetty8YaCySecurityHandler.java b/source/net/yacy/http/Jetty8YaCySecurityHandler.java
index 1261342ec..dc140a907 100644
--- a/source/net/yacy/http/Jetty8YaCySecurityHandler.java
+++ b/source/net/yacy/http/Jetty8YaCySecurityHandler.java
@@ -181,7 +181,11 @@ public class Jetty8YaCySecurityHandler extends SecurityHandler {
             final boolean accessFromLocalhost = Domains.isLocalhost(request.getRemoteHost()) && (refererHost == null || refererHost.length() == 0 || Domains.isLocalhost(refererHost));
             // ! note : accessFromLocalhost compares localhost ip pattern ( ! currently also any intranet host is a local host)
             final boolean grantedForLocalhost = adminAccountForLocalhost && accessFromLocalhost;
-            final boolean protectedPage = pathInContext.indexOf("_p.") > 0;
+            boolean protectedPage = (pathInContext.indexOf("_p.") > 0);
+            // check "/gsa" and "/solr" if not publicSearchpage
+            if (!protectedPage && !sb.getConfigBool("publicSearchpage", true)) {
+                protectedPage = pathInContext.startsWith("/solr/") || pathInContext.startsWith("/gsa/");
+            }
             //final boolean accountEmpty = adminAccountBase64MD5.length() == 0;
             //final boolean yacyBot = request.getHeader("User-Agent").startsWith("yacybot");
 