added a solr search index

- by default, an (empty) solr storage instance is created at
SEGMENTS/solr_36
- the index is written only if the flag "embedded solr search index"
is switched on in /IndexFederated_p.html
- a standard solr query interface is available now with a new servlet at
http://127.0.0.1:8090/solr/select

To test this, do the following:
- switch to webportal mode
- switch on the feature as described
- do a crawl. This fills the solr index. The normal YaCy search will NOT
work now!
- do a solr query, like:
http://127.0.0.1:8090/solr/select?q=*:*
http://127.0.0.1:8090/solr/select?q=text_t:Help
- play with the different search fields that are listed in
/IndexFederated_p.html
You can use the standard solr query attributes as described in
http://wiki.apache.org/solr/SearchHandler
pull/1/head
Michael Peter Christen 13 years ago
parent f0a079ac9f
commit 97b7bcf2a6

@ -0,0 +1,73 @@
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import javax.servlet.ServletException;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import net.yacy.search.solr.SolrServlet;
import org.apache.solr.common.util.FastWriter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.XMLResponseWriter;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
/**
 * Entry point for Solr select queries against the embedded Solr index.
 * The query result is rendered with an {@link XMLResponseWriter} and written
 * directly to the output stream handed in by the caller.
 */
public class select {

    private static SolrServlet solrServlet = new SolrServlet();
    private static final QueryResponseWriter responseWriter = new XMLResponseWriter();

    static {
        try {
            solrServlet.init(null);
        } catch (ServletException e) {
            // do not hide an initialisation problem; record it in the log
            Log.logException(e);
        }
    }

    /**
     * a query to solr, for documentation of parameters see:
     * http://lucene.apache.org/solr/api-3_6_0/doc-files/tutorial.html
     * and
     * http://wiki.apache.org/solr/SolrQuerySyntax
     *
     * @param header the request header; not used by this method
     * @param post   the request parameters; they are converted with
     *               {@code toSolrParams()} and passed to solr. If no "df"
     *               parameter is present, "df=text_t" is added to this object.
     * @param env    the server environment; must be the {@link Switchboard}
     * @param out    the stream that receives the XML response. The writer wrapped
     *               around it is closed after a response was written
     *               (presumably this closes {@code out} as well).
     * @return always {@code null}; the result is written to {@code out}
     */
    public static serverObjects respond(@SuppressWarnings("unused") final RequestHeader header, final serverObjects post, final serverSwitch env, final OutputStream out) {
        // this uses the methods in the jetty servlet environment and can be removed if jetty is implemented
        final Switchboard sb = (Switchboard) env;
        final EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.getLocalSolr();
        if (connector == null) return null;
        if (post == null) return null;

        // if the request does not name a default search field, search in text_t
        if (!post.containsKey("df")) post.put("df", "text_t");

        final SolrQueryRequest req = connector.request(post.toSolrParams());
        try {
            final SolrQueryResponse response = connector.query(req);
            final Exception queryError = response.getException();
            if (queryError != null) {
                Log.logException(queryError);
                return null;
            }

            // write the result directly to the output stream
            final Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
            try {
                responseWriter.write(ow, req, response);
                ow.flush();
            } catch (IOException e) {
                Log.logException(e);
            } finally {
                try {
                    ow.close();
                } catch (IOException ignored) {
                    // nothing more can be done with a stream that fails on close
                }
            }
            return null;
        } finally {
            // the request must be closed on every path, including the error
            // return above and exceptions thrown while querying
            req.close();
        }
    }
}

@ -536,7 +536,7 @@ public final class HTTPDFileHandler {
} else {
//XXX: you cannot share a .png/.gif file with a name like a class in htroot.
if ( !(targetFile.exists()) &&
!((path.endsWith("png")||path.endsWith("gif") ||
!((path.endsWith("png")||path.endsWith("gif") || path.indexOf('.') < 0 ||
matchesSuffix(path, switchboard.getConfig("cgi.suffixes", null)) ||
path.endsWith(".stream")) &&
targetClass!=null ) ){
@ -574,7 +574,7 @@ public final class HTTPDFileHandler {
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, path);
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, "png");
// in case that there are no args given, args = null or empty hashmap
img = invokeServlet(targetClass, requestHeader, args);
img = invokeServlet(targetClass, requestHeader, args, null);
if (img == null) {
// error with image generation; send file-not-found
HTTPDemon.sendRespondError(conProp, out, 3, 404, "File not Found", null, null);
@ -867,18 +867,18 @@ public final class HTTPDFileHandler {
}
}
}
} else if ((targetClass != null) && (path.endsWith(".stream"))) {
} else if (targetClass != null && (path.endsWith(".stream") || path.indexOf('.') < 0)) {
// call rewrite-class
requestHeader.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, path);
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, "stream");
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, path.endsWith(".stream") ? "stream" : "");
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body);
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out);
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null);
// in case that there are no args given, args = null or empty hashmap
/* servletProperties tp = (servlerObjects) */ invokeServlet(targetClass, requestHeader, args);
ResponseHeader header = new ResponseHeader(200);
header.put(HeaderFramework.CONTENT_TYPE, "text/xml"); // this is a hack; the actual content type should be given by the servlet, but there is no handover process for that at this time
conProp.remove(HeaderFramework.CONNECTION_PROP_PERSISTENT);
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, header);
invokeServlet(targetClass, requestHeader, args, out);
forceConnectionClose(conProp);
return;
} else if (targetFile.exists() && targetFile.isFile() && targetFile.canRead()) {
@ -920,7 +920,7 @@ public final class HTTPDFileHandler {
final int ep = path.lastIndexOf(".");
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, path.substring(ep + 1));
// in case that there are no args given, args = null or empty hashmap
final Object tmp = invokeServlet(targetClass, requestHeader, args);
final Object tmp = invokeServlet(targetClass, requestHeader, args, null);
if (tmp == null) {
// if no args given, then tp will be an empty Hashtable object (not null)
templatePatterns = new servletProperties();
@ -1313,9 +1313,7 @@ public final class HTTPDFileHandler {
try {
String f = template.getCanonicalPath();
final int p = f.lastIndexOf('.');
if (p < 0) return null;
f = f.substring(0, p) + ".class";
//System.out.println("constructed class path " + f);
f = p < 0 ? f + ".class" : f.substring(0, p) + ".class";
final File cf = new File(f);
if (cf.exists()) return cf;
return null;
@ -1341,11 +1339,20 @@ public final class HTTPDFileHandler {
}
final Class<?> c = provider.loadClass(classFile);
final Class<?>[] params = new Class[] {
Class<?>[] params = new Class[] {
RequestHeader.class,
serverObjects.class,
serverSwitch.class };
m = c.getMethod("respond", params);
try {
m = c.getMethod("respond", params);
} catch (NoSuchMethodException e) {
params = new Class[] {
RequestHeader.class,
serverObjects.class,
serverSwitch.class,
OutputStream.class};
m = c.getMethod("respond", params);
}
if (MemoryControl.shortStatus()) {
templateMethodCache.clear();
@ -1365,9 +1372,12 @@ public final class HTTPDFileHandler {
return m;
}
private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args) {
private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args, final OutputStream os) {
try {
return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
if (os == null) {
return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard});
}
return rewriteMethod(targetClass).invoke(null, new Object[] {request, args, switchboard, os});
} catch (final Throwable e) {
theLogger.logSevere("INTERNAL ERROR: " + e.toString() + ":" +
e.getMessage() +

@ -63,6 +63,9 @@ import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.util.Formatter;
import net.yacy.search.Switchboard;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
public class serverObjects extends HashMap<String, String> implements Cloneable {
@ -464,6 +467,15 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
return param.toString();
}
/**
 * Converts the entries of this map into solr request parameters.
 * Each value is wrapped into a one-element array because the solr
 * parameter map stores an array of values per key.
 *
 * @return a {@link MultiMapSolrParams} built from a new map holding all entries of this object
 */
public SolrParams toSolrParams() {
    final Map<String, String[]> multiValued = new HashMap<String, String[]>();
    for (final Map.Entry<String, String> entry : this.entrySet()) {
        final String[] singleValue = new String[] {entry.getValue()};
        multiValued.put(entry.getKey(), singleValue);
    }
    return new MultiMapSolrParams(multiValued);
}
public static void main(final String[] args) {
final String v = "ein \"zitat\"";
System.out.println(toJSON(v));

@ -63,6 +63,9 @@ import net.yacy.repository.Blacklist;
import net.yacy.repository.Blacklist.BlacklistType;
import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import org.apache.lucene.util.Version;
import de.anomic.crawler.CrawlStacker;
public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]> {
@ -98,8 +101,15 @@ public final class MetadataRepository implements /*Metadata,*/ Iterable<byte[]>
public void connectLocalSolr() throws IOException {
File solrLocation = this.location;
if (solrLocation.getName().equals("default")) solrLocation = solrLocation.getParentFile();
solrLocation = new File(solrLocation, "solr");
this.localSolr = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath,"defaults"), "solr"));
String solrPath = "solr_36";
solrLocation = new File(solrLocation, solrPath); // the number should be identical to the number in the property luceneMatchVersion in solrconfig.xml
EmbeddedSolrConnector solr = new EmbeddedSolrConnector(solrLocation, new File(new File(Switchboard.getSwitchboard().appPath, "defaults"), "solr"));
Version luceneVersion = solr.getConfig().getLuceneVersion("luceneMatchVersion");
String lvn = luceneVersion.name();
int p = lvn.indexOf('_');
assert solrPath.endsWith(lvn.substring(p)) : "luceneVersion = " + lvn + ", solrPath = " + solrPath + ", p = " + p;
Log.logInfo("MetadataRepository", "connected solr in " + solrLocation.toString() + ", lucene version " + lvn);
this.localSolr = solr;
}
public SolrConnector getLocalSolr() {

@ -139,7 +139,7 @@ public class Segment {
// create LURL-db
this.urlMetadata = new MetadataRepository(segmentPath, "text.urlmd", useTailCache, exceed134217727);
//this.connectLocalSolr();
this.connectLocalSolr();
}
public long URLCount() {

@ -35,16 +35,34 @@ import net.yacy.search.index.SolrField;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SearchHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryRequestBase;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.servlet.SolrRequestParsers;
import org.xml.sax.SAXException;
import com.google.common.io.Files;
public class EmbeddedSolrConnector extends AbstractSolrConnector implements SolrConnector {
private final CoreContainer core;
public static final String SELECT = "/select";
public static final String CONTEXT = "/solr";
private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "lang/"};
//private final static String[] confFiles = {"solrconfig.xml", "schema.xml", "stopwords.txt", "synonyms.txt", "protwords.txt", "currency.xml", "elevate.xml", "lang/"};
private final CoreContainer cores;
private final String defaultCoreName;
private final SolrCore defaultCore;
protected SolrRequestParsers adminRequestParser;
private final SearchHandler requestHandler;
public EmbeddedSolrConnector(File storagePath, File solr_config) throws IOException {
super();
@ -67,19 +85,62 @@ public class EmbeddedSolrConnector extends AbstractSolrConnector implements Solr
}
}
try {
this.core = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml"));
this.cores = new CoreContainer(storagePath.getAbsolutePath(), new File(solr_config, "solr.xml"));
} catch (ParserConfigurationException e) {
throw new IOException(e.getMessage(), e);
} catch (SAXException e) {
throw new IOException(e.getMessage(), e);
}
super.init(new EmbeddedSolrServer(this.core, "collection1"));
this.defaultCoreName = this.cores.getDefaultCoreName();
this.defaultCore = this.cores.getCore(this.defaultCoreName); // should be "collection1"
final NamedList<Object> config = new NamedList<Object>();
this.requestHandler = new SearchHandler();
this.requestHandler.init(config);
this.requestHandler.inform(this.defaultCore);
super.init(new EmbeddedSolrServer(this.cores, this.defaultCoreName));
}
/**
 * Gives access to the default core of this connector, which is the core
 * that the constructor fetched from the core container under the
 * container's default core name.
 * @return the default solr core
 */
public SolrCore getCore() {
return this.defaultCore;
}
/**
 * Gives access to the solr configuration of the default core.
 * @return the result of getSolrConfig() on the default core
 */
public SolrConfig getConfig() {
return this.defaultCore.getSolrConfig();
}
@Override
public void close() {
public synchronized void close() {
super.close();
this.core.shutdown();
this.cores.shutdown();
}
/**
 * Creates a query request for the default core.
 * The request context is tagged with the select path and the solr
 * web application context of this connector.
 *
 * @param params the solr query parameters
 * @return a new request object bound to the default core
 */
public SolrQueryRequest request(final SolrParams params) {
    final SolrQueryRequest queryRequest = new SolrQueryRequestBase(this.defaultCore, params) {};
    queryRequest.getContext().put("path", SELECT);
    queryRequest.getContext().put("webapp", CONTEXT);
    return queryRequest;
}
/**
 * Executes a request with the search handler of this connector.
 * A "responseHeader" entry is attached to the response; after the handler
 * has run it receives a "status" (0 if no exception was recorded, the code
 * of a SolrException, or 500 for any other exception) and the elapsed
 * time in milliseconds as "QTime".
 * This method registers a SolrRequestInfo for the request and does not
 * clear it again.
 *
 * @param req the request to execute
 * @return the response, which may carry an exception
 */
public SolrQueryResponse query(SolrQueryRequest req) {
    final long begin = System.currentTimeMillis();

    // prepare the response with a header that is filled in below
    final SolrQueryResponse rsp = new SolrQueryResponse();
    final NamedList<Object> header = new SimpleOrderedMap<Object>();
    rsp.add("responseHeader", header);
    SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));

    // let solr compute the result
    this.requestHandler.handleRequest(req, rsp);

    // derive the status code from the exception state of the response
    final Exception failure = rsp.getException();
    final int status;
    if (failure == null) {
        status = 0;
    } else if (failure instanceof SolrException) {
        status = ((SolrException) failure).code();
    } else {
        status = 500;
    }
    header.add("status", status);
    header.add("QTime", (int) (System.currentTimeMillis() - begin));

    return rsp;
}
public static void main(String[] args) {
@ -93,25 +154,17 @@ public class EmbeddedSolrConnector extends AbstractSolrConnector implements Solr
solrdoc.addSolr(SolrField.title, "Lorem ipsum");
solrdoc.addSolr(SolrField.text_t, "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.");
solr.add(solrdoc);
SolrServlet.startServer("/solr", 8091, solr);
SolrDocumentList searchresult = solr.get(SolrField.text_t.name() + ":tempor", 0, 10);
for (SolrDocument d: searchresult) {
for (SolrDocument d : searchresult) {
System.out.println(d.toString());
}
// try http://127.0.0.1:8091/solr/select?q=ping
try {Thread.sleep(1000 * 1000);} catch (InterruptedException e) {}
solr.close();
/*
JettySolrRunner solrJetty = new JettySolrRunner("/solr", 8091, storage.getAbsolutePath());
try {
solrJetty.start();
String url = "http://localhost:" + solrJetty.getLocalPort() + "/solr";
SolrServer server = new HttpSolrServer(url);
} catch (Exception e) {
e.printStackTrace();
}
*/
} catch (IOException e) {
Log.logException(e);
}
}
}

@ -0,0 +1,237 @@
package net.yacy.search.solr;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URL;
import java.util.Iterator;
import java.util.Map;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import net.yacy.cora.document.UTF8;
import net.yacy.kelondro.logging.Log;
import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.FastWriter;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.ServletSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.XMLResponseWriter;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.servlet.cache.HttpCacheHeaderUtil;
import org.apache.solr.servlet.cache.Method;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.servlet.Context;
import org.mortbay.jetty.servlet.FilterHolder;
/**
 * Servlet filter that answers solr select requests with the help of an
 * {@link EmbeddedSolrConnector}. Only GET and HEAD requests to the path
 * {@link EmbeddedSolrConnector#SELECT} within the context
 * {@link EmbeddedSolrConnector#CONTEXT} are handled; everything else is passed
 * on to the filter chain. Results are rendered with an {@link XMLResponseWriter}.
 */
public class SolrServlet implements Filter {

    private static final QueryResponseWriter responseWriter = new XMLResponseWriter();
    private static EmbeddedSolrConnector connector;

    public SolrServlet() {
    }

    /**
     * Sets the connector that all instances of this filter use to answer queries.
     * @param c the embedded solr connector
     */
    public static void initCore(EmbeddedSolrConnector c) {
        connector = c;
    }

    @Override
    public void init(FilterConfig config) throws ServletException {
    }

    @Override
    public void destroy() {
    }

    /**
     * Answers a select request or hands the request over to the filter chain.
     *
     * @param request  the incoming request
     * @param response the response that receives the XML result or an error
     * @param chain    the remaining filter chain; may be null
     * @throws IOException if writing an error response fails
     * @throws ServletException if the request method is neither GET nor HEAD
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {

        if (!(request instanceof HttpServletRequest)) {
            if (chain != null) chain.doFilter(request, response);
            return;
        }

        HttpServletRequest hrequest = (HttpServletRequest) request;
        HttpServletResponse hresponse = (HttpServletResponse) response;
        SolrQueryRequest req = null;

        // check if this servlet was called correctly
        String pathInfo = hrequest.getPathInfo();
        String path = pathInfo == null ? hrequest.getServletPath() : hrequest.getServletPath() + pathInfo; // should be "/select" after this

        if (!EmbeddedSolrConnector.SELECT.equals(path)) {
            // this is not for this servlet
            if (chain != null) chain.doFilter(request, response);
            return;
        }
        if (!EmbeddedSolrConnector.CONTEXT.equals(hrequest.getContextPath())) {
            // this is not for this servlet
            if (chain != null) chain.doFilter(request, response);
            return;
        }

        // reject POST which is not supported here
        final Method reqMethod = Method.getMethod(hrequest.getMethod());
        if (reqMethod == null || (reqMethod != Method.GET && reqMethod != Method.HEAD)) {
            throw new ServletException("Unsupported method: " + hrequest.getMethod());
        }

        try {
            // a missing connector is reported the same way as a missing core
            final EmbeddedSolrConnector solr = connector;
            final SolrCore core = solr == null ? null : solr.getCore();
            if (core == null) {
                throw new UnsupportedOperationException("core not initialized");
            }

            // prepare request to solr
            hrequest.setAttribute("org.apache.solr.CoreContainer", core);
            req = solr.request(new ServletSolrParams(hrequest));

            SolrQueryResponse rsp = solr.query(req);

            // prepare response
            hresponse.setHeader("Cache-Control", "no-cache");
            HttpCacheHeaderUtil.checkHttpCachingVeto(rsp, hresponse, reqMethod);

            // check error
            if (rsp.getException() != null) {
                sendError(hresponse, rsp.getException());
                return;
            }

            // write response header
            final String contentType = responseWriter.getContentType(req, rsp);
            if (null != contentType) response.setContentType(contentType);

            if (Method.HEAD == reqMethod) {
                return;
            }

            // write response body
            Writer out = new FastWriter(new OutputStreamWriter(response.getOutputStream(), UTF8.charset));
            responseWriter.write(out, req, rsp);
            out.flush();
            return;

        } catch (Throwable ex) {
            sendError(hresponse, ex);
            return;
        } finally {
            if (req != null) {
                req.close();
            }
            SolrRequestInfo.clearRequestInfo();
        }
    }

    /**
     * Reports a failure to the client. The status code is taken from a
     * SolrException if available (codes below 100 are mapped to 500),
     * otherwise 500 is used. If the response was already committed, no error
     * page can be sent any more, so the failure is only logged.
     *
     * @param hresponse the response to write the error to
     * @param ex        the failure to report
     * @throws IOException if sending the error fails
     */
    private static void sendError(HttpServletResponse hresponse, Throwable ex) throws IOException {
        if (hresponse.isCommitted()) {
            // sendError would throw an IllegalStateException here and hide the original problem
            Log.logException(ex);
            return;
        }
        int code = (ex instanceof SolrException) ? ((SolrException) ex).code() : 500;
        StringWriter sw = new StringWriter();
        ex.printStackTrace(new PrintWriter(sw));
        hresponse.sendError((code < 100) ? 500 : code, ex.getMessage() + "\n\n" + sw.toString());
    }

    /**
     * from org.apache.solr.client.solrj.embedded.JettySolrRunner
     *
     * Starts a jetty server that serves solr queries with this filter.
     * Exceptions during start-up are logged, not thrown.
     *
     * @param context the context path of the solr web application
     * @param port    the port the server listens on
     * @param c       the connector that answers the queries
     * @return the server object
     */
    public static Server startServer(String context, int port, EmbeddedSolrConnector c) {
        Server server = new Server(port);
        server.setStopAtShutdown(true);
        Context root = new Context(server, context, Context.SESSIONS);
        root.addServlet(Servlet404.class, "/*");

        // attach org.apache.solr.response.XMLWriter to search requests
        SolrServlet.initCore(c);
        root.addFilter(SolrServlet.class, "*", Handler.REQUEST);

        if (!server.isRunning()) {
            try {
                server.start();
                waitForSolr(context, port);
            } catch (Exception e) {
                Log.logException(e);
            }
        }
        return server;
    }

    /**
     * Blocks until a test query to the local solr servlet can be opened.
     *
     * @param context the context path of the solr web application
     * @param port    the port of the server
     * @throws Exception a RuntimeException if the server did not answer within all tries,
     *                   or an exception raised while building the url or sleeping
     */
    public static void waitForSolr(String context, int port) throws Exception {
        // A raw term query type doesn't check the schema
        URL url = new URL("http://127.0.0.1:" + port + context + "/select?q={!raw+f=test_query}ping");

        Exception ex = null;
        // Wait for a total of up to 120 seconds: 600 tries, 200 milliseconds pause after each failed try
        for (int i = 0; i < 600; i++) {
            try {
                InputStream stream = url.openStream();
                stream.close();
            } catch (IOException e) {
                ex = e;
                Thread.sleep(200);
                continue;
            }
            return;
        }
        throw new RuntimeException("Jetty/Solr unresponsive", ex);
    }

    /**
     * Fallback servlet that answers every request with a 404 error.
     */
    public static class Servlet404 extends HttpServlet {
        private static final long serialVersionUID = -4497069674942245148L;

        @Override
        public void service(HttpServletRequest req, HttpServletResponse res) throws IOException {
            res.sendError(404, "Can not find: " + req.getRequestURI());
        }
    }
}
Loading…
Cancel
Save