added a very rudimentary, incomplete, non-verified GSA response writer

for solr. Try this:
http://localhost:8090/gsa/searchresult?q=pdf&site=col1&num=10
pull/1/head
Michael Peter Christen 13 years ago
parent aab0b680c3
commit d988ba50cf

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<classpath> <classpath>
<classpathentry kind="src" path="source"/> <classpathentry kind="src" path="source"/>
<classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/ymarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/" kind="src" path="htroot"/> <classpathentry excluding="api/|env/|processing/domaingraph/applet/|yacy/|api/bookmarks/|api/ymarks/|api/bookmarks/posts/|api/bookmarks/tags/|api/bookmarks/xbel/|solr/|gsa/" kind="src" path="htroot"/>
<classpathentry excluding="bookmarks/|ymarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/" kind="src" path="htroot/api"/> <classpathentry excluding="bookmarks/|ymarks/|bookmarks/posts/|bookmarks/tags/|bookmarks/xbel/" kind="src" path="htroot/api"/>
<classpathentry kind="src" path="htroot/env"/> <classpathentry kind="src" path="htroot/env"/>
<classpathentry kind="src" path="htroot/yacy"/> <classpathentry kind="src" path="htroot/yacy"/>
@ -11,6 +11,7 @@
<classpathentry kind="src" path="htroot/api/bookmarks/tags"/> <classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
<classpathentry kind="src" path="htroot/api/bookmarks/xbel"/> <classpathentry kind="src" path="htroot/api/bookmarks/xbel"/>
<classpathentry kind="src" path="htroot/solr"/> <classpathentry kind="src" path="htroot/solr"/>
<classpathentry kind="src" path="htroot/gsa"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/commons-logging-1.1.1.jar"/> <classpathentry kind="lib" path="lib/commons-logging-1.1.1.jar"/>
<classpathentry kind="lib" path="lib/J7Zip-modified.jar"/> <classpathentry kind="lib" path="lib/J7Zip-modified.jar"/>

@ -0,0 +1,120 @@
/**
* search
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
* First released 14.08.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.solr.GSAResponseWriter;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.solr.EmbeddedSolrConnector;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.FastWriter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
// try
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
/**
* This is a gsa result formatter for solr search results.
* The result format is implemented according to
* https://developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
*/
public class searchresult {

    /** writer which renders a solr response in the GSA XML format; one instance is shared by all requests */
    private final static GSAResponseWriter responseWriter = new GSAResponseWriter();

    /**
     * get the right mime type for this streamed result page
     * @param header the request header (not evaluated)
     * @param post the request parameters (not evaluated)
     * @param env the server environment (not evaluated)
     * @return always "text/xml"
     */
    public static String mime(final RequestHeader header, final serverObjects post, final serverSwitch env) {
        return "text/xml";
    }

    /**
     * Run the search described by the request parameters against the embedded solr index
     * and stream the result to the given output stream using the GSA response writer.
     * Nothing is written if searching is not allowed, if there are no request parameters,
     * if no embedded solr connector is available or if the query failed.
     * @param header the request header, used for the authentication check
     * @param post the request parameters; the GSA parameter "num" is renamed to the solr "rows" parameter
     * @param env the server environment; must be the Switchboard
     * @param out the stream the XML result is written to; it is closed after a result was written
     * @return always null, the result is written directly to out
     */
    public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env, final OutputStream out) {

        // this uses the methods in the jetty servlet environment and can be removed if jetty is implemented
        final Switchboard sb = (Switchboard) env;

        // check if user is allowed to search (can be switched in /ConfigPortal.html)
        final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
        if (!searchAllowed) return null;

        // check post
        if (post == null) return null;

        // rename post fields according to result style
        //post.put(CommonParams.Q, post.remove("q")); // same as solr
        //post.put(CommonParams.START, post.remove("start")); // same as solr
        //post.put(, post.remove("site"));//required, example: col1|col2
        //post.put(, post.remove("client"));//required, example: myfrontend
        //post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
        // only rename if "num" was actually given; otherwise an existing "rows" value
        // would be overwritten with the null result of the remove operation
        if (post.containsKey("num")) post.put(CommonParams.ROWS, post.remove("num"));

        // get the embedded connector
        final EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.getLocalSolr();
        if (connector == null) return null;

        // do the solr request; the request object must be closed on every path, also on failure
        final SolrQueryRequest req = connector.request(post.toSolrParams());
        try {
            SolrQueryResponse response = null;
            Exception e = null;
            try {response = connector.query(req);} catch (SolrException ee) {e = ee;}
            if (response != null) e = response.getException();
            if (e != null) {
                Log.logException(e);
                return null;
            }
            // no response and no exception: there is nothing which can be written
            if (response == null) return null;

            // write the result directly to the output stream
            final Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
            try {
                responseWriter.write(ow, req, response);
                ow.flush();
            } catch (IOException e1) {
                // do not fail the request handler, but leave a trace of the broken output
                Log.logException(e1);
            } finally {
                // a failure while closing is ignored deliberately, the result cannot be repaired at this point
                try {ow.close();} catch (IOException ignored) {}
            }
            return null;
        } finally {
            req.close();
        }
    }
}

@ -34,7 +34,6 @@ import net.yacy.cora.services.federated.solr.OpensearchResponseWriter;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants; import net.yacy.search.SwitchboardConstants;
import net.yacy.search.index.YaCySchema;
import net.yacy.search.solr.EmbeddedSolrConnector; import net.yacy.search.solr.EmbeddedSolrConnector;
import net.yacy.search.solr.SolrServlet; import net.yacy.search.solr.SolrServlet;
@ -55,9 +54,8 @@ import de.anomic.server.serverSwitch;
// http://localhost:8090/solr/select?q=*:*&start=0&rows=10&indent=on // http://localhost:8090/solr/select?q=*:*&start=0&rows=10&indent=on
/** /**
* * this is a standard solr search result formatter as defined in
* http://wiki.apache.org/solr/SolrQuerySyntax * http://wiki.apache.org/solr/SolrQuerySyntax
*
*/ */
public class select { public class select {
@ -128,10 +126,18 @@ public class select {
if (!post.containsKey(CommonParams.START)) post.put(CommonParams.START, post.remove("startRecord")); // sru patch if (!post.containsKey(CommonParams.START)) post.put(CommonParams.START, post.remove("startRecord")); // sru patch
if (!post.containsKey(CommonParams.ROWS)) post.put(CommonParams.ROWS, post.remove("maximumRecords")); // sru patch if (!post.containsKey(CommonParams.ROWS)) post.put(CommonParams.ROWS, post.remove("maximumRecords")); // sru patch
// check if all required post fields are there // get a response writer for the result
if (!post.containsKey(CommonParams.DF)) post.put(CommonParams.DF, YaCySchema.text_t.name()); // set default field to all fields String wt = post.get(CommonParams.WT, "xml"); // maybe use /solr/select?q=*:*&start=0&rows=10&wt=exml
if (!post.containsKey(CommonParams.START)) post.put(CommonParams.START, "0"); // set default start item QueryResponseWriter responseWriter = RESPONSE_WRITER.get(wt);
if (!post.containsKey(CommonParams.ROWS)) post.put(CommonParams.ROWS, "10"); // set default number of search results if (responseWriter == null) return null;
if (responseWriter instanceof OpensearchResponseWriter) {
// set the title every time, it is possible that it has changed
final String promoteSearchPageGreeting =
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ? env.getConfig(
"network.unit.description",
"") : env.getConfig(SwitchboardConstants.GREETING, "");
((OpensearchResponseWriter) responseWriter).setTitle(promoteSearchPageGreeting);
}
// get the embedded connector // get the embedded connector
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.getLocalSolr(); EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.getLocalSolr();
@ -148,19 +154,6 @@ public class select {
return null; return null;
} }
// get a response writer for the result
String wt = post.get(CommonParams.WT, "xml"); // maybe use /solr/select?q=*:*&start=0&rows=10&wt=exml
QueryResponseWriter responseWriter = RESPONSE_WRITER.get(wt);
if (responseWriter == null) return null;
if (responseWriter instanceof OpensearchResponseWriter) {
// set the title every time, it is possible that it has changed
final String promoteSearchPageGreeting =
(env.getConfigBool(SwitchboardConstants.GREETING_NETWORK_NAME, false)) ? env.getConfig(
"network.unit.description",
"") : env.getConfig(SwitchboardConstants.GREETING, "");
((OpensearchResponseWriter) responseWriter).setTitle(promoteSearchPageGreeting);
}
// write the result directly to the output stream // write the result directly to the output stream
Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset)); Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
try { try {

@ -62,7 +62,9 @@ import net.yacy.cora.protocol.RequestHeader.FileType;
import net.yacy.document.parser.html.CharacterCoding; import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.Formatter;
import net.yacy.search.Switchboard; import net.yacy.search.Switchboard;
import net.yacy.search.index.YaCySchema;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MultiMapSolrParams; import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
@ -469,6 +471,11 @@ public class serverObjects extends HashMap<String, String> implements Cloneable
} }
public SolrParams toSolrParams() { public SolrParams toSolrParams() {
// check if all required post fields are there
if (!this.containsKey(CommonParams.DF)) this.put(CommonParams.DF, YaCySchema.text_t.name()); // set default field to all fields
if (!this.containsKey(CommonParams.START)) this.put(CommonParams.START, "0"); // set default start item
if (!this.containsKey(CommonParams.ROWS)) this.put(CommonParams.ROWS, "10"); // set default number of search results
Map<String,String[]> m = new HashMap<String, String[]>(); Map<String,String[]> m = new HashMap<String, String[]>();
for (Map.Entry<String, String> e: this.entrySet()) { for (Map.Entry<String, String> e: this.entrySet()) {
m.put(e.getKey(), new String[]{e.getValue()}); m.put(e.getKey(), new String[]{e.getValue()});

@ -0,0 +1,258 @@
/**
* GSAResponseWriter
* Copyright 2012 by Michael Peter Christen
* First released 14.08.2012 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.services.federated.solr;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.search.index.YaCySchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.SolrIndexSearcher;
/**
* implementation of a GSA search result.
* example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
* for a xml reference, see https://developers.google.com/search-appliance/documentation/68/xml_reference
*/
public class GSAResponseWriter implements QueryResponseWriter {

    /** line break which is written after each PARAM element */
    private static final char lb = '\n';

    /** names of the elements which may appear inside a result (R) element of a GSA response */
    private enum GSAToken {
        CACHE_LAST_MODIFIED, // Date that the document was crawled, as specified in the Date HTTP header when the document was crawled for this index.
        CRAWLDATE,  // An optional element that shows the date when the page was crawled. It is shown only for pages that have been crawled within the past two days.
        U,          // The URL of the search result.
        UE,         // The URL-encoded version of the URL that is in the U parameter.
        T,          // The title of the search result.
        RK,         // Provides a ranking number used internally by the search appliance.
        ENT_SOURCE, // Identifies the application ID (serial number) of the search appliance that contributes to a result. Example: <ENT_SOURCE>S5-KUB000F0ADETLA</ENT_SOURCE>
        FS,         // Additional details about the search result.
        S,          // The snippet for the search result. Query terms appear in bold in the results. Line breaks are included for proper text wrapping.
        LANG,       // Indicates the language of the search result. The LANG element contains a two-letter language code.
        HAS;        // Encapsulates special features that are included for this search result.
    }

    private static final char[] XML_START = (
                    "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<GSP VER=\"3.2\">\n").toCharArray();
    private static final char[] XML_STOP = "</GSP>\n".toCharArray();

    // define a list of simple YaCySchema -> GSA Token matchings
    private static final Map<String, String> field2tag = new HashMap<String, String>();

    // pre-select a set of YaCy schema fields for the solr searcher which should cause a better caching
    // sku is part of this list because the write method reads it to produce the U and UE elements
    private static final YaCySchema[] extrafields = new YaCySchema[]{
        YaCySchema.id, YaCySchema.sku, YaCySchema.title, YaCySchema.description, YaCySchema.text_t,
        YaCySchema.h1_txt, YaCySchema.h2_txt, YaCySchema.h3_txt, YaCySchema.h4_txt, YaCySchema.h5_txt, YaCySchema.h6_txt,
        };

    /** names of all fields which are requested from the searcher for each result document */
    private static final Set<String> SOLR_FIELDS = new HashSet<String>();
    static {
        field2tag.put(YaCySchema.last_modified.name(), GSAToken.CACHE_LAST_MODIFIED.name());
        field2tag.put(YaCySchema.load_date_dt.name(), GSAToken.CRAWLDATE.name());
        field2tag.put(YaCySchema.language_txt.name(), GSAToken.LANG.name());
        SOLR_FIELDS.addAll(field2tag.keySet());
        for (YaCySchema field: extrafields) SOLR_FIELDS.add(field.name());
    }

    /** values taken from the solr response which describe the result as a whole */
    private static class ResHead {
        public int offset, rows, numFound;
        //public int status, QTime;
        //public String df, q, wt;
        //public float maxScore;
    }

    public GSAResponseWriter() {
        super();
    }

    @Override
    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
        return CONTENT_TYPE_XML_UTF8;
    }

    @Override
    public void init(@SuppressWarnings("rawtypes") NamedList n) {
    }

    /**
     * Write the solr response as GSA XML: the XML head, the PARAM elements for "start" and "num"
     * and one R element for each document of the response.
     * @param writer the target of the XML output
     * @param request the solr request, used to get the searcher which loads the result documents
     * @param rsp the solr response; must contain a "responseHeader" and a "response" value
     * @throws IOException if writing to the writer or loading a document fails
     */
    @Override
    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
        assert rsp.getValues().get("responseHeader") != null;
        assert rsp.getValues().get("response") != null;

        @SuppressWarnings("unchecked")
        SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
        DocSlice response = (DocSlice) rsp.getValues().get("response");

        // parse response header
        ResHead resHead = new ResHead();
        NamedList<?> val0 = (NamedList<?>) responseHeader.get("params");
        // the echoed request parameters may be missing; then the size of the result slice is reported
        resHead.rows = parseRows(val0, response.size());
        resHead.offset = response.offset(); // equal to 'start'
        resHead.numFound = response.matches();
        //resHead.df = (String) val0.get("df");
        //resHead.q = (String) val0.get("q");
        //resHead.wt = (String) val0.get("wt");
        //resHead.status = (Integer) responseHeader.get("status");
        //resHead.QTime = (Integer) responseHeader.get("QTime");
        //resHead.maxScore = response.maxScore();

        // write header
        writer.write(XML_START);
        paramTag(writer, "start", Integer.toString(resHead.offset));
        paramTag(writer, "num", Integer.toString(resHead.rows));

        // parse body
        final int responseCount = response.size();
        SolrIndexSearcher searcher = request.getSearcher();
        DocIterator iterator = response.iterator();
        for (int i = 0; i < responseCount; i++) {
            OpensearchResponseWriter.openTag(writer, "R");
            int id = iterator.nextDoc();
            Document doc = searcher.doc(id, SOLR_FIELDS);
            List<Fieldable> fields = doc.getFields();
            // texts are collected for the snippet computation which is not implemented yet
            List<String> texts = new ArrayList<String>();
            String description = "";
            for (Fieldable value: fields) {
                String fieldName = value.name();

                // apply generic matching rule
                String stag = field2tag.get(fieldName);
                if (stag != null) {
                    OpensearchResponseWriter.solitaireTag(writer, stag, value.stringValue());
                    continue;
                }

                /* elements which are not written yet:
                <RK></RK>
                <FS NAME="date" VALUE=""/>
                <S></S>
                <HAS><L/><C SZ="7k" CID="XN-uikfmLv0J" ENC="UTF-8"/></HAS>
                */

                // if the rule is not generic, use the specific here
                if (YaCySchema.sku.name().equals(fieldName)) {
                    String U = value.stringValue();
                    OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), U);
                    // NOTE(review): UE carries the same string as U, no URL-encoding is applied here
                    OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), U);
                    continue;
                }
                if (YaCySchema.title.name().equals(fieldName)) {
                    OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), value.stringValue());
                    texts.add(value.stringValue());
                    continue;
                }
                if (YaCySchema.description.name().equals(fieldName)) {
                    description = value.stringValue();
                    OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), description);
                    texts.add(description);
                    continue;
                }
                if (YaCySchema.text_t.name().equals(fieldName)) {
                    texts.add(value.stringValue());
                    continue;
                }
                if (YaCySchema.h1_txt.name().equals(fieldName) || YaCySchema.h2_txt.name().equals(fieldName) ||
                    YaCySchema.h3_txt.name().equals(fieldName) || YaCySchema.h4_txt.name().equals(fieldName) ||
                    YaCySchema.h5_txt.name().equals(fieldName) || YaCySchema.h6_txt.name().equals(fieldName)) {
                    // because these are multi-valued fields, there can be several of each
                    texts.add(value.stringValue());
                    continue;
                }
            }

            // compute snippet from texts
            OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), description);
            OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy");
            OpensearchResponseWriter.closeTag(writer, "R");
        }

        writer.write(XML_STOP);
    }

    /**
     * Write a PARAM element of the form
     * &lt;PARAM name="tagname" value="value" original_value="value"/&gt; followed by a line break.
     * The name and the value are escaped so that the characters &amp;, &lt;, &gt; and "
     * cannot break the attribute syntax.
     * @param writer the target of the output
     * @param tagname the content of the name attribute
     * @param value the content of the value and original_value attributes; nothing is written if this is null or empty
     * @throws IOException if writing to the writer fails
     */
    public static void paramTag(final Writer writer, final String tagname, String value) throws IOException {
        if (value == null || value.length() == 0) return;
        final String escapedValue = escapeAttribute(value);
        writer.write("<PARAM name=\"");
        writer.write(escapeAttribute(tagname));
        writer.write("\" value=\"");
        writer.write(escapedValue);
        writer.write("\" original_value=\"");
        writer.write(escapedValue);
        writer.write("\"/>"); writer.write(lb);
    }

    /**
     * read the number of requested rows from the echoed request parameters
     * @param params the "params" entry of the response header, may be null
     * @param fallback the value which is returned if no valid "rows" parameter is available
     * @return the parsed "rows" parameter or the fallback value
     */
    private static int parseRows(final NamedList<?> params, final int fallback) {
        if (params == null) return fallback;
        final Object rows = params.get("rows");
        if (rows == null) return fallback;
        try {
            return Integer.parseInt(rows.toString().trim());
        } catch (NumberFormatException e) {
            return fallback;
        }
    }

    /**
     * replace the characters which have a special meaning inside a double-quoted XML attribute
     * @param s the raw attribute content, must not be null
     * @return the content with &amp;, &lt;, &gt; and " replaced by their predefined XML entities
     */
    private static String escapeAttribute(final String s) {
        final StringBuilder sb = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            final char c = s.charAt(i);
            switch (c) {
                case '&': sb.append("&amp;"); break;
                case '<': sb.append("&lt;"); break;
                case '>': sb.append("&gt;"); break;
                case '"': sb.append("&quot;"); break;
                default: sb.append(c);
            }
        }
        return sb.toString();
    }
}
/*
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<GSP VER="3.2">
<TM>0.053898</TM>
<Q>pdf</Q>
<PARAM name="sort" value="date:D:L:d1" original_value="date:D:L:d1"/>
<PARAM name="output" value="xml_no_dtd" original_value="xml_no_dtd"/>
<PARAM name="ie" value="UTF-8" original_value="UTF-8"/>
<PARAM name="oe" value="UTF-8" original_value="UTF-8"/>
<PARAM name="client" value="" original_value=""/>
<PARAM name="q" value="pdf" original_value="pdf"/>
<PARAM name="site" value="" original_value=""/>
<PARAM name="start" value="0" original_value="0"/>
<PARAM name="num" value="10" original_value="10"/>
<PARAM name="ip" value="" original_value=""/>
<PARAM name="access" value="p" original_value="p"/>
<PARAM name="entqr" value="3" original_value="3"/>
<PARAM name="entqrm" value="0" original_value="0"/>
<RES SN="1" EN="10">
<M>296</M>
<NB>
<NU></NU>
</NB>
<R N="1">
<U></U>
<UE></UE>
<T></T>
<RK></RK>
<ENT_SOURCE></ENT_SOURCE>
<FS NAME="date" VALUE=""/>
<S></S>
<LANG>de</LANG>
<HAS><L/><C SZ="7k" CID="XN-uikfmLv0J" ENC="UTF-8"/></HAS>
</R>
<R N="2"></R>
</RES>
</GSP>
*/

@ -103,7 +103,7 @@ public interface SolrConnector {
* @param querystring * @param querystring
* @throws IOException * @throws IOException
*/ */
public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException; public SolrDocumentList query(final String querystring, final int offset, final int count) throws IOException, SolrException;
/** /**
* get the size of the index * get the size of the index

@ -143,7 +143,7 @@ public class EmbeddedSolrConnector extends AbstractSolrConnector implements Solr
return req; return req;
} }
public SolrQueryResponse query(SolrQueryRequest req) { public SolrQueryResponse query(SolrQueryRequest req) throws SolrException {
final long startTime = System.currentTimeMillis(); final long startTime = System.currentTimeMillis();
SolrQueryResponse rsp = new SolrQueryResponse(); SolrQueryResponse rsp = new SolrQueryResponse();

Loading…
Cancel
Save