for solr. Try this: http://localhost:8090/gsa/searchresult?q=pdf&site=col1&num=10pull/1/head
parent
aab0b680c3
commit
d988ba50cf
@ -0,0 +1,120 @@
|
|||||||
|
/**
|
||||||
|
* search
|
||||||
|
* Copyright 2012 by Michael Peter Christen, mc@yacy.net, Frankfurt am Main, Germany
|
||||||
|
* First released 14.08.2012 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
|
import java.io.Writer;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.UTF8;
|
||||||
|
import net.yacy.cora.protocol.RequestHeader;
|
||||||
|
import net.yacy.cora.services.federated.solr.GSAResponseWriter;
|
||||||
|
import net.yacy.kelondro.logging.Log;
|
||||||
|
import net.yacy.search.Switchboard;
|
||||||
|
import net.yacy.search.solr.EmbeddedSolrConnector;
|
||||||
|
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.params.CommonParams;
|
||||||
|
import org.apache.solr.common.util.FastWriter;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
|
||||||
|
import de.anomic.server.serverObjects;
|
||||||
|
import de.anomic.server.serverSwitch;
|
||||||
|
|
||||||
|
// try
|
||||||
|
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a gsa result formatter for solr search results.
|
||||||
|
* The result format is implemented according to
|
||||||
|
* https://developers.google.com/search-appliance/documentation/68/xml_reference#results_xml
|
||||||
|
*/
|
||||||
|
public class searchresult {
|
||||||
|
|
||||||
|
private final static GSAResponseWriter responseWriter = new GSAResponseWriter();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the right mime type for this streamed result page
|
||||||
|
* @param header
|
||||||
|
* @param post
|
||||||
|
* @param env
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static String mime(final RequestHeader header, final serverObjects post, final serverSwitch env) {
|
||||||
|
return "text/xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param header
|
||||||
|
* @param post
|
||||||
|
* @param env
|
||||||
|
* @param out
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static serverObjects respond(final RequestHeader header, final serverObjects post, final serverSwitch env, final OutputStream out) {
|
||||||
|
|
||||||
|
// this uses the methods in the jetty servlet environment and can be removed if jetty in implemented
|
||||||
|
Switchboard sb = (Switchboard) env;
|
||||||
|
|
||||||
|
// check if user is allowed to search (can be switched in /ConfigPortal.html)
|
||||||
|
final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header);
|
||||||
|
if (!searchAllowed) return null;
|
||||||
|
|
||||||
|
// check post
|
||||||
|
if (post == null) return null;
|
||||||
|
|
||||||
|
// rename post fields according to result style
|
||||||
|
//post.put(CommonParams.Q, post.remove("q")); // same as solr
|
||||||
|
//post.put(CommonParams.START, post.remove("start")); // same as solr
|
||||||
|
//post.put(, post.remove("site"));//required, example: col1|col2
|
||||||
|
//post.put(, post.remove("client"));//required, example: myfrontend
|
||||||
|
//post.put(, post.remove("output"));//required, example: xml,xml_no_dtd
|
||||||
|
post.put(CommonParams.ROWS, post.remove("num"));
|
||||||
|
|
||||||
|
// get the embedded connector
|
||||||
|
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.getLocalSolr();
|
||||||
|
if (connector == null) return null;
|
||||||
|
|
||||||
|
// do the solr request
|
||||||
|
SolrQueryRequest req = connector.request(post.toSolrParams());
|
||||||
|
SolrQueryResponse response = null;
|
||||||
|
Exception e = null;
|
||||||
|
try {response = connector.query(req);} catch (SolrException ee) {e = ee;}
|
||||||
|
if (response != null) e = response.getException();
|
||||||
|
if (e != null) {
|
||||||
|
Log.logException(e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// write the result directly to the output stream
|
||||||
|
Writer ow = new FastWriter(new OutputStreamWriter(out, UTF8.charset));
|
||||||
|
try {
|
||||||
|
responseWriter.write(ow, req, response);
|
||||||
|
ow.flush();
|
||||||
|
} catch (IOException e1) {
|
||||||
|
} finally {
|
||||||
|
req.close();
|
||||||
|
try {ow.close();} catch (IOException e1) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,258 @@
|
|||||||
|
/**
|
||||||
|
* GSAResponseWriter
|
||||||
|
* Copyright 2012 by Michael Peter Christen
|
||||||
|
* First released 14.08.2012 at http://yacy.net
|
||||||
|
*
|
||||||
|
* This library is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This library is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public License
|
||||||
|
* along with this program in the file lgpl21.txt
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package net.yacy.cora.services.federated.solr;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import net.yacy.cora.document.RSSMessage;
|
||||||
|
import net.yacy.cora.lod.vocabulary.DublinCore;
|
||||||
|
import net.yacy.search.index.YaCySchema;
|
||||||
|
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.QueryResponseWriter;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.search.DocIterator;
|
||||||
|
import org.apache.solr.search.DocSlice;
|
||||||
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* implementation of a GSA search result.
|
||||||
|
* example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
|
||||||
|
* for a xml reference, see https://developers.google.com/search-appliance/documentation/68/xml_reference
|
||||||
|
*/
|
||||||
|
public class GSAResponseWriter implements QueryResponseWriter {
|
||||||
|
|
||||||
|
private static final char lb = '\n';
|
||||||
|
private enum GSAToken {
|
||||||
|
CACHE_LAST_MODIFIED, // Date that the document was crawled, as specified in the Date HTTP header when the document was crawled for this index.
|
||||||
|
CRAWLDATE, // An optional element that shows the date when the page was crawled. It is shown only for pages that have been crawled within the past two days.
|
||||||
|
U, // The URL of the search result.
|
||||||
|
UE, // The URL-encoded version of the URL that is in the U parameter.
|
||||||
|
T, // The title of the search result.
|
||||||
|
RK, // Provides a ranking number used internally by the search appliance.
|
||||||
|
ENT_SOURCE, // Identifies the application ID (serial number) of the search appliance that contributes to a result. Example: <ENT_SOURCE>S5-KUB000F0ADETLA</ENT_SOURCE>
|
||||||
|
FS, // Additional details about the search result.
|
||||||
|
S, // The snippet for the search result. Query terms appear in bold in the results. Line breaks are included for proper text wrapping.
|
||||||
|
LANG, // Indicates the language of the search result. The LANG element contains a two-letter language code.
|
||||||
|
HAS; // Encapsulates special features that are included for this search result.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static final char[] XML_START = (
|
||||||
|
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<GSP VER=\"3.2\">\n").toCharArray();
|
||||||
|
private static final char[] XML_STOP = "</GSP>\n".toCharArray();
|
||||||
|
|
||||||
|
// define a list of simple YaCySchema -> RSS Token matchings
|
||||||
|
private static final Map<String, String> field2tag = new HashMap<String, String>();
|
||||||
|
|
||||||
|
// pre-select a set of YaCy schema fields for the solr searcher which should cause a better caching
|
||||||
|
private static final YaCySchema[] extrafields = new YaCySchema[]{
|
||||||
|
YaCySchema.id, YaCySchema.title, YaCySchema.description, YaCySchema.text_t,
|
||||||
|
YaCySchema.h1_txt, YaCySchema.h2_txt, YaCySchema.h3_txt, YaCySchema.h4_txt, YaCySchema.h5_txt, YaCySchema.h6_txt,
|
||||||
|
};
|
||||||
|
private static final Set<String> SOLR_FIELDS = new HashSet<String>();
|
||||||
|
static {
|
||||||
|
field2tag.put(YaCySchema.last_modified.name(), GSAToken.CACHE_LAST_MODIFIED.name());
|
||||||
|
field2tag.put(YaCySchema.load_date_dt.name(), GSAToken.CRAWLDATE.name());
|
||||||
|
field2tag.put(YaCySchema.language_txt.name(), GSAToken.LANG.name());
|
||||||
|
SOLR_FIELDS.addAll(field2tag.keySet());
|
||||||
|
for (YaCySchema field: extrafields) SOLR_FIELDS.add(field.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ResHead {
|
||||||
|
public int offset, rows, numFound;
|
||||||
|
//public int status, QTime;
|
||||||
|
//public String df, q, wt;
|
||||||
|
//public float maxScore;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GSAResponseWriter() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
|
||||||
|
return CONTENT_TYPE_XML_UTF8;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(@SuppressWarnings("rawtypes") NamedList n) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
|
||||||
|
assert rsp.getValues().get("responseHeader") != null;
|
||||||
|
assert rsp.getValues().get("response") != null;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
|
||||||
|
DocSlice response = (DocSlice) rsp.getValues().get("response");
|
||||||
|
|
||||||
|
// parse response header
|
||||||
|
ResHead resHead = new ResHead();
|
||||||
|
NamedList<?> val0 = (NamedList<?>) responseHeader.get("params");
|
||||||
|
resHead.rows = Integer.parseInt((String) val0.get("rows"));
|
||||||
|
resHead.offset = response.offset(); // equal to 'start'
|
||||||
|
resHead.numFound = response.matches();
|
||||||
|
//resHead.df = (String) val0.get("df");
|
||||||
|
//resHead.q = (String) val0.get("q");
|
||||||
|
//resHead.wt = (String) val0.get("wt");
|
||||||
|
//resHead.status = (Integer) responseHeader.get("status");
|
||||||
|
//resHead.QTime = (Integer) responseHeader.get("QTime");
|
||||||
|
//resHead.maxScore = response.maxScore();
|
||||||
|
|
||||||
|
// write header
|
||||||
|
writer.write(XML_START);
|
||||||
|
paramTag(writer, "start", Integer.toString(resHead.offset));
|
||||||
|
paramTag(writer, "num", Integer.toString(resHead.rows));
|
||||||
|
|
||||||
|
// parse body
|
||||||
|
final int responseCount = response.size();
|
||||||
|
SolrIndexSearcher searcher = request.getSearcher();
|
||||||
|
DocIterator iterator = response.iterator();
|
||||||
|
for (int i = 0; i < responseCount; i++) {
|
||||||
|
OpensearchResponseWriter.openTag(writer, "R");
|
||||||
|
int id = iterator.nextDoc();
|
||||||
|
Document doc = searcher.doc(id, SOLR_FIELDS);
|
||||||
|
List<Fieldable> fields = doc.getFields();
|
||||||
|
int fieldc = fields.size();
|
||||||
|
List<String> texts = new ArrayList<String>();
|
||||||
|
String description = "";
|
||||||
|
for (int j = 0; j < fieldc; j++) {
|
||||||
|
Fieldable value = fields.get(j);
|
||||||
|
String fieldName = value.name();
|
||||||
|
|
||||||
|
// apply generic matching rule
|
||||||
|
String stag = field2tag.get(fieldName);
|
||||||
|
if (stag != null) {
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, stag, value.stringValue());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
<RK></RK>
|
||||||
|
<FS NAME="date" VALUE=""/>
|
||||||
|
<S></S>
|
||||||
|
<HAS><L/><C SZ="7k" CID="XN-uikfmLv0J" ENC="UTF-8"/></HAS>
|
||||||
|
*/
|
||||||
|
|
||||||
|
// if the rule is not generic, use the specific here
|
||||||
|
if (YaCySchema.sku.name().equals(fieldName)) {
|
||||||
|
String U = value.stringValue();
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), U);
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), U);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (YaCySchema.title.name().equals(fieldName)) {
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), value.stringValue());
|
||||||
|
texts.add(value.stringValue());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (YaCySchema.description.name().equals(fieldName)) {
|
||||||
|
description = value.stringValue();
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, DublinCore.Description.getURIref(), description);
|
||||||
|
texts.add(description);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (YaCySchema.text_t.name().equals(fieldName)) {
|
||||||
|
texts.add(value.stringValue());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (YaCySchema.h1_txt.name().equals(fieldName) || YaCySchema.h2_txt.name().equals(fieldName) ||
|
||||||
|
YaCySchema.h3_txt.name().equals(fieldName) || YaCySchema.h4_txt.name().equals(fieldName) ||
|
||||||
|
YaCySchema.h5_txt.name().equals(fieldName) || YaCySchema.h6_txt.name().equals(fieldName)) {
|
||||||
|
// because these are multi-valued fields, there can be several of each
|
||||||
|
texts.add(value.stringValue());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// compute snippet from texts
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, RSSMessage.Token.description.name(), description);
|
||||||
|
OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), "YaCy");
|
||||||
|
OpensearchResponseWriter.closeTag(writer, "R");
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.write(XML_STOP);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public static void paramTag(final Writer writer, final String tagname, String value) throws IOException {
|
||||||
|
if (value == null || value.length() == 0) return;
|
||||||
|
writer.write("<PARAM name=\"");
|
||||||
|
writer.write(tagname);
|
||||||
|
writer.write("\" value=\"");
|
||||||
|
writer.write(value);
|
||||||
|
writer.write("\" original_value=\"");
|
||||||
|
writer.write(value);
|
||||||
|
writer.write("\"/>"); writer.write(lb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<GSP VER="3.2">
|
||||||
|
<TM>0.053898</TM>
|
||||||
|
<Q>pdf</Q>
|
||||||
|
<PARAM name="sort" value="date:D:L:d1" original_value="date:D:L:d1"/>
|
||||||
|
<PARAM name="output" value="xml_no_dtd" original_value="xml_no_dtd"/>
|
||||||
|
<PARAM name="ie" value="UTF-8" original_value="UTF-8"/>
|
||||||
|
<PARAM name="oe" value="UTF-8" original_value="UTF-8"/>
|
||||||
|
<PARAM name="client" value="" original_value=""/>
|
||||||
|
<PARAM name="q" value="pdf" original_value="pdf"/>
|
||||||
|
<PARAM name="site" value="" original_value=""/>
|
||||||
|
<PARAM name="start" value="0" original_value="0"/>
|
||||||
|
<PARAM name="num" value="10" original_value="10"/>
|
||||||
|
<PARAM name="ip" value="" original_value=""/>
|
||||||
|
<PARAM name="access" value="p" original_value="p"/>
|
||||||
|
<PARAM name="entqr" value="3" original_value="3"/>
|
||||||
|
<PARAM name="entqrm" value="0" original_value="0"/>
|
||||||
|
<RES SN="1" EN="10">
|
||||||
|
<M>296</M>
|
||||||
|
<NB>
|
||||||
|
<NU></NU>
|
||||||
|
</NB>
|
||||||
|
|
||||||
|
<R N="1">
|
||||||
|
<U></U>
|
||||||
|
<UE></UE>
|
||||||
|
<T></T>
|
||||||
|
<RK></RK>
|
||||||
|
<ENT_SOURCE></ENT_SOURCE>
|
||||||
|
<FS NAME="date" VALUE=""/>
|
||||||
|
<S></S>
|
||||||
|
<LANG>de</LANG>
|
||||||
|
<HAS><L/><C SZ="7k" CID="XN-uikfmLv0J" ENC="UTF-8"/></HAS>
|
||||||
|
</R>
|
||||||
|
<R N="2"></R>
|
||||||
|
</RES>
|
||||||
|
</GSP>
|
||||||
|
*/
|
Loading…
Reference in new issue