Added Solr GSA writer support for responses from remote instances

pull/292/head
luccioman 6 years ago
parent fc83d35f3f
commit 5b7e41202a

@ -25,20 +25,24 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
@ -61,7 +65,7 @@ import net.yacy.search.schema.CollectionSchema;
* example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
* for a xml reference, see https://developers.google.com/search-appliance/documentation/614/xml_reference
*/
public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter {
public class GSAResponseWriter implements QueryResponseWriter, SolrjResponseWriter {
private static String YaCyVer = null;
private static final char lb = '\n';
@ -86,9 +90,6 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<GSP VER=\"3.2\">\n<!-- This is a Google Search Appliance API result, provided by YaCy. See https://developers.google.com/search-appliance/documentation/614/xml_reference -->\n").toCharArray();
private static final char[] XML_STOP = "</GSP>\n".toCharArray();
// define a list of simple YaCySchema -> RSS Token matchings
private static final Map<String, String> field2tag = new HashMap<String, String>();
// pre-select a set of YaCy schema fields for the solr searcher which should cause a better caching
private static final CollectionSchema[] extrafields = new CollectionSchema[]{
CollectionSchema.id, CollectionSchema.sku, CollectionSchema.title, CollectionSchema.description_txt,
@ -96,15 +97,16 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
CollectionSchema.language_s, CollectionSchema.collection_sxt
};
private static final Set<String> SOLR_FIELDS = new HashSet<String>();
private static final Set<String> SOLR_FIELDS = new HashSet<>();
static {
field2tag.put(CollectionSchema.language_s.getSolrFieldName(), GSAToken.LANG.name());
SOLR_FIELDS.addAll(field2tag.keySet());
SOLR_FIELDS.add(CollectionSchema.language_s.getSolrFieldName());
for (CollectionSchema field: extrafields) SOLR_FIELDS.add(field.getSolrFieldName());
}
private static class ResHead {
public int offset, rows, numFound;
public long offset, numFound;
public int rows;
//public int status, QTime;
//public String df, q, wt;
//public float maxScore;
@ -129,10 +131,6 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
}
}
public GSAResponseWriter() {
super();
}
@Override
public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
return CONTENT_TYPE_XML_UTF8;
@ -144,41 +142,150 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
@Override
public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
assert rsp.getValues().get("responseHeader") != null;
assert rsp.getValues().get("response") != null;
long start = System.currentTimeMillis();
final long start = System.currentTimeMillis();
final Object responseObj = rsp.getResponse();
if(responseObj instanceof ResultContext) {
/* Regular response object */
final DocList documents = ((ResultContext) responseObj).getDocList();
final Object highlightingObj = rsp.getValues().get("highlighting");
final Map<String, Collection<String>> snippets = highlightingObj instanceof NamedList
? OpensearchResponseWriter.snippetsFromHighlighting((NamedList<?>) highlightingObj)
: new HashMap<>();
// parse response header
final ResHead resHead = new ResHead();
resHead.rows = request.getParams().getInt(CommonParams.ROWS, 0);
resHead.offset = documents.offset(); // equal to 'start'
resHead.numFound = documents.matches();
//resHead.df = (String) val0.get("df");
//resHead.q = (String) val0.get("q");
//resHead.wt = (String) val0.get("wt");
//resHead.status = (Integer) responseHeader.get("status");
//resHead.QTime = (Integer) responseHeader.get("QTime");
//resHead.maxScore = response.maxScore();
SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
DocList response = ((ResultContext) rsp.getValues().get("response")).getDocList();
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) rsp.getValues().get("highlighting");
Map<String, LinkedHashSet<String>> snippets = OpensearchResponseWriter.highlighting(highlighting);
Map<Object,Object> context = request.getContext();
// write header
writeHeader(writer, request, resHead, start);
// body introduction
writeBodyIntro(writer, request, resHead, documents.size());
writeDocs(writer, request, documents, snippets, resHead);
writer.write("</RES>"); writer.write(lb);
writer.write(XML_STOP);
} else if(responseObj instanceof SolrDocumentList) {
/*
* The response object can be a SolrDocumentList when the response is partial,
* for example when the allowed processing time has been exceeded
*/
final SolrDocumentList documents = (SolrDocumentList) responseObj;
final Object highlightingObj = rsp.getValues().get("highlighting");
final Map<String, Collection<String>> snippets = highlightingObj instanceof NamedList
? OpensearchResponseWriter.snippetsFromHighlighting((NamedList<?>) highlightingObj)
: new HashMap<>();
writeSolrDocumentList(writer, request, snippets, start, documents);
} else {
throw new IOException("Unable to process Solr response format");
}
}
@Override
public void write(Writer writer, SolrQueryRequest request, String coreName, QueryResponse rsp) throws IOException {
final long start = System.currentTimeMillis();
writeSolrDocumentList(writer, request, snippetsFromHighlighting(rsp.getHighlighting()), start,
rsp.getResults());
}
/**
* Produce snippets from Solr (they call that 'highlighting')
*
* @param sorlHighlighting highlighting from Solr
* @return a map from urlhashes to a list of snippets for that url
*/
private Map<String, Collection<String>> snippetsFromHighlighting(
final Map<String, Map<String, List<String>>> sorlHighlighting) {
final Map<String, Collection<String>> snippets = new HashMap<>();
if (sorlHighlighting == null) {
return snippets;
}
for (final Entry<String, Map<String, List<String>>> highlightingEntry : sorlHighlighting.entrySet()) {
final String urlHash = highlightingEntry.getKey();
final Map<String, List<String>> highlights = highlightingEntry.getValue();
final LinkedHashSet<String> urlSnippets = new LinkedHashSet<>();
for (final List<String> texts : highlights.values()) {
urlSnippets.addAll(texts);
}
snippets.put(urlHash, urlSnippets);
}
return snippets;
}
/**
* Append to the writer a representation of a list of Solr documents. All
* parameters are required and must not be null.
*
* @param writer an open output writer
* @param request the Solr request
* @param snippets the snippets computed from the Solr highlighting
* @param start the results start index
* @param documents the Solr documents to process
* @throws IOException when a write error occurred
*/
private void writeSolrDocumentList(final Writer writer, final SolrQueryRequest request,
final Map<String, Collection<String>> snippets, final long start, final SolrDocumentList documents)
throws IOException {
// parse response header
ResHead resHead = new ResHead();
NamedList<?> val0 = (NamedList<?>) responseHeader.get("params");
resHead.rows = Integer.parseInt((String) val0.get(CommonParams.ROWS));
resHead.offset = response.offset(); // equal to 'start'
resHead.numFound = response.matches();
//resHead.df = (String) val0.get("df");
//resHead.q = (String) val0.get("q");
//resHead.wt = (String) val0.get("wt");
//resHead.status = (Integer) responseHeader.get("status");
//resHead.QTime = (Integer) responseHeader.get("QTime");
//resHead.maxScore = response.maxScore();
final ResHead resHead = new ResHead();
resHead.rows = request.getParams().getInt(CommonParams.ROWS, 0);
resHead.offset = documents.getStart();
resHead.numFound = documents.getNumFound();
// write header
writeHeader(writer, request, resHead, start);
// body introduction
writeBodyIntro(writer, request, resHead, documents.size());
writeDocs(writer, documents, snippets, resHead, request.getParams().get("originalQuery"));
writer.write("</RES>"); writer.write(lb);
writer.write(XML_STOP);
}
/**
* Append the response header to the writer. All parameters are required and
* must not be null.
*
* @param writer an open output writer
* @param request the Solr request
* @param resHead results header information
* @param startTime this writer processing start time in milliseconds since
* Epoch
* @throws IOException when a write error occurred
*/
private void writeHeader(final Writer writer, final SolrQueryRequest request, final ResHead resHead,
final long startTime) throws IOException {
final Map<Object,Object> context = request.getContext();
writer.write(XML_START);
String query = request.getParams().get("originalQuery");
String site = getContextString(context, "site", "");
String sort = getContextString(context, "sort", "");
String client = getContextString(context, "client", "");
String ip = getContextString(context, "ip", "");
String access = getContextString(context, "access", "");
String entqr = getContextString(context, "entqr", "");
OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - start));
final String query = request.getParams().get("originalQuery");
final String site = getContextString(context, "site", "");
final String sort = getContextString(context, "sort", "");
final String client = getContextString(context, "client", "");
final String ip = getContextString(context, "ip", "");
final String access = getContextString(context, "access", "");
final String entqr = getContextString(context, "entqr", "");
OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - startTime));
OpensearchResponseWriter.solitaireTag(writer, "Q", query);
paramTag(writer, "sort", sort);
paramTag(writer, "output", "xml_no_dtd");
@ -187,20 +294,35 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
paramTag(writer, "client", client);
paramTag(writer, "q", query);
paramTag(writer, "site", site);
paramTag(writer, "start", Integer.toString(resHead.offset));
paramTag(writer, "start", Long.toString(resHead.offset));
paramTag(writer, "num", Integer.toString(resHead.rows));
paramTag(writer, "ip", ip);
paramTag(writer, "access", access); // p - search only public content, s - search only secure content, a - search all content, both public and secure
paramTag(writer, "entqr", entqr); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files.
// body introduction
final int responseCount = response.size();
}
/**
* Append the response body introduction to the writer. All parameters are
* required and must not be null.
*
* @param writer an open output writer
* @param resHead results header information
* @param responseCount the number of result documents
* @throws IOException when a write error occurred
*/
private void writeBodyIntro(final Writer writer, final SolrQueryRequest request, final ResHead resHead,
final int responseCount) throws IOException {
final Map<Object,Object> context = request.getContext();
final String site = getContextString(context, "site", "");
final String sort = getContextString(context, "sort", "");
final String client = getContextString(context, "client", "");
final String access = getContextString(context, "access", "");
writer.write("<RES SN=\"" + (resHead.offset + 1) + "\" EN=\"" + (resHead.offset + responseCount) + "\">"); writer.write(lb); // The index (1-based) of the first and last search result returned in this result set.
writer.write("<M>" + resHead.numFound + "</M>"); writer.write(lb); // The estimated total number of results for the search.
writer.write("<FI/>"); writer.write(lb); // Indicates that document filtering was performed during this search.
int nextStart = resHead.offset + responseCount;
int nextNum = Math.min(resHead.numFound - nextStart, responseCount < resHead.rows ? 0 : resHead.rows);
int prevStart = resHead.offset - resHead.rows;
long nextStart = resHead.offset + responseCount;
long nextNum = Math.min(resHead.numFound - nextStart, responseCount < resHead.rows ? 0 : resHead.rows);
long prevStart = resHead.offset - resHead.rows;
if (prevStart >= 0 || nextNum > 0) {
writer.write("<NB>");
if (prevStart >= 0) {
@ -220,11 +342,28 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
writer.write("</NB>");
}
writer.write(lb);
}
// parse body
/**
* Append to the writer a representation of a list of Solr documents. All
* parameters are required and must not be null.
*
* @param writer an open output writer
* @param request the Solr request
* @param documents the Solr documents to process
* @param snippets the snippets computed from the Solr highlighting
* @param resHead results header information
* @throws IOException when a write error occurred
*/
private void writeDocs(final Writer writer, final SolrQueryRequest request, final DocList documents,
final Map<String, Collection<String>> snippets, final ResHead resHead)
throws IOException {
// parse body
final String query = request.getParams().get("originalQuery");
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
DocIterator iterator = documents.iterator();
String urlhash = null;
final int responseCount = documents.size();
for (int i = 0; i < responseCount; i++) {
int id = iterator.nextDoc();
Document doc = searcher.doc(id, SOLR_FIELDS);
@ -242,69 +381,41 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
// write the R header for a search result
writer.write("<R N=\"" + (resHead.offset + i + 1) + "\"" + (i == 1 ? " L=\"2\"" : "") + (mime != null && mime.length() > 0 ? " MIME=\"" + mime + "\"" : "") + ">"); writer.write(lb);
//List<String> texts = new ArrayList<String>();
List<String> descriptions = new ArrayList<String>();
List<String> collections = new ArrayList<String>();
List<String> descriptions = new ArrayList<>();
List<String> collections = new ArrayList<>();
int size = 0;
boolean title_written = false; // the solr index may contain several; we take only the first which should be the visible tag in <title></title>
String title = null;
for (IndexableField value: fields) {
String fieldName = value.name();
// apply generic matching rule
String stag = field2tag.get(fieldName);
if (stag != null) {
OpensearchResponseWriter.solitaireTag(writer, stag, value.stringValue());
continue;
}
// if the rule is not generic, use the specific here
if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) {
if (CollectionSchema.language_s.getSolrFieldName().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.LANG.name(), value.stringValue());
} else if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) {
urlhash = value.stringValue();
continue;
}
if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.stringValue());
OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.stringValue());
continue;
}
if (CollectionSchema.title.getSolrFieldName().equals(fieldName) && !title_written) {
} else if (CollectionSchema.title.getSolrFieldName().equals(fieldName) && !title_written) {
title = value.stringValue();
OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), highlight(title, query));
//texts.add(value.stringValue());
title_written = true;
continue;
}
if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) {
descriptions.add(value.stringValue());
//texts.adds(description);
continue;
}
if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
writer.write("<FS NAME=\"date\" VALUE=\"" + formatGSAFS(d) + "\"/>\n");
//OpensearchResponseWriter.solitaireTag(writer, GSAToken.CACHE_LAST_MODIFIED.getSolrFieldName(), HeaderFramework.formatRFC1123(d));
//texts.add(value.stringValue());
continue;
}
if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123(d));
//texts.add(value.stringValue());
continue;
}
if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) {
size = value.stringValue() != null && value.stringValue().length() > 0 ? Integer.parseInt(value.stringValue()) : -1;
continue;
}
if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) {
} else if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) {
collections.add(value.stringValue());
continue;
}
//System.out.println("superfluous field: " + fieldName + ": " + value.stringValue()); // this can be avoided setting the enableLazyFieldLoading = false in solrconfig.xml
}
// compute snippet from texts
LinkedHashSet<String> snippet = urlhash == null ? null : snippets.get(urlhash);
Collection<String> snippet = urlhash == null ? null : snippets.get(urlhash);
OpensearchResponseWriter.removeSubsumedTitle(snippet, title);
OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), descriptions.size() > 0 ? descriptions.get(0) : "");
@ -315,9 +426,108 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo
OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer);
OpensearchResponseWriter.closeTag(writer, "R");
}
writer.write("</RES>"); writer.write(lb);
writer.write(XML_STOP);
}
}
/**
* Append to the writer a representation of a list of Solr documents. All
* parameters are required and must not be null.
*
* @param writer an open output writer
* @param documents the Solr documents to process
* @param snippets the snippets computed from the Solr highlighting
* @param resHead results header information
* @param query the original search query
* @throws IOException when a write error occurred
*/
private void writeDocs(final Writer writer, final SolrDocumentList documents,
final Map<String, Collection<String>> snippets, final ResHead resHead, final String query)
throws IOException {
// parse body
String urlhash = null;
int i = 0;
for (final SolrDocument doc : documents) {
// pre-scan the fields to get the mime-type
final Object contentTypeObj = doc.getFirstValue(CollectionSchema.content_type.getSolrFieldName());
final String mime = contentTypeObj != null ? contentTypeObj.toString() : "";
// write the R header for a search result
writer.write("<R N=\"" + (resHead.offset + i + 1) + "\"" + (i == 1 ? " L=\"2\"" : "") + (mime != null && mime.length() > 0 ? " MIME=\"" + mime + "\"" : "") + ">"); writer.write(lb);
final List<String> descriptions = new ArrayList<>();
final List<String> collections = new ArrayList<>();
int size = 0;
String title = null;
for (final Entry<String, Object> field : doc.entrySet()) {
final String fieldName = field.getKey();
final Object value = field.getValue();
if (CollectionSchema.language_s.getSolrFieldName().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.LANG.name(), value.toString());
} else if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) {
urlhash = value.toString();
} else if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.toString());
OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.toString());
} else if (CollectionSchema.title.getSolrFieldName().equals(fieldName)) {
if(value instanceof Iterable) {
for(final Object titleObj : (Iterable<?>)value) {
if(titleObj != null) {
/* get only the first title */
title = titleObj.toString();
break;
}
}
} else if(value != null) {
title = value.toString();
}
if(title != null) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), highlight(title, query));
}
} else if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) {
if(value instanceof Iterable) {
for(final Object descriptionObj : (Iterable<?>)value) {
if(descriptionObj != null) {
descriptions.add(descriptionObj.toString());
}
}
} else if(value != null) {
descriptions.add(value.toString());
}
} else if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName) && value instanceof Date) {
writer.write("<FS NAME=\"date\" VALUE=\"" + formatGSAFS((Date)value) + "\"/>\n");
} else if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName) && value instanceof Date) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123((Date)value));
} else if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) {
size = value instanceof Integer ? (Integer)value : -1;
} else if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) { // handle collection
if(value instanceof Iterable) {
for(final Object collectionObj : (Iterable<?>)value) {
if(collectionObj != null) {
collections.add(collectionObj.toString());
}
}
} else if(value != null) {
collections.add(value.toString());
}
}
}
// compute snippet from texts
Collection<String> snippet = urlhash == null ? null : snippets.get(urlhash);
OpensearchResponseWriter.removeSubsumedTitle(snippet, title);
OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), descriptions.size() > 0 ? descriptions.get(0) : "");
String cols = collections.toString();
if (!collections.isEmpty()) {
OpensearchResponseWriter.solitaireTag(writer, "COLS" /*SPECIAL!*/, collections.size() > 1 ? cols.substring(1, cols.length() - 1).replaceAll(" ", "") : collections.get(0));
}
writer.write("<HAS><L/><C SZ=\""); writer.write(Integer.toString(size / 1024)); writer.write("k\" CID=\""); writer.write(urlhash); writer.write("\" ENC=\"UTF-8\"/></HAS>\n");
if (YaCyVer == null) YaCyVer = yacyVersion.thisVersion().getName() + "/" + Switchboard.getSwitchboard().peers.mySeed().hash;
OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer);
OpensearchResponseWriter.closeTag(writer, "R");
i++;
}
}
private static String getContextString(Map<Object,Object> context, String key, String dflt) {
Object v = context.get(key);

@ -24,6 +24,8 @@ import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -32,8 +34,8 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
@ -151,9 +153,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
SimpleOrderedMap<Object> facetCounts = (SimpleOrderedMap<Object>) values.get("facet_counts");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> facetFields = facetCounts == null || facetCounts.size() == 0 ? null : (SimpleOrderedMap<Object>) facetCounts.get("facet_fields");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) values.get("highlighting");
final Map<String, LinkedHashSet<String>> snippets = highlighting(highlighting);
final Object highlightingObj = values.get("highlighting");
final Map<String, Collection<String>> snippets = highlightingObj instanceof NamedList
? OpensearchResponseWriter.snippetsFromHighlighting((NamedList<?>) highlightingObj)
: new HashMap<>();
if(responseObj instanceof ResultContext){
/* Regular response object */
@ -266,7 +270,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
* @throws IOException when an unexpected error occurred while writing
*/
private void writeDocs(final Writer writer, final SolrDocumentList documents,
final Map<String, LinkedHashSet<String>> snippets) throws IOException {
final Map<String, Collection<String>> snippets) throws IOException {
// parse body
String urlhash = null;
MultiProtocolURL url = null;
@ -403,7 +407,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
* @throws IOException when an unexpected error occurred while writing
*/
private void writeDocs(final Writer writer, final DocList documents, final SolrQueryRequest request,
final Map<String, LinkedHashSet<String>> snippets) throws IOException {
final Map<String, Collection<String>> snippets) throws IOException {
// parse body
SolrIndexSearcher searcher = request.getSearcher();
String urlhash = null;
@ -518,7 +522,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
* Append to the writer the end of the RSS OpenSearch representation of the Solr
* document.
*/
private void writeDocEnd(final Writer writer, final Map<String, LinkedHashSet<String>> snippets, final String urlhash,
private void writeDocEnd(final Writer writer, final Map<String, Collection<String>> snippets, final String urlhash,
final MultiProtocolURL url, final String keywords, final List<String> texts, final List<String> descriptions, final String docTitle,
final List<Object> imagesProtocolObjs, final List<String> imagesStubs) throws IOException {
if (Math.min(imagesProtocolObjs.size(), imagesStubs.size()) > 0) {
@ -535,7 +539,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
// compute snippet from texts
solitaireTag(writer, RSSMessage.Token.title.name(), docTitle.length() == 0 ? (texts.size() == 0 ? "" : texts.get(0)) : docTitle);
LinkedHashSet<String> snippet = urlhash == null ? null : snippets.get(urlhash);
Collection<String> snippet = urlhash == null ? null : snippets.get(urlhash);
String tagname = RSSMessage.Token.description.name();
if (snippet == null || snippet.size() == 0) {
writer.write("<"); writer.write(tagname); writer.write('>');
@ -556,38 +560,37 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
}
/**
* produce snippets from solr (they call that 'highlighting')
* @param val
* @return a map from urlhashes to a list of snippets for that url
*/
@SuppressWarnings("unchecked")
public static Map<String, LinkedHashSet<String>> highlighting(final SimpleOrderedMap<Object> val) {
Map<String, LinkedHashSet<String>> snippets = new HashMap<String, LinkedHashSet<String>>();
if (val == null) return snippets;
int sz = val.size();
Object v, vv;
for (int i = 0; i < sz; i++) {
String n = val.getName(i);
v = val.getVal(i);
if (v instanceof SimpleOrderedMap) {
int sz1 = ((SimpleOrderedMap<Object>) v).size();
LinkedHashSet<String> t = new LinkedHashSet<String>();
for (int j = 0; j < sz1; j++) {
vv = ((SimpleOrderedMap<Object>) v).getVal(j);
if (vv instanceof String[]) {
for (String t0: ((String[]) vv)) t.add(t0);
}
}
snippets.put(n, t);
}
}
return snippets;
}
/**
* produce snippets from solr (they call that 'highlighting')
*
* @param sorlHighlighting highlighting from Solr
* @return a map from urlhashes to a list of snippets for that url
*/
public static Map<String, Collection<String>> snippetsFromHighlighting(final NamedList<?> sorlHighlighting) {
final Map<String, Collection<String>> snippets = new HashMap<>();
if (sorlHighlighting == null) {
return snippets;
}
for (final Entry<String, ?> highlightingEntry : sorlHighlighting) {
final String urlHash = highlightingEntry.getKey();
final Object highlights = highlightingEntry.getValue();
if (highlights instanceof SimpleOrderedMap) {
final LinkedHashSet<String> urlSnippets = new LinkedHashSet<>();
for (final Entry<String, ?> entry : (SimpleOrderedMap<?>) highlights) {
final Object texts = entry.getValue();
if (texts instanceof String[]) {
Collections.addAll(urlSnippets, (String[]) texts);
}
}
snippets.put(urlHash, urlSnippets);
}
}
return snippets;
}
final static Pattern keymarks = Pattern.compile("<b>|</b>");
public static void removeSubsumedTitle(LinkedHashSet<String> snippets, String title) {
public static void removeSubsumedTitle(Collection<String> snippets, String title) {
if (title == null || title.length() == 0 || snippets == null || snippets.size() == 0) return;
snippets.remove(title);
String tlc = title.toLowerCase();
@ -601,11 +604,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo
}
/**
* @param snippets snippets list eventually empty
* @param snippets snippets collection eventually empty
* @return the largest snippet containing at least a space character among the list, or null
*/
public static String getLargestSnippet(final LinkedHashSet<String> snippets) {
if (snippets == null || snippets.size() == 0) {
public static String getLargestSnippet(final Collection<String> snippets) {
if (snippets == null || snippets.isEmpty()) {
return null;
}
String l = null;

@ -24,6 +24,7 @@ import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashSet;
@ -130,9 +131,11 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr
SimpleOrderedMap<Object> facetCounts = (SimpleOrderedMap<Object>) values.get("facet_counts");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> facetFields = facetCounts == null || facetCounts.size() == 0 ? null : (SimpleOrderedMap<Object>) facetCounts.get("facet_fields");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) values.get("highlighting");
Map<String, LinkedHashSet<String>> snippets = OpensearchResponseWriter.highlighting(highlighting);
final Object highlightingObj = values.get("highlighting");
final Map<String, Collection<String>> snippets = highlightingObj instanceof NamedList
? OpensearchResponseWriter.snippetsFromHighlighting((NamedList<?>) highlightingObj)
: new HashMap<>();
// parse response header
final ResHead resHead = new ResHead();
@ -282,7 +285,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr
* @throws IOException when an unexpected error occurred while writing
*/
private void writeDocs(final Writer writer, final DocList documents, final SolrQueryRequest request,
final Map<String, LinkedHashSet<String>> snippets) throws IOException {
final Map<String, Collection<String>> snippets) throws IOException {
final SolrIndexSearcher searcher = request.getSearcher();
final DocIterator iterator = documents.iterator();
int writtenDocs = 0;
@ -377,7 +380,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr
* @throws IOException when an unexpected error occurred while writing
*/
private void writeDocs(final Writer writer, final SolrDocumentList documents,
final Map<String, LinkedHashSet<String>> snippets) throws IOException {
final Map<String, Collection<String>> snippets) throws IOException {
int writtenDocs = 0;
for (final SolrDocument doc : documents) {
if(writtenDocs > 0) {
@ -542,7 +545,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr
* Append to the writer the end of the YaCy json representation of the Solr
* document.
*/
private void writeDocEnd(final Writer writer, final Map<String, LinkedHashSet<String>> snippets,
private void writeDocEnd(final Writer writer, final Map<String, Collection<String>> snippets,
final MultiProtocolURL url, final String urlhash, final List<String> descriptions, final String docTitle, final StringBuilder path,
final List<Object> imagesProtocolObjs, final List<String> imagesStubs) throws IOException {
if (Math.min(imagesProtocolObjs.size(), imagesStubs.size()) > 0) {
@ -558,7 +561,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr
// compute snippet from texts
solitaireTag(writer, "path", path.toString());
solitaireTag(writer, "title", docTitle.length() == 0 ? path.toString() : docTitle.replaceAll("\"", "'"));
LinkedHashSet<String> snippet = urlhash == null ? null : snippets.get(urlhash);
Collection<String> snippet = urlhash == null ? null : snippets.get(urlhash);
if (snippet == null) {snippet = new LinkedHashSet<>(); snippet.addAll(descriptions);}
OpensearchResponseWriter.removeSubsumedTitle(snippet, docTitle);
String snippetstring = snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet);

Loading…
Cancel
Save