diff --git a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java index af6e50c72..6718b9b8e 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/GSAResponseWriter.java @@ -25,20 +25,24 @@ import java.io.Writer; import java.nio.charset.StandardCharsets; import java.time.DateTimeException; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.regex.Pattern; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexableField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.NamedList; -import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.XML; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.QueryResponseWriter; @@ -61,7 +65,7 @@ import net.yacy.search.schema.CollectionSchema; * example: GET /gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1 * for a xml reference, see https://developers.google.com/search-appliance/documentation/614/xml_reference */ -public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrResponseWriter { +public class GSAResponseWriter implements QueryResponseWriter, SolrjResponseWriter { private static String YaCyVer = null; private static final char lb = '\n'; @@ -86,9 +90,6 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo "\n\n\n").toCharArray(); private static final char[] XML_STOP = "\n".toCharArray(); - // define a list of simple YaCySchema -> RSS Token matchings - private static final Map field2tag = new HashMap(); - // pre-select a set of YaCy schema fields for the solr searcher which should cause a better caching private static final CollectionSchema[] extrafields = new CollectionSchema[]{ CollectionSchema.id, CollectionSchema.sku, CollectionSchema.title, CollectionSchema.description_txt, @@ -96,15 +97,16 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo CollectionSchema.language_s, CollectionSchema.collection_sxt }; - private static final Set SOLR_FIELDS = new HashSet(); + private static final Set SOLR_FIELDS = new HashSet<>(); static { - field2tag.put(CollectionSchema.language_s.getSolrFieldName(), GSAToken.LANG.name()); - SOLR_FIELDS.addAll(field2tag.keySet()); + + SOLR_FIELDS.add(CollectionSchema.language_s.getSolrFieldName()); for (CollectionSchema field: extrafields) SOLR_FIELDS.add(field.getSolrFieldName()); } private static class ResHead { - public int offset, rows, numFound; + public long offset, numFound; + public int rows; //public int status, QTime; //public String df, q, wt; //public float maxScore; @@ -129,10 +131,6 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo } } - public GSAResponseWriter() { - super(); - } - @Override public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) { return CONTENT_TYPE_XML_UTF8; @@ -144,41 +142,150 @@ public class GSAResponseWriter implements 
QueryResponseWriter, EmbeddedSolrRespo @Override public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException { - assert rsp.getValues().get("responseHeader") != null; - assert rsp.getValues().get("response") != null; - long start = System.currentTimeMillis(); + final long start = System.currentTimeMillis(); + + final Object responseObj = rsp.getResponse(); + + if(responseObj instanceof ResultContext) { + /* Regular response object */ + + final DocList documents = ((ResultContext) responseObj).getDocList(); + + final Object highlightingObj = rsp.getValues().get("highlighting"); + final Map> snippets = highlightingObj instanceof NamedList + ? OpensearchResponseWriter.snippetsFromHighlighting((NamedList) highlightingObj) + : new HashMap<>(); + + // parse response header + final ResHead resHead = new ResHead(); + resHead.rows = request.getParams().getInt(CommonParams.ROWS, 0); + resHead.offset = documents.offset(); // equal to 'start' + resHead.numFound = documents.matches(); + //resHead.df = (String) val0.get("df"); + //resHead.q = (String) val0.get("q"); + //resHead.wt = (String) val0.get("wt"); + //resHead.status = (Integer) responseHeader.get("status"); + //resHead.QTime = (Integer) responseHeader.get("QTime"); + //resHead.maxScore = response.maxScore(); - SimpleOrderedMap responseHeader = (SimpleOrderedMap) rsp.getResponseHeader(); - DocList response = ((ResultContext) rsp.getValues().get("response")).getDocList(); - @SuppressWarnings("unchecked") - SimpleOrderedMap highlighting = (SimpleOrderedMap) rsp.getValues().get("highlighting"); - Map> snippets = OpensearchResponseWriter.highlighting(highlighting); - Map context = request.getContext(); + // write header + writeHeader(writer, request, resHead, start); + + // body introduction + writeBodyIntro(writer, request, resHead, documents.size()); + + writeDocs(writer, request, documents, snippets, resHead); + + writer.write(""); writer.write(lb); + writer.write(XML_STOP); + } else if(responseObj instanceof SolrDocumentList) { + /* + * The response object can be a SolrDocumentList when the response is partial, + * for example when the allowed processing time has been exceeded + */ + final SolrDocumentList documents = (SolrDocumentList) responseObj; + + final Object highlightingObj = rsp.getValues().get("highlighting"); + final Map> snippets = highlightingObj instanceof NamedList + ? 
OpensearchResponseWriter.snippetsFromHighlighting((NamedList) highlightingObj) + : new HashMap<>(); + + writeSolrDocumentList(writer, request, snippets, start, documents); + } else { + throw new IOException("Unable to process Solr response format"); + } + } + + @Override + public void write(Writer writer, SolrQueryRequest request, String coreName, QueryResponse rsp) throws IOException { + final long start = System.currentTimeMillis(); + + writeSolrDocumentList(writer, request, snippetsFromHighlighting(rsp.getHighlighting()), start, + rsp.getResults()); + } + + /** + * Produce snippets from Solr (they call that 'highlighting') + * + * @param sorlHighlighting highlighting from Solr + * @return a map from urlhashes to a list of snippets for that url + */ + private Map> snippetsFromHighlighting( + final Map>> sorlHighlighting) { + final Map> snippets = new HashMap<>(); + if (sorlHighlighting == null) { + return snippets; + } + for (final Entry>> highlightingEntry : sorlHighlighting.entrySet()) { + final String urlHash = highlightingEntry.getKey(); + final Map> highlights = highlightingEntry.getValue(); + final LinkedHashSet urlSnippets = new LinkedHashSet<>(); + for (final List texts : highlights.values()) { + urlSnippets.addAll(texts); + } + snippets.put(urlHash, urlSnippets); + } + return snippets; + } + + /** + * Append to the writer a representation of a list of Solr documents. All + * parameters are required and must not be null. + * + * @param writer an open output writer + * @param request the Solr request + * @param snippets the snippets computed from the Solr highlighting + * @param start the results start index + * @param documents the Solr documents to process + * @throws IOException when a write error occurred + */ + private void writeSolrDocumentList(final Writer writer, final SolrQueryRequest request, + final Map> snippets, final long start, final SolrDocumentList documents) + throws IOException { // parse response header - ResHead resHead = new ResHead(); - NamedList val0 = (NamedList) responseHeader.get("params"); - resHead.rows = Integer.parseInt((String) val0.get(CommonParams.ROWS)); - resHead.offset = response.offset(); // equal to 'start' - resHead.numFound = response.matches(); - //resHead.df = (String) val0.get("df"); - //resHead.q = (String) val0.get("q"); - //resHead.wt = (String) val0.get("wt"); - //resHead.status = (Integer) responseHeader.get("status"); - //resHead.QTime = (Integer) responseHeader.get("QTime"); - //resHead.maxScore = response.maxScore(); + final ResHead resHead = new ResHead(); + resHead.rows = request.getParams().getInt(CommonParams.ROWS, 0); + resHead.offset = documents.getStart(); + resHead.numFound = documents.getNumFound(); // write header + writeHeader(writer, request, resHead, start); + + // body introduction + writeBodyIntro(writer, request, resHead, documents.size()); + + writeDocs(writer, documents, snippets, resHead, request.getParams().get("originalQuery")); + + writer.write(""); writer.write(lb); + writer.write(XML_STOP); + } + + /** + * Append the response header to the writer. All parameters are required and + * must not be null. 
+ * + * @param writer an open output writer + * @param request the Solr request + * @param resHead results header information + * @param startTime this writer processing start time in milliseconds since + * Epoch + * @throws IOException when a write error occurred + */ + private void writeHeader(final Writer writer, final SolrQueryRequest request, final ResHead resHead, + final long startTime) throws IOException { + final Map context = request.getContext(); + writer.write(XML_START); - String query = request.getParams().get("originalQuery"); - String site = getContextString(context, "site", ""); - String sort = getContextString(context, "sort", ""); - String client = getContextString(context, "client", ""); - String ip = getContextString(context, "ip", ""); - String access = getContextString(context, "access", ""); - String entqr = getContextString(context, "entqr", ""); - OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - start)); + final String query = request.getParams().get("originalQuery"); + final String site = getContextString(context, "site", ""); + final String sort = getContextString(context, "sort", ""); + final String client = getContextString(context, "client", ""); + final String ip = getContextString(context, "ip", ""); + final String access = getContextString(context, "access", ""); + final String entqr = getContextString(context, "entqr", ""); + OpensearchResponseWriter.solitaireTag(writer, "TM", Long.toString(System.currentTimeMillis() - startTime)); OpensearchResponseWriter.solitaireTag(writer, "Q", query); paramTag(writer, "sort", sort); paramTag(writer, "output", "xml_no_dtd"); @@ -187,20 +294,35 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo paramTag(writer, "client", client); paramTag(writer, "q", query); paramTag(writer, "site", site); - paramTag(writer, "start", Integer.toString(resHead.offset)); + paramTag(writer, "start", Long.toString(resHead.offset)); paramTag(writer, "num", Integer.toString(resHead.rows)); paramTag(writer, "ip", ip); paramTag(writer, "access", access); // p - search only public content, s - search only secure content, a - search all content, both public and secure paramTag(writer, "entqr", entqr); // query expansion policy; (entqr=1) -- Uses only the search appliance's synonym file, (entqr=1) -- Uses only the search appliance's synonym file, (entqr=3) -- Uses both standard and local synonym files. - - // body introduction - final int responseCount = response.size(); + } + + /** + * Append the response body introduction to the writer. All parameters are + * required and must not be null. + * + * @param writer an open output writer + * @param resHead results header information + * @param responseCount the number of result documents + * @throws IOException when a write error occurred + */ + private void writeBodyIntro(final Writer writer, final SolrQueryRequest request, final ResHead resHead, + final int responseCount) throws IOException { + final Map context = request.getContext(); + final String site = getContextString(context, "site", ""); + final String sort = getContextString(context, "sort", ""); + final String client = getContextString(context, "client", ""); + final String access = getContextString(context, "access", ""); writer.write(""); writer.write(lb); // The index (1-based) of the first and last search result returned in this result set. writer.write("" + resHead.numFound + ""); writer.write(lb); // The estimated total number of results for the search. 
writer.write(""); writer.write(lb); // Indicates that document filtering was performed during this search. - int nextStart = resHead.offset + responseCount; - int nextNum = Math.min(resHead.numFound - nextStart, responseCount < resHead.rows ? 0 : resHead.rows); - int prevStart = resHead.offset - resHead.rows; + long nextStart = resHead.offset + responseCount; + long nextNum = Math.min(resHead.numFound - nextStart, responseCount < resHead.rows ? 0 : resHead.rows); + long prevStart = resHead.offset - resHead.rows; if (prevStart >= 0 || nextNum > 0) { writer.write(""); if (prevStart >= 0) { @@ -220,11 +342,28 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo writer.write(""); } writer.write(lb); + } - // parse body + /** + * Append to the writer a representation of a list of Solr documents. All + * parameters are required and must not be null. + * + * @param writer an open output writer + * @param request the Solr request + * @param documents the Solr documents to process + * @param snippets the snippets computed from the Solr highlighting + * @param resHead results header information + * @throws IOException when a write error occurred + */ + private void writeDocs(final Writer writer, final SolrQueryRequest request, final DocList documents, + final Map> snippets, final ResHead resHead) + throws IOException { + // parse body + final String query = request.getParams().get("originalQuery"); SolrIndexSearcher searcher = request.getSearcher(); - DocIterator iterator = response.iterator(); + DocIterator iterator = documents.iterator(); String urlhash = null; + final int responseCount = documents.size(); for (int i = 0; i < responseCount; i++) { int id = iterator.nextDoc(); Document doc = searcher.doc(id, SOLR_FIELDS); @@ -242,69 +381,41 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo // write the R header for a search result writer.write(" 0 ? 
" MIME=\"" + mime + "\"" : "") + ">"); writer.write(lb); - //List texts = new ArrayList(); - List descriptions = new ArrayList(); - List collections = new ArrayList(); + List descriptions = new ArrayList<>(); + List collections = new ArrayList<>(); int size = 0; boolean title_written = false; // the solr index may contain several; we take only the first which should be the visible tag in String title = null; for (IndexableField value: fields) { String fieldName = value.name(); - // apply generic matching rule - String stag = field2tag.get(fieldName); - if (stag != null) { - OpensearchResponseWriter.solitaireTag(writer, stag, value.stringValue()); - continue; - } - - // if the rule is not generic, use the specific here - if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) { + if (CollectionSchema.language_s.getSolrFieldName().equals(fieldName)) { + OpensearchResponseWriter.solitaireTag(writer, GSAToken.LANG.name(), value.stringValue()); + } else if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) { urlhash = value.stringValue(); - continue; - } - if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) { OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.stringValue()); OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.stringValue()); - continue; - } - if (CollectionSchema.title.getSolrFieldName().equals(fieldName) && !title_written) { + } else if (CollectionSchema.title.getSolrFieldName().equals(fieldName) && !title_written) { title = value.stringValue(); OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), highlight(title, query)); - //texts.add(value.stringValue()); title_written = true; - continue; - } - if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) { descriptions.add(value.stringValue()); - //texts.adds(description); - continue; - } - if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName)) { Date d = new Date(Long.parseLong(value.stringValue())); writer.write("\n"); - //OpensearchResponseWriter.solitaireTag(writer, GSAToken.CACHE_LAST_MODIFIED.getSolrFieldName(), HeaderFramework.formatRFC1123(d)); - //texts.add(value.stringValue()); - continue; - } - if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName)) { Date d = new Date(Long.parseLong(value.stringValue())); OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123(d)); - //texts.add(value.stringValue()); - continue; - } - if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) { size = value.stringValue() != null && value.stringValue().length() > 0 ? 
Integer.parseInt(value.stringValue()) : -1; - continue; - } - if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) { + } else if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) { collections.add(value.stringValue()); - continue; } - //System.out.println("superfluous field: " + fieldName + ": " + value.stringValue()); // this can be avoided setting the enableLazyFieldLoading = false in solrconfig.xml } // compute snippet from texts - LinkedHashSet snippet = urlhash == null ? null : snippets.get(urlhash); + Collection snippet = urlhash == null ? null : snippets.get(urlhash); OpensearchResponseWriter.removeSubsumedTitle(snippet, title); OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet)); OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), descriptions.size() > 0 ? descriptions.get(0) : ""); @@ -315,9 +426,108 @@ public class GSAResponseWriter implements QueryResponseWriter, EmbeddedSolrRespo OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer); OpensearchResponseWriter.closeTag(writer, "R"); } - writer.write(""); writer.write(lb); - writer.write(XML_STOP); - } + } + + /** + * Append to the writer a representation of a list of Solr documents. All + * parameters are required and must not be null. + * + * @param writer an open output writer + * @param documents the Solr documents to process + * @param snippets the snippets computed from the Solr highlighting + * @param resHead results header information + * @param query the original search query + * @throws IOException when a write error occurred + */ + private void writeDocs(final Writer writer, final SolrDocumentList documents, + final Map> snippets, final ResHead resHead, final String query) + throws IOException { + // parse body + String urlhash = null; + int i = 0; + for (final SolrDocument doc : documents) { + + // pre-scan the fields to get the mime-type + final Object contentTypeObj = doc.getFirstValue(CollectionSchema.content_type.getSolrFieldName()); + final String mime = contentTypeObj != null ? contentTypeObj.toString() : ""; + + // write the R header for a search result + writer.write(" 0 ? 
" MIME=\"" + mime + "\"" : "") + ">"); writer.write(lb); + final List descriptions = new ArrayList<>(); + final List collections = new ArrayList<>(); + int size = 0; + String title = null; + for (final Entry field : doc.entrySet()) { + final String fieldName = field.getKey(); + final Object value = field.getValue(); + + if (CollectionSchema.language_s.getSolrFieldName().equals(fieldName)) { + OpensearchResponseWriter.solitaireTag(writer, GSAToken.LANG.name(), value.toString()); + } else if (CollectionSchema.id.getSolrFieldName().equals(fieldName)) { + urlhash = value.toString(); + } else if (CollectionSchema.sku.getSolrFieldName().equals(fieldName)) { + OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.toString()); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.toString()); + } else if (CollectionSchema.title.getSolrFieldName().equals(fieldName)) { + if(value instanceof Iterable) { + for(final Object titleObj : (Iterable)value) { + if(titleObj != null) { + /* get only the first title */ + title = titleObj.toString(); + break; + } + } + } else if(value != null) { + title = value.toString(); + } + if(title != null) { + OpensearchResponseWriter.solitaireTag(writer, GSAToken.T.name(), highlight(title, query)); + } + } else if (CollectionSchema.description_txt.getSolrFieldName().equals(fieldName)) { + if(value instanceof Iterable) { + for(final Object descriptionObj : (Iterable)value) { + if(descriptionObj != null) { + descriptions.add(descriptionObj.toString()); + } + } + } else if(value != null) { + descriptions.add(value.toString()); + } + } else if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName) && value instanceof Date) { + writer.write("\n"); + } else if (CollectionSchema.load_date_dt.getSolrFieldName().equals(fieldName) && value instanceof Date) { + OpensearchResponseWriter.solitaireTag(writer, GSAToken.CRAWLDATE.name(), HeaderFramework.formatRFC1123((Date)value)); + } else if (CollectionSchema.size_i.getSolrFieldName().equals(fieldName)) { + size = value instanceof Integer ? (Integer)value : -1; + } else if (CollectionSchema.collection_sxt.getSolrFieldName().equals(fieldName)) { // handle collection + if(value instanceof Iterable) { + for(final Object collectionObj : (Iterable)value) { + if(collectionObj != null) { + collections.add(collectionObj.toString()); + } + } + } else if(value != null) { + collections.add(value.toString()); + } + } + } + // compute snippet from texts + Collection snippet = urlhash == null ? null : snippets.get(urlhash); + OpensearchResponseWriter.removeSubsumedTitle(snippet, title); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet)); + OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), descriptions.size() > 0 ? descriptions.get(0) : ""); + String cols = collections.toString(); + if (!collections.isEmpty()) { + OpensearchResponseWriter.solitaireTag(writer, "COLS" /*SPECIAL!*/, collections.size() > 1 ? 
cols.substring(1, cols.length() - 1).replaceAll(" ", "") : collections.get(0)); + } + writer.write("\n"); + if (YaCyVer == null) YaCyVer = yacyVersion.thisVersion().getName() + "/" + Switchboard.getSwitchboard().peers.mySeed().hash; + OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer); + OpensearchResponseWriter.closeTag(writer, "R"); + + i++; + } + } private static String getContextString(Map context, String key, String dflt) { Object v = context.get(key); diff --git a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java index f698afc81..8fed925ac 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/OpensearchResponseWriter.java @@ -24,6 +24,8 @@ import java.io.IOException; import java.io.Writer; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -32,8 +34,8 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.Set; import java.util.Map.Entry; +import java.util.Set; import java.util.regex.Pattern; import org.apache.lucene.document.Document; @@ -151,9 +153,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo SimpleOrderedMap facetCounts = (SimpleOrderedMap) values.get("facet_counts"); @SuppressWarnings("unchecked") SimpleOrderedMap facetFields = facetCounts == null || facetCounts.size() == 0 ? null : (SimpleOrderedMap) facetCounts.get("facet_fields"); - @SuppressWarnings("unchecked") - SimpleOrderedMap highlighting = (SimpleOrderedMap) values.get("highlighting"); - final Map> snippets = highlighting(highlighting); + + final Object highlightingObj = values.get("highlighting"); + final Map> snippets = highlightingObj instanceof NamedList + ? OpensearchResponseWriter.snippetsFromHighlighting((NamedList) highlightingObj) + : new HashMap<>(); if(responseObj instanceof ResultContext){ /* Regular response object */ @@ -266,7 +270,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo * @throws IOException when an unexpected error occurred while writing */ private void writeDocs(final Writer writer, final SolrDocumentList documents, - final Map> snippets) throws IOException { + final Map> snippets) throws IOException { // parse body String urlhash = null; MultiProtocolURL url = null; @@ -403,7 +407,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo * @throws IOException when an unexpected error occurred while writing */ private void writeDocs(final Writer writer, final DocList documents, final SolrQueryRequest request, - final Map> snippets) throws IOException { + final Map> snippets) throws IOException { // parse body SolrIndexSearcher searcher = request.getSearcher(); String urlhash = null; @@ -518,7 +522,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo * Append to the writer the end of the RSS OpenSearch representation of the Solr * document. 
*/ - private void writeDocEnd(final Writer writer, final Map> snippets, final String urlhash, + private void writeDocEnd(final Writer writer, final Map> snippets, final String urlhash, final MultiProtocolURL url, final String keywords, final List texts, final List descriptions, final String docTitle, final List imagesProtocolObjs, final List imagesStubs) throws IOException { if (Math.min(imagesProtocolObjs.size(), imagesStubs.size()) > 0) { @@ -535,7 +539,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo // compute snippet from texts solitaireTag(writer, RSSMessage.Token.title.name(), docTitle.length() == 0 ? (texts.size() == 0 ? "" : texts.get(0)) : docTitle); - LinkedHashSet snippet = urlhash == null ? null : snippets.get(urlhash); + Collection snippet = urlhash == null ? null : snippets.get(urlhash); String tagname = RSSMessage.Token.description.name(); if (snippet == null || snippet.size() == 0) { writer.write("<"); writer.write(tagname); writer.write('>'); @@ -556,38 +560,37 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo } - /** - * produce snippets from solr (they call that 'highlighting') - * @param val - * @return a map from urlhashes to a list of snippets for that url - */ - @SuppressWarnings("unchecked") - public static Map> highlighting(final SimpleOrderedMap val) { - Map> snippets = new HashMap>(); - if (val == null) return snippets; - int sz = val.size(); - Object v, vv; - for (int i = 0; i < sz; i++) { - String n = val.getName(i); - v = val.getVal(i); - if (v instanceof SimpleOrderedMap) { - int sz1 = ((SimpleOrderedMap) v).size(); - LinkedHashSet t = new LinkedHashSet(); - for (int j = 0; j < sz1; j++) { - vv = ((SimpleOrderedMap) v).getVal(j); - if (vv instanceof String[]) { - for (String t0: ((String[]) vv)) t.add(t0); - } - } - snippets.put(n, t); - } - } - return snippets; - } - + /** + * produce snippets from solr (they call that 'highlighting') + * + * @param sorlHighlighting highlighting from Solr + * @return a map from urlhashes to a list of snippets for that url + */ + public static Map> snippetsFromHighlighting(final NamedList sorlHighlighting) { + final Map> snippets = new HashMap<>(); + if (sorlHighlighting == null) { + return snippets; + } + for (final Entry highlightingEntry : sorlHighlighting) { + final String urlHash = highlightingEntry.getKey(); + final Object highlights = highlightingEntry.getValue(); + if (highlights instanceof SimpleOrderedMap) { + final LinkedHashSet urlSnippets = new LinkedHashSet<>(); + for (final Entry entry : (SimpleOrderedMap) highlights) { + final Object texts = entry.getValue(); + if (texts instanceof String[]) { + Collections.addAll(urlSnippets, (String[]) texts); + } + } + snippets.put(urlHash, urlSnippets); + } + } + return snippets; + } + final static Pattern keymarks = Pattern.compile("|"); - public static void removeSubsumedTitle(LinkedHashSet snippets, String title) { + public static void removeSubsumedTitle(Collection snippets, String title) { if (title == null || title.length() == 0 || snippets == null || snippets.size() == 0) return; snippets.remove(title); String tlc = title.toLowerCase(); @@ -601,11 +604,11 @@ public class OpensearchResponseWriter implements QueryResponseWriter, SolrjRespo } /** - * @param snippets snippets list eventually empty + * @param snippets snippets collection eventually empty * @return the largest snippet containing at least a space character among the list, or null */ - public static String getLargestSnippet(final 
LinkedHashSet snippets) { - if (snippets == null || snippets.size() == 0) { + public static String getLargestSnippet(final Collection snippets) { + if (snippets == null || snippets.isEmpty()) { return null; } String l = null; diff --git a/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java b/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java index ef4eaa0bb..b7477077b 100644 --- a/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java +++ b/source/net/yacy/cora/federate/solr/responsewriter/YJsonResponseWriter.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.Writer; import java.net.MalformedURLException; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.LinkedHashSet; @@ -130,9 +131,11 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr SimpleOrderedMap facetCounts = (SimpleOrderedMap) values.get("facet_counts"); @SuppressWarnings("unchecked") SimpleOrderedMap facetFields = facetCounts == null || facetCounts.size() == 0 ? null : (SimpleOrderedMap) facetCounts.get("facet_fields"); - @SuppressWarnings("unchecked") - SimpleOrderedMap highlighting = (SimpleOrderedMap) values.get("highlighting"); - Map> snippets = OpensearchResponseWriter.highlighting(highlighting); + + final Object highlightingObj = values.get("highlighting"); + final Map> snippets = highlightingObj instanceof NamedList + ? OpensearchResponseWriter.snippetsFromHighlighting((NamedList) highlightingObj) + : new HashMap<>(); // parse response header final ResHead resHead = new ResHead(); @@ -282,7 +285,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr * @throws IOException when an unexpected error occurred while writing */ private void writeDocs(final Writer writer, final DocList documents, final SolrQueryRequest request, - final Map> snippets) throws IOException { + final Map> snippets) throws IOException { final SolrIndexSearcher searcher = request.getSearcher(); final DocIterator iterator = documents.iterator(); int writtenDocs = 0; @@ -377,7 +380,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr * @throws IOException when an unexpected error occurred while writing */ private void writeDocs(final Writer writer, final SolrDocumentList documents, - final Map> snippets) throws IOException { + final Map> snippets) throws IOException { int writtenDocs = 0; for (final SolrDocument doc : documents) { if(writtenDocs > 0) { @@ -542,7 +545,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr * Append to the writer the end of the YaCy json representation of the Solr * document. */ - private void writeDocEnd(final Writer writer, final Map> snippets, + private void writeDocEnd(final Writer writer, final Map> snippets, final MultiProtocolURL url, final String urlhash, final List descriptions, final String docTitle, final StringBuilder path, final List imagesProtocolObjs, final List imagesStubs) throws IOException { if (Math.min(imagesProtocolObjs.size(), imagesStubs.size()) > 0) { @@ -558,7 +561,7 @@ public class YJsonResponseWriter implements QueryResponseWriter, SolrjResponseWr // compute snippet from texts solitaireTag(writer, "path", path.toString()); solitaireTag(writer, "title", docTitle.length() == 0 ? path.toString() : docTitle.replaceAll("\"", "'")); - LinkedHashSet snippet = urlhash == null ? 
null : snippets.get(urlhash);
+        Collection<String> snippet = urlhash == null ? null : snippets.get(urlhash);
         if (snippet == null) {snippet = new LinkedHashSet<>(); snippet.addAll(descriptions);}
         OpensearchResponseWriter.removeSubsumedTitle(snippet, docTitle);
         String snippetstring = snippet == null || snippet.size() == 0 ? (descriptions.size() > 0 ? descriptions.get(0) : "") : OpensearchResponseWriter.getLargestSnippet(snippet);
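
For reference, below is a minimal sketch of how the renamed OpensearchResponseWriter.snippetsFromHighlighting(NamedList) helper introduced by this patch is fed: Solr's highlighting component returns, per document id (the YaCy url hash), a SimpleOrderedMap of field name to highlighted fragments, and the helper flattens that into the per-url snippet map the three writers consume. This is not part of the patch; the class name, the example url hash, field names and fragment strings are illustrative, and the Map<String, Collection<String>> return type is assumed from the call sites shown in the diff.

import java.util.Collection;
import java.util.Map;

import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;

import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter;

public class SnippetsFromHighlightingSketch {

    public static void main(String[] args) {
        // Shape produced by Solr's highlighting component inside a SolrQueryResponse:
        // document id -> (field name -> highlighted fragments)
        final SimpleOrderedMap<Object> perDoc = new SimpleOrderedMap<>();
        perDoc.add("text_t", new String[] { "a <b>chicken</b> teriyaki recipe" });
        perDoc.add("title", new String[] { "<b>Chicken</b> Teriyaki" });

        final NamedList<Object> highlighting = new NamedList<>();
        highlighting.add("AAAAAAAAAAAA", perDoc); // key is the YaCy url hash of the document

        // The renamed helper flattens this into: url hash -> ordered, de-duplicated snippets
        // (return type assumed to be Map<String, Collection<String>>, as used by the writers)
        final Map<String, Collection<String>> snippets =
                OpensearchResponseWriter.snippetsFromHighlighting(highlighting);

        System.out.println(snippets.get("AAAAAAAAAAAA"));
        // expected: [a <b>chicken</b> teriyaki recipe, <b>Chicken</b> Teriyaki]
    }
}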
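
A second sketch, under stated assumptions, shows how the new SolrjResponseWriter entry point write(Writer, SolrQueryRequest, String coreName, QueryResponse) could be driven when results arrive from a remote Solr instance over SolrJ rather than from the embedded core. The interface method, the "originalQuery" parameter and the "site"/"client" context keys are taken from this patch; everything else (RemoteGsaRenderingSketch, the core-less LocalSolrQueryRequest, the concrete parameter values) is hypothetical glue code, and the snippet assumes it runs inside a YaCy instance, since the writer resolves the peer version from the Switchboard.

import java.io.IOException;
import java.io.StringWriter;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;

import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;

/**
 * Hypothetical driver showing how a SolrjResponseWriter such as GSAResponseWriter
 * can render results obtained through SolrJ from a remote Solr core.
 */
public class RemoteGsaRenderingSketch {

    public static String render(final SolrClient client, final String coreName, final String query)
            throws SolrServerException, IOException {
        final ModifiableSolrParams params = new ModifiableSolrParams();
        params.set(CommonParams.Q, query);
        params.set(CommonParams.ROWS, 10);   // read back by the writer via request.getParams()
        params.set("originalQuery", query);  // echoed by the writer in its query tag
        params.set("hl", true);              // highlighting feeds snippetsFromHighlighting()

        // The SolrJ code path of the writer only reads parameters and context,
        // never the local index searcher, so a core-less local request is enough here.
        final SolrQueryRequest solrRequest = new LocalSolrQueryRequest(null, params);
        solrRequest.getContext().put("client", "test");
        solrRequest.getContext().put("site", "test");

        final QueryResponse response = client.query(coreName, params);

        final GSAResponseWriter writer = new GSAResponseWriter();
        final StringWriter output = new StringWriter();
        try {
            // New entry point introduced by this patch: consumes QueryResponse.getResults()
            // (a SolrDocumentList) and QueryResponse.getHighlighting() directly.
            writer.write(output, solrRequest, coreName, response);
        } finally {
            solrRequest.close();
        }
        return output.toString();
    }
}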