Added snippet computation to the solr/rss and gsa result writers

pull/1/head
Michael Peter Christen 13 years ago
parent 4716546ef5
commit ab6ec4ec52

@ -30,6 +30,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.services.federated.solr.GSAResponseWriter;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.query.SnippetProcess;
import net.yacy.search.solr.EmbeddedSolrConnector;
import org.apache.solr.common.SolrException;
@ -42,7 +43,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
// try
// http://localhost:8090/gsa/search?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
// http://localhost:8090/gsa/searchresult?q=chicken+teriyaki&output=xml&client=test&site=test&sort=date:D:S:d1
/**
* This is a gsa result formatter for solr search results.
@ -100,6 +101,11 @@ public class searchresult {
post.put(CommonParams.ROWS, post.remove("num"));
post.put(CommonParams.ROWS, Math.min(post.getInt("num", 10), (authenticated) ? 5000 : 100));
post.remove("num");
post.put("hl", "true");
post.put("hl.fl", "text_t,h1,h2");
post.put("hl.simple.pre", "<b>");
post.put("hl.simple.post", "</b>");
post.put("hl.fragsize", Integer.toString(SnippetProcess.SNIPPET_MAX_LENGTH));
GSAResponseWriter.Sort sort = new GSAResponseWriter.Sort(post.get(CommonParams.SORT, ""));
String sorts = sort.toSolr();
if (sorts == null) {

@ -35,6 +35,7 @@ import net.yacy.cora.services.federated.solr.OpensearchResponseWriter;
import net.yacy.kelondro.logging.Log;
import net.yacy.search.Switchboard;
import net.yacy.search.SwitchboardConstants;
import net.yacy.search.query.SnippetProcess;
import net.yacy.search.solr.EmbeddedSolrConnector;
import net.yacy.search.solr.SolrServlet;
@ -71,7 +72,7 @@ public class select {
xsltWriter.init(initArgs);
RESPONSE_WRITER.put("xslt", xsltWriter); // try i.e. http://localhost:8090/solr/select?q=*:*&start=0&rows=10&wt=xslt&tr=json.xsl
RESPONSE_WRITER.put("exml", new EnhancedXMLResponseWriter());
RESPONSE_WRITER.put("rss", new OpensearchResponseWriter()); //try http://localhost:8090/solr/select?wt=rss&q=olympia
RESPONSE_WRITER.put("rss", new OpensearchResponseWriter()); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
}
/**
@ -147,6 +148,14 @@ public class select {
"") : env.getConfig(SwitchboardConstants.GREETING, "");
((OpensearchResponseWriter) responseWriter).setTitle(promoteSearchPageGreeting);
}
if (responseWriter instanceof OpensearchResponseWriter) {
// add options for snippet generation
post.put("hl", "true");
post.put("hl.fl", "text_t,h1,h2");
post.put("hl.simple.pre", "");
post.put("hl.simple.post", "");
post.put("hl.fragsize", Integer.toString(SnippetProcess.SNIPPET_MAX_LENGTH));
}
// get the embedded connector
EmbeddedSolrConnector connector = (EmbeddedSolrConnector) sb.index.fulltext().getLocalSolr();

@ -74,9 +74,11 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter {
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
DocSlice response = (DocSlice) rsp.getValues().get("response");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) rsp.getValues().get("highlighting");
writeProps(writer, "responseHeader", responseHeader); // this.writeVal("responseHeader", responseHeader);
writeDocs(writer, request, response); // this.writeVal("response", response);
writeProps(writer, "highlighting", highlighting);
writer.write(XML_STOP);
}
@ -89,6 +91,7 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter {
v = val.getVal(i);
if (v instanceof Integer) writeTag(writer, "int", n, ((Integer) v).toString(), false);
else if (v instanceof String) writeTag(writer, "str", n, (String) v, true);
else if (v instanceof String[]) writeTag(writer, "str", n, (String[]) v, true);
else if (v instanceof NamedList) writeProps(writer, n, (NamedList<?>) v);
}
if (sz > 0) {
@ -205,6 +208,14 @@ public class EnhancedXMLResponseWriter implements QueryResponseWriter {
writer.write("</"); writer.write(tag); writer.write('>'); writer.write(lb);
}
/**
 * Writes a multi-valued entry as an {@code arr} element that contains one child
 * element per value.
 *
 * @param writer   the target the XML is written to
 * @param tag      the tag name used for every child element
 * @param nameAttr the value of the {@code name} attribute of the enclosing {@code arr} element
 * @param vals     the values to write; each one becomes an unnamed child element
 * @param escape   handed through unchanged to the single-value {@code writeTag}
 * @throws IOException if writing to {@code writer} fails
 */
private static void writeTag(final Writer writer, final String tag, final String nameAttr, final String[] vals, final boolean escape) throws IOException {
    // NOTE(review): this relies on startTagOpen to emit the start of the <arr> element;
    // confirm that the start tag is terminated with '>' before the children are written.
    startTagOpen(writer, "arr", nameAttr);
    for (int index = 0; index < vals.length; index++) {
        // children carry no name attribute of their own
        writeTag(writer, tag, null, vals[index], escape);
    }
    writer.write("</arr>");
    writer.write(lb);
}
private static void startTagOpen(final Writer writer, final String tag, final String nameAttr) throws IOException {
writer.write('<'); writer.write(tag);
if (nameAttr != null) writeAttr(writer, "name", nameAttr);

@ -140,6 +140,9 @@ public class GSAResponseWriter implements QueryResponseWriter {
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
DocSlice response = (DocSlice) rsp.getValues().get("response");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) rsp.getValues().get("highlighting");
Map<String, List<String>> snippets = OpensearchResponseWriter.highlighting(highlighting);
Map<Object,Object> context = request.getContext();
// parse response header
@ -187,6 +190,7 @@ public class GSAResponseWriter implements QueryResponseWriter {
// parse body
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
String urlhash = null;
for (int i = 0; i < responseCount; i++) {
writer.write("<R N=\"" + (resHead.offset + i + 1) + "\"" + (i == 1 ? " L=\"2\"" : "") + ">"); writer.write(lb);
int id = iterator.nextDoc();
@ -207,6 +211,10 @@ public class GSAResponseWriter implements QueryResponseWriter {
}
// if the rule is not generic, use the specific here
if (YaCySchema.id.name().equals(fieldName)) {
urlhash = value.stringValue();
continue;
}
if (YaCySchema.sku.name().equals(fieldName)) {
OpensearchResponseWriter.solitaireTag(writer, GSAToken.U.name(), value.stringValue());
OpensearchResponseWriter.solitaireTag(writer, GSAToken.UE.name(), value.stringValue());
@ -247,6 +255,8 @@ public class GSAResponseWriter implements QueryResponseWriter {
}
}
// compute snippet from texts
List<String> snippet = urlhash == null ? null : snippets.get(urlhash);
OpensearchResponseWriter.solitaireTag(writer, GSAToken.S.name(), snippet == null || snippet.size() == 0 ? description : snippet.get(0));
OpensearchResponseWriter.solitaireTag(writer, GSAToken.GD.name(), description);
if (YaCyVer == null) YaCyVer = yacyVersion.thisVersion().getName() + "/" + Switchboard.getSwitchboard().peers.mySeed().hash;
OpensearchResponseWriter.solitaireTag(writer, GSAToken.ENT_SOURCE.name(), YaCyVer);

@ -114,6 +114,9 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> responseHeader = (SimpleOrderedMap<Object>) rsp.getResponseHeader();
DocSlice response = (DocSlice) rsp.getValues().get("response");
@SuppressWarnings("unchecked")
SimpleOrderedMap<Object> highlighting = (SimpleOrderedMap<Object>) rsp.getValues().get("highlighting");
Map<String, List<String>> snippets = highlighting(highlighting);
// parse response header
ResHead resHead = new ResHead();
@ -144,6 +147,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
final int responseCount = response.size();
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
String urlhash = null;
for (int i = 0; i < responseCount; i++) {
openTag(writer, "item");
int id = iterator.nextDoc();
@ -165,7 +169,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
// if the rule is not generic, use the specific here
if (YaCySchema.id.name().equals(fieldName)) {
solitaireTag(writer, RSSMessage.Token.guid.name(), value.stringValue(), "isPermaLink=\"false\"");
urlhash = value.stringValue();
solitaireTag(writer, RSSMessage.Token.guid.name(), urlhash, "isPermaLink=\"false\"");
continue;
}
if (YaCySchema.title.name().equals(fieldName)) {
@ -198,7 +203,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
}
}
// compute snippet from texts
solitaireTagNocheck(writer, RSSMessage.Token.description.name(), description);
List<String> snippet = urlhash == null ? null : snippets.get(urlhash);
solitaireTagNocheck(writer, RSSMessage.Token.description.name(), snippet == null || snippet.size() == 0 ? description : snippet.get(0));
closeTag(writer, "item");
}
@ -206,6 +212,30 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
writer.write(XML_STOP);
}
/**
 * Flattens a "highlighting" structure into a map from entry name to the list of
 * snippet strings found below that entry.
 * <p>
 * Every top-level entry whose value is a {@code SimpleOrderedMap} yields one map
 * entry; all {@code String[]} values inside it are concatenated, in order, into a
 * single list. Top-level entries of any other type are skipped, and nested values
 * that are not a {@code String[]} are ignored, so the resulting list may be empty.
 * The response writers in this code base look the result up by the value of the
 * document's id field.
 *
 * @param val the highlighting structure, may be {@code null}
 * @return a new, never-{@code null} map; empty if {@code val} is {@code null}
 */
@SuppressWarnings("unchecked")
public static Map<String, List<String>> highlighting(final SimpleOrderedMap<Object> val) {
    final Map<String, List<String>> snippets = new HashMap<String, List<String>>();
    if (val == null) {
        return snippets;
    }
    final int entryCount = val.size();
    for (int entryIndex = 0; entryIndex < entryCount; entryIndex++) {
        final Object entryValue = val.getVal(entryIndex);
        if (!(entryValue instanceof SimpleOrderedMap)) {
            continue; // only nested maps carry snippets
        }
        final SimpleOrderedMap<Object> fields = (SimpleOrderedMap<Object>) entryValue;
        final int fieldCount = fields.size();
        final List<String> fragments = new ArrayList<String>(fieldCount);
        for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
            final Object fieldValue = fields.getVal(fieldIndex);
            if (fieldValue instanceof String[]) {
                for (final String fragment : (String[]) fieldValue) {
                    fragments.add(fragment);
                }
            }
        }
        snippets.put(val.getName(entryIndex), fragments);
    }
    return snippets;
}
/**
 * Writes an opening tag without attributes, followed by the line break sequence
 * {@code lb}.
 *
 * @param writer the target the XML is written to
 * @param tag    the tag name to place between {@code <} and {@code >}
 * @throws IOException if writing to {@code writer} fails
 */
public static void openTag(final Writer writer, final String tag) throws IOException {
    writer.write('<');
    writer.write(tag);
    writer.write('>');
    writer.write(lb);
}

@ -69,6 +69,7 @@ public class SnippetProcess {
public static Log log = new Log("SEARCH");
public static final int SNIPPET_MAX_LENGTH = 220;
private final static int SNIPPET_WORKER_THREADS = Math.max(4, Runtime.getRuntime().availableProcessors() * 2);
// input values
@ -579,7 +580,7 @@ public class SnippetProcess {
//this.query.queryString,
null,
((this.query.constraint != null) && (this.query.constraint.get(Condenser.flag_cat_indexof))),
220,
SNIPPET_MAX_LENGTH,
!this.query.isLocal());
return new ResultEntry(page, this.query.getSegment(), this.peers, snippet, null, dbRetrievalTime, 0); // result without snippet
}

Loading…
Cancel
Save