replaced yacydoc servlet usage by a solr result output using an html

output writer. This made the creation of an HTML result writer necessary,
which is included in this commit. The yacydoc servlet was used to
present all metadata to a document, but the solr interface can serve for
this purpose in a much better way. All usages (except one) of yacydoc
were replaced by a solr call. This affects also the 'metadata' link
attached to search results.
pull/1/head
orbiter 12 years ago
parent 200769d0c6
commit 2b320313d9

@ -177,7 +177,7 @@ $(function() {
<td width="15px"></td>
<td>#[content_showDate_date]#</td>
<td>&nbsp;|&nbsp;42 kbyte</td>
<td>&nbsp;|&nbsp;<a href="api/yacydoc.html" target="LayouTest" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a></td>
<td>&nbsp;|&nbsp;<a href="/solr/select?q=*:*&defType=edismax&start=0&rows=3&core=collection1&wt=html" target="LayouTest" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a></td>
<td>&nbsp;|&nbsp;<a href="ViewFile.html" target="LayouTest">Parser</a></td>
<td>&nbsp;|&nbsp;<a href="yacysearch.html" target="LayouTest">Pictures</a></td>
<td>&nbsp;|&nbsp;<a href="CacheResource_p.html" target="LayouTest">Cache</a></td>

@ -216,9 +216,9 @@ function updatepage(str) {
#(genUrlProfile)#
::No entry found for URL-hash #[urlhash]#
::<iframe src="/api/yacydoc.html?urlhash=#[urlhash]#" width="100%" height="420" frameborder="0" scrolling="no"></iframe><br />
::<iframe src="/solr/select?defType=edismax&start=0&rows=3&core=collection1&wt=html&q=id:%22#[urlhash]#%22" width="100%" height="420" frameborder="0" scrolling="no"></iframe><br />
<div id="api">
<a href="/api/yacydoc.html?urlhash=#[urlhash]#">
<a href="/solr/select?defType=edismax&start=0&rows=3&core=collection1&wt=html&q=id:%22#[urlhash]#%22">
<img src="env/grafics/api.png" width="60" height="40" alt="API" /></a>
<span>These document details can be retrieved as <a href="http://www.w3.org/TR/xhtml-rdfa-primer/">XHTML+RDFa</a>
document containg <a href="http://www.w3.org/RDF/">RDF</a> annotations in <a href="http://dublincore.org/">Dublin Core</a> vocabulary.

@ -88,39 +88,20 @@ function updatepage(str) {
</dl>
</fieldset>
</form>
<table border="0">
<tr><td valign="top">
#(error)#
<form method="get" action="ViewFile.html" accept-charset="ascii">
<fieldset><legend>URL Metadata</legend>
<dl>
<dt>URL:</dt><dd><a href="#[url]#">#[url]#</a></dd>
<dt>Hash:</dt><dd><a href="/api/yacydoc.html?urlhash=#[hash]#">#[hash]#</a></dd>
<dt>Hash:</dt><dd><a href="/solr/select?defType=edismax&start=0&rows=3&core=collection1&wt=html&q=id:%22#[hash]#%22">#[hash]#</a> (click this for full metadata)</dd>
<dt>In Metadata:</dt><dd>#(inurldb)#no::yes#(/inurldb)#</dd>
<dt>In Cache:</dt><dd>#(incache)#no::yes#(/incache)#</dd>
<dt>Word Count:</dt><dd>#[wordCount]#</dd>
<dt>Description:</dt><dd>#[desc]#</dd>
<dt>Size:</dt><dd>#[size]# Bytes</dd>#(mimeTypeAvailable)#::
<dt>MimeType:</dt><dd>#[mimeType]#</dd>#(/mimeTypeAvailable)#
<dt>Referrer Hash:</dt><dd>#[referrerHash]#</dd>
<dt>Modified Date:</dt><dd>#[moddate]#</dd>
<dt>Load Date:</dt><dd>#[loaddate]#</dd>
<dt>Fresh Date:</dt><dd>#[freshdate]#</dd>
<dt>Host Hash:</dt><dd>#[hosthash]#</dd>
<dt>dc_creator:</dt><dd>#[dc_creator]#</dd>
<dt>dc_publisher:</dt><dd>#[dc_publisher]#</dd>
<dt>dc_subject:</dt><dd>#[dc_subject]#</dd>
<dt>md5:</dt><dd>#[md5]#</dd>
<dt>lat:</dt><dd>#[lat]#</dd>
<dt>lon:</dt><dd>#[lon]#</dd>
<dt>doctype:</dt><dd>#[doctype]#</dd>
<dt>Language:</dt><dd>#[language]#</dd>
<dt>Flags:</dt><dd>#[flags]#</dd>
<dt>Word Count:</dt><dd>#[wordCount]#</dd>
<dt>Local Links:</dt><dd>#[llocal]#</dd>
<dt>Global Links:</dt><dd>#[lother]#</dd>
<dt>Image Links:</dt><dd>#[limage]#</dd>
<dt>Audio Links:</dt><dd>#[laudio]#</dd>
<dt>Video Links:</dt><dd>#[lvideo]#</dd>
<dt>App Links:</dt><dd>#[lapp]#</dd>
<dt>Collections:</dt><dd>#[collections]#</dd>
<dt>Triplestore:</dt><dd><pre>#[triples]#</pre></dd>
<dt><label for="viewMode">View as</label>:</dt>
@ -156,7 +137,7 @@ function updatepage(str) {
:: <!-- 6 -->
<span class="error">Unsupported protocol.</span>
#(/error)#
</td><td valign="top">
#(viewMode)#
:: <!-- 1 -->
<fieldset><legend>Original Content from Web</legend>
@ -225,7 +206,7 @@ function updatepage(str) {
</ol>
</fieldset>
#(/viewMode)#
</td></tr></table>
#%env/templates/footer.template%#
</body>

@ -33,6 +33,7 @@ import net.yacy.cora.federate.solr.SolrServlet;
import net.yacy.cora.federate.solr.connector.EmbeddedSolrConnector;
import net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter;
import net.yacy.cora.federate.solr.responsewriter.GSAResponseWriter;
import net.yacy.cora.federate.solr.responsewriter.HTMLResponseWriter;
import net.yacy.cora.federate.solr.responsewriter.JsonResponseWriter;
import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter;
import net.yacy.cora.protocol.HeaderFramework;
@ -83,6 +84,7 @@ public class select {
xsltWriter.init(initArgs);
RESPONSE_WRITER.put("xslt", xsltWriter); // try i.e. http://localhost:8090/solr/select?q=*:*&start=0&rows=10&wt=xslt&tr=json.xsl
RESPONSE_WRITER.put("exml", new EnhancedXMLResponseWriter());
RESPONSE_WRITER.put("html", new HTMLResponseWriter());
RESPONSE_WRITER.put("rss", opensearchResponseWriter); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("opensearch", opensearchResponseWriter); //try http://localhost:8090/solr/select?wt=rss&q=olympia&hl=true&hl.fl=text_t,h1,h2
RESPONSE_WRITER.put("yjson", new JsonResponseWriter()); //try http://localhost:8090/solr/select?wt=json&q=olympia&hl=true&hl.fl=text_t,h1,h2
@ -107,7 +109,7 @@ public class select {
if ("exml".equals(wt)) return "application/rss+xml";
if ("json".equals(wt)) return "application/json";
if ("yjson".equals(wt)) return "application/json";
if ("python".equals(wt)) return "text/html";
if ("html".equals(wt) || "python".equals(wt)) return "text/html";
if ("php".equals(wt) || "phps".equals(wt)) return "application/x-httpd-php";
if ("ruby".equals(wt)) return "text/html";
if ("raw".equals(wt)) return "application/octet-stream";

@ -26,7 +26,7 @@
<p class="urlinfo">
#(showDate)#::#[date]##(/showDate)#
#(showSize)#::&nbsp;|&nbsp;#[sizename]##(/showSize)#
#(showMetadata)#::&nbsp;|&nbsp;<a href="api/yacydoc.html?urlhash=#[urlhash]#" target="_blank" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a>#(/showMetadata)#
#(showMetadata)#::&nbsp;|&nbsp;<a href="/solr/select?q=id:%22#[urlhash]#%22&defType=edismax&start=0&rows=1&core=collection1&wt=html" target="_blank" onclick="return hs.htmlExpand(this, { objectType: 'ajax'} )">Metadata</a>#(/showMetadata)#
#(showParser)#::&nbsp;|&nbsp;<a href="ViewFile.html?urlHash=#[urlhash]#&amp;words=#[words]#" target="_blank">Parser</a>#(/showParser)#
#(showPictures)#::&nbsp;|&nbsp;<a href="yacysearch.html?cat=image&amp;url=#[link]#&amp;query=#[former]#">Pictures</a>#(/showPictures)#
#(showCache)#::&nbsp;|&nbsp;<a href="CacheResource_p.html?url=#[link]#" target="_blank">Cache</a>#(/showCache)#

@ -0,0 +1,190 @@
/**
* HTMLResponseWriter
* Copyright 2013 by Michael Peter Christen
* First released 09.06.2013 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.federate.solr.responsewriter;
import java.io.IOException;
import java.io.Writer;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.federate.solr.SolrType;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TextField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
public class HTMLResponseWriter implements QueryResponseWriter {
private static final Set<String> DEFAULT_FIELD_LIST = null;
private static final Pattern dqp = Pattern.compile("\"");
public HTMLResponseWriter() {
super();
}
@Override
public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
return CONTENT_TYPE_XML_UTF8;
}
@Override
public void init(@SuppressWarnings("rawtypes") NamedList n) {
}
@Override
public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
NamedList<?> values = rsp.getValues();
assert values.get("responseHeader") != null;
assert values.get("response") != null;
writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n");
//writer.write("<!--\n");
//writer.write("this is a XHTML+RDFa file. It contains RDF annotations with dublin core properties\n");
//writer.write("you can validate it with http://validator.w3.org/\n");
//writer.write("-->\n");
writer.write("<html xmlns=\"http://www.w3.org/1999/xhtml\"\n");
writer.write(" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"\n");
writer.write(" xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n");
writer.write(" xmlns:foaf=\"http://xmlns.com/foaf/0.1/\">\n");
writer.write("<head profile=\"http://www.w3.org/2003/g/data-view\">\n");
//writer.write("<link rel=\"transformation\" href=\"http://www-sop.inria.fr/acacia/soft/RDFa2RDFXML.xsl\"/>\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"/env/base.css\" />\n");
writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"/env/style.css\" />\n");
NamedList<Object> paramsList = request.getOriginalParams().toNamedList();
paramsList.remove("wt");
String xmlquery = dqp.matcher("/solr/select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");
writer.write("<div id=\"api\"><a href=\"" + xmlquery + "\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.</div>\n");
DocList response = ((ResultContext) values.get("response")).docs;
final int sz = response.size();
if (sz > 0) {
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
IndexSchema schema = request.getSchema();
if (sz == 1) {
int id = iterator.nextDoc();
Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
writer.write("<title>" + title + "</title>\n</head><body>\n");
writeDoc(writer, tdoc, title);
} else {
writer.write("<title>Document List</title>\n</head><body>\n");
for (int i = 0; i < sz; i++) {
int id = iterator.nextDoc();
Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
LinkedHashMap<String, String> tdoc = translateDoc(schema, doc);
String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
writeDoc(writer, tdoc, title);
}
}
} else {
writer.write("<title>No Document Found</title>\n</head><body>\n");
}
writer.write("</body></html>\n");
}
private static final void writeDoc(Writer writer, LinkedHashMap<String, String> tdoc, String title) throws IOException {
writer.write("<form name=\"yacydoc" + title + "\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
writer.write("<fieldset>\n");
writer.write("<h1 property=\"dc:Title\">" + title + "</h1>\n");
writer.write("<dl>\n");
for (Map.Entry<String, String> entry: tdoc.entrySet()) {
writer.write("<dt>");
writer.write(entry.getKey());
writer.write("</dt><dd>");
XML.escapeAttributeValue(entry.getValue(), writer);
writer.write("</dd>\n");
}
writer.write("</dl>\n");
writer.write("</fieldset>\n");
writer.write("</form>\n");
}
private static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) {
List<IndexableField> fields = doc.getFields();
int sz = fields.size();
int fidx1 = 0, fidx2 = 0;
LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>();
while (fidx1 < sz) {
IndexableField value = fields.get(fidx1);
String fieldName = value.name();
fidx2 = fidx1 + 1;
while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) {
fidx2++;
}
SchemaField sf = schema.getFieldOrNull(fieldName);
if (sf == null) sf = new SchemaField(fieldName, new TextField());
FieldType type = sf.getType();
if (fidx1 + 1 == fidx2) {
if (sf.multiValued()) {
String sv = value.stringValue();
kv.put(fieldName, field2string(type, sv));
} else {
kv.put(fieldName, field2string(type, value.stringValue()));
}
} else {
for (int i = fidx1; i < fidx2; i++) {
String sv = fields.get(i).stringValue();
kv.put(fieldName + "_" + i, field2string(type, sv));
}
}
fidx1 = fidx2;
}
return kv;
}
@SuppressWarnings("deprecation")
private static String field2string(final FieldType type, final String value) {
String typeName = type.getTypeName();
if (typeName.equals(SolrType.bool.printName())) {
return "F".equals(value) ? "false" : "true";
} else if (typeName.equals(SolrType.date.printName())) {
return org.apache.solr.schema.DateField.formatExternal(new Date(Long.parseLong(value))); // this is declared deprecated in solr 4.2.1 but is still used as done here
}
return value;
}
// XML.escapeCharData(val, writer);
}
Loading…
Cancel
Save