added image link in search results

This is intended to help build a preview of search results.
The image is computed from the list of embedded images; it is
always the first image in that list.
In RSS-type results, the image is presented like
<media:content medium="image" url="https://abc.xyz/logo.png"/>
as defined in
http://www.rssboard.org/media-rss#media-content
pull/149/head
Michael Peter Christen 7 years ago
parent f38fb7f02c
commit 7f395ef937

@ -199,6 +199,19 @@ public class yacysearchitem {
prop.put("content_favicon", 1);
}
prop.putHTML("content_favicon_faviconUrl", processFaviconURL(ImageViewer.hasFullViewingRights(header, sb), faviconURL));
if (result.limage() == 0) {
if (faviconURL == null) {
prop.put("content_image", 0);
} else {
prop.put("content_image", 1);
prop.putXML("content_image_url", faviconURL.toNormalform(true));
}
} else {
prop.put("content_image", 1);
prop.putXML("content_image_url", result.imageURL());
}
prop.put("content_urlhash", urlhash);
prop.put("content_ranking", Float.toString(result.score()));
Date[] events = result.events();

@ -5,6 +5,7 @@
"code": "#[code]#",
"description": "#[description-json]#",
"pubDate": "#[date822]#",
#(image)#::"image": "#[url]#",#(/image)#
#(showEvent)#::"eventDate": "#[date822]#",#(/showEvent)#
"size": "#[size]#",
"sizename": "#[sizename]#",

@ -3,6 +3,7 @@
<link>#[link]#</link>
<description>#[description-xml]#</description>
<pubDate>#[date822]#</pubDate>
#(image)#::<media:content medium="image" url="#[url]#"/>#(/image)#
#(showEvent)#::<ev:startdate>#[date822]#</ev:startdate><ev:enddate>#[date822]#</ev:enddate>#(/showEvent)#
<dc:publisher><![CDATA[#[publisher]#]]></dc:publisher>
<dc:creator><![CDATA[#[creator]#]]></dc:creator>

@ -40,6 +40,8 @@ import net.yacy.cora.lod.vocabulary.DublinCore;
import net.yacy.cora.lod.vocabulary.Geo;
import net.yacy.cora.lod.vocabulary.YaCyMetadata;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.crawler.retrieval.Response;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
@ -159,6 +161,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
SolrIndexSearcher searcher = request.getSearcher();
DocIterator iterator = response.iterator();
String urlhash = null;
MultiProtocolURL url = null;
for (int i = 0; i < responseCount; i++) {
openTag(writer, "item");
int id = iterator.nextDoc();
@ -168,6 +171,8 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
List<String> texts = new ArrayList<String>();
List<String> descriptions = new ArrayList<String>();
String title = "";
List<Object> images_protocol_obj = new ArrayList<>();
List<String> images_stub = new ArrayList<>();
for (int j = 0; j < fieldc; j++) {
IndexableField value = fields.get(j);
String fieldName = value.name();
@ -184,7 +189,7 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
String u = value.stringValue();
solitaireTag(writer, RSSMessage.Token.link.name(), u);
try {
MultiProtocolURL url = new MultiProtocolURL(u);
url = new MultiProtocolURL(u);
solitaireTag(writer, YaCyMetadata.host.getURIref(), url.getHost());
solitaireTag(writer, YaCyMetadata.path.getURIref(), url.getPath());
solitaireTag(writer, YaCyMetadata.file.getURIref(), url.getFileName());
@ -232,6 +237,26 @@ public class OpensearchResponseWriter implements QueryResponseWriter {
texts.add(value.stringValue());
continue;
}
if (CollectionSchema.images_protocol_sxt.getSolrFieldName().equals(fieldName)) {
images_protocol_obj.add(value.stringValue());
continue;
}
if (CollectionSchema.images_urlstub_sxt.getSolrFieldName().equals(fieldName)) {
images_stub.add(value.stringValue());
continue;
}
}
if (Math.min(images_protocol_obj.size(), images_stub.size()) > 0) {
List<String> images_protocol = CollectionConfiguration.indexedList2protocolList(images_protocol_obj, images_protocol_obj.size());
String imageurl = images_protocol.get(0) + "://" + images_stub.get(0);
writer.write("<media:content medium=\"image\" url=\"");
XML.escapeCharData(imageurl, writer); writer.write("\"/>\n");
} else {
if (url != null && Response.docTypeExt(MultiProtocolURL.getFileExtension(url.getFile()).toLowerCase()) == Response.DT_IMAGE) {
writer.write("<media:content medium=\"image\" url=\"");
XML.escapeCharData(url.toNormalform(true), writer); writer.write("\"/>\n");
}
}
// compute snippet from texts

@ -35,6 +35,8 @@ import net.yacy.cora.federate.solr.responsewriter.OpensearchResponseWriter.ResHe
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.JSONObject;
import net.yacy.crawler.retrieval.Response;
import net.yacy.search.schema.CollectionConfiguration;
import net.yacy.search.schema.CollectionSchema;
import org.apache.lucene.document.Document;
@ -141,6 +143,9 @@ public class YJsonResponseWriter implements QueryResponseWriter {
List<String> descriptions = new ArrayList<String>();
String title = "";
StringBuilder path = new StringBuilder(80);
List<Object> images_protocol_obj = new ArrayList<>();
List<String> images_stub = new ArrayList<>();
for (int j = 0; j < fieldc; j++) {
IndexableField value = fields.get(j);
String fieldName = value.name();
@ -193,10 +198,33 @@ public class YJsonResponseWriter implements QueryResponseWriter {
solitaireTag(writer, "sizename", sizemb > 0 ? (Integer.toString(sizemb) + " mbyte") : sizekb > 0 ? (Integer.toString(sizekb) + " kbyte") : (Integer.toString(size) + " byte"));
continue;
}
if (CollectionSchema.last_modified.getSolrFieldName().equals(fieldName)) {
Date d = new Date(Long.parseLong(value.stringValue()));
solitaireTag(writer, "pubDate", HeaderFramework.formatRFC1123(d));
continue;
}
if (CollectionSchema.images_protocol_sxt.getSolrFieldName().equals(fieldName)) {
images_protocol_obj.add(value.stringValue());
continue;
}
if (CollectionSchema.images_urlstub_sxt.getSolrFieldName().equals(fieldName)) {
images_stub.add(value.stringValue());
continue;
}
//missing: "code","faviconCode"
}
if (Math.min(images_protocol_obj.size(), images_stub.size()) > 0) {
List<String> images_protocol = CollectionConfiguration.indexedList2protocolList(images_protocol_obj, images_protocol_obj.size());
String imageurl = images_protocol.get(0) + "://" + images_stub.get(0);
solitaireTag(writer, "image", imageurl);
} else {
if (url != null && Response.docTypeExt(MultiProtocolURL.getFileExtension(url.getFile()).toLowerCase()) == Response.DT_IMAGE) {
solitaireTag(writer, "image", url.toNormalform(true));
}
}
// compute snippet from texts
solitaireTag(writer, "path", path.toString());
solitaireTag(writer, "title", title.length() == 0 ? path.toString() : title.replaceAll("\"", "'"));

@ -451,6 +451,20 @@ public class URIMetadataNode extends SolrDocument /* implements Comparable<URIMe
return getInt(CollectionSchema.wordcount_i);
}
/**
 * Get one image URL for this document which can be used as a thumbnail,
 * in case that images are embedded in the document.
 * The URL is assembled from the first entry of the image protocol list and
 * the first entry of the image URL stub list, joined by "://".
 *
 * @return the URL of the first embedded image as a string
 * @throws UnsupportedOperationException if {@link #limage()} is 0, or if the
 *         protocol list or the URL stub list contains no entry
 */
public String imageURL() {
    if (limage() == 0) {
        throw new UnsupportedOperationException();
    }

    // protocols and stubs are stored in two separate fields; entry 0 of each is combined below
    final List<String> protocols = CollectionConfiguration.indexedList2protocolList(
            getFieldValues(CollectionSchema.images_protocol_sxt.getSolrFieldName()),
            limage());
    final List<String> stubs = getStringList(CollectionSchema.images_urlstub_sxt);

    // an image URL can only be built when both lists have at least one entry
    final int available = Math.min(protocols.size(), stubs.size());
    if (available == 0) {
        throw new UnsupportedOperationException();
    }

    return protocols.get(0) + "://" + stubs.get(0);
}
public int llocal() {
return getInt(CollectionSchema.inboundlinkscount_i);
}

Loading…
Cancel
Save