From 969123385b1376268a4497ec5fdb0ca69b5ffbb7 Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 23 Nov 2009 16:10:50 +0000 Subject: [PATCH] added json and rss output for image search git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6503 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/yacysearch.rss | 3 +- htroot/yacysearchitem.html | 6 +-- htroot/yacysearchitem.java | 24 ++++++---- htroot/yacysearchitem.json | 11 +++-- htroot/yacysearchitem.xml | 39 ++++++++++++++- source/de/anomic/data/MimeTable.java | 48 +++++++++++++++++++ .../anomic/http/server/HTTPDFileHandler.java | 23 +++------ source/de/anomic/search/MediaSnippet.java | 39 ++++++++++++--- source/net/yacy/document/Document.java | 2 +- .../document/parser/html/ContentScraper.java | 6 +-- .../yacy/document/parser/html/ImageEntry.java | 10 +++- .../document/parser/images/bmpParser.java | 2 +- .../parser/images/genericImageParser.java | 2 +- .../net/yacy/document/parser/rssParser.java | 2 +- 14 files changed, 167 insertions(+), 50 deletions(-) create mode 100644 source/de/anomic/data/MimeTable.java diff --git a/htroot/yacysearch.rss b/htroot/yacysearch.rss index 7af4b92ee..e9884e8e5 100644 --- a/htroot/yacysearch.rss +++ b/htroot/yacysearch.rss @@ -3,7 +3,8 @@ + xmlns:media="http://search.yahoo.com/mrss/" + xmlns:atom="http://www.w3.org/2005/Atom"> #[promoteSearchPageGreeting]# diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 50f20a5d2..a823f3c93 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -20,8 +20,7 @@

#[date]# | #[sizename]# | YBR-#[ybr]# | Info | Pictures

:: - #{items}# -
+ #(item)#::
#[name]# @@ -29,8 +28,7 @@ -
- #{/items}# +
#(/item)# :: #{items}# #[name]##[hrefshort]# diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index 5aefe32c0..3b278d77e 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -163,16 +163,22 @@ public class yacysearchitem { prop.put("content", theQuery.contentdom.getCode() + 1); // switch on specific content final MediaSnippet ms = theSearch.result().oneImage(item); if (ms == null) { - prop.put("content_items", "0"); + prop.put("content_item", "0"); } else { - prop.putHTML("content_items_0_hrefCache", (auth) ? "/ViewImage.png?url=" + ms.href.toNormalform(true, false) : ms.href.toNormalform(true, false)); - prop.putHTML("content_items_0_href", ms.href.toNormalform(true, false)); - prop.put("content_items_0_code", sb.licensedURLs.aquireLicense(ms.href)); - prop.putHTML("content_items_0_name", shorten(ms.name, namelength)); - prop.put("content_items_0_attr", (ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"); // attributes, here: original size of image - prop.put("content_items_0_source", ms.source.toNormalform(true, false)); - prop.put("content_items_0_sourcedom", ms.source.getHost()); - prop.put("content_items", 1); + prop.putHTML("content_item_hrefCache", (auth) ? "/ViewImage.png?url=" + ms.href.toNormalform(true, false) : ms.href.toNormalform(true, false)); + prop.putHTML("content_item_href", ms.href.toNormalform(true, false)); + prop.put("content_item_code", sb.licensedURLs.aquireLicense(ms.href)); + prop.putHTML("content_item_name", shorten(ms.name, namelength)); + prop.put("content_item_mime", ms.mime); + prop.put("content_item_fileSize", ms.fileSize); + prop.put("content_item_width", ms.width); + prop.put("content_item_height", ms.height); + prop.put("content_item_attr", (ms.attr.equals("-1 x -1")) ? "" : "(" + ms.attr + ")"); // attributes, here: original size of image + prop.put("content_item_urlhash", ms.source.hash()); + prop.put("content_item_source", ms.source.toNormalform(true, false)); + prop.put("content_item_sourcedom", ms.source.getHost()); + prop.put("content_item_nl", (item == 0) ? 0 : 1); + prop.put("content_item", 1); } return prop; } diff --git a/htroot/yacysearchitem.json b/htroot/yacysearchitem.json index fcc58b480..61a663bbb 100644 --- a/htroot/yacysearchitem.json +++ b/htroot/yacysearchitem.json @@ -11,15 +11,18 @@ "host": "#[host]#", "path": "#[path]#", "file": "#[file]#" - }::#{items}##(nl)#:: ,#(/nl)# + }::#(item)#::#(nl)#:: ,#(/nl)# { "title": "#[name]#", "icon": "/ViewImage.png?maxwidth=96&maxheight=96&code=#[code]#", "image": "#[href]#", "cache": "#[hrefCache]#", - "link": "#[source]#", - "host": "#[sourcedom]#" - }#{/items}#:: + "url": "#[source]#", + "urlhash": "#[urlhash]#", + "host": "#[sourcedom]#", + "width": "#[width]#", + "height": "#[height]#" + }#(/item)#:: :: :: :: diff --git a/htroot/yacysearchitem.xml b/htroot/yacysearchitem.xml index b6d022672..863b82342 100644 --- a/htroot/yacysearchitem.xml +++ b/htroot/yacysearchitem.xml @@ -9,4 +9,41 @@ #[path]# #[file]# #[urlhash]# -#(/content)# \ No newline at end of file +::#(item)#:: +#[name]# +#[source]# + + +#[urlhash]# +#[sourcedom]# + + + + + +#(/item)#:: +#(/content)# \ No newline at end of file diff --git a/source/de/anomic/data/MimeTable.java b/source/de/anomic/data/MimeTable.java new file mode 100644 index 000000000..10350eeb3 --- /dev/null +++ b/source/de/anomic/data/MimeTable.java @@ -0,0 +1,48 @@ +package de.anomic.data; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.util.Properties; + +import net.yacy.kelondro.data.meta.DigestURI; + +public class MimeTable { + + private static final Properties mimeTable = new Properties(); + + public static void init(File mimeFile) { + if (mimeTable.size() == 0) { + // load the mime table + BufferedInputStream mimeTableInputStream = null; + try { + mimeTableInputStream = new BufferedInputStream(new FileInputStream(mimeFile)); + mimeTable.load(mimeTableInputStream); + } catch (final Exception e) { + e.printStackTrace(); + } finally { + if (mimeTableInputStream != null) try { mimeTableInputStream.close(); } catch (final Exception e1) {} + } + } + } + + public static int size() { + return mimeTable.size(); + } + + public static String ext2mime(String ext) { + return mimeTable.getProperty(ext, "application/" + ext); + } + + public static String ext2mime(String ext, String dfltMime) { + return mimeTable.getProperty(ext, dfltMime); + } + + public static String url2mime(DigestURI url, String dfltMime) { + return ext2mime(url.getFileExtension(), dfltMime); + } + + public static String url2mime(DigestURI url) { + return ext2mime(url.getFileExtension()); + } +} diff --git a/source/de/anomic/http/server/HTTPDFileHandler.java b/source/de/anomic/http/server/HTTPDFileHandler.java index 3e711e7b9..8942d2cb7 100644 --- a/source/de/anomic/http/server/HTTPDFileHandler.java +++ b/source/de/anomic/http/server/HTTPDFileHandler.java @@ -92,6 +92,7 @@ import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; import net.yacy.visualization.RasterPlotter; +import de.anomic.data.MimeTable; import de.anomic.http.server.servlets.crawlReceipt; import de.anomic.http.server.servlets.transferURL; import de.anomic.search.Switchboard; @@ -107,7 +108,6 @@ public final class HTTPDFileHandler { private static final boolean safeServletsMode = false; // if true then all servlets are called synchronized - private static final Properties mimeTable = new Properties(); // create a class loader private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/); private static serverSwitch switchboard = null; @@ -142,20 +142,11 @@ public final class HTTPDFileHandler { if (switchboard == null) { switchboard = theSwitchboard; - if (mimeTable.size() == 0) { + if (MimeTable.size() == 0) { // load the mime table final String mimeTablePath = theSwitchboard.getConfig("mimeTable",""); - BufferedInputStream mimeTableInputStream = null; - try { - Log.logConfig("HTTPDFiles", "Loading mime mapping file " + mimeTablePath); - mimeTableInputStream = new BufferedInputStream(new FileInputStream(new File(theSwitchboard.getRootPath(), mimeTablePath))); - mimeTable.load(mimeTableInputStream); - } catch (final Exception e) { - Log.logSevere("HTTPDFiles", "ERROR: path to configuration file or configuration invalid\n" + e); - System.exit(1); - } finally { - if (mimeTableInputStream != null) try { mimeTableInputStream.close(); } catch (final Exception e1) {} - } + Log.logConfig("HTTPDFiles", "Loading mime mapping file " + mimeTablePath); + MimeTable.init(new File(theSwitchboard.getRootPath(), mimeTablePath)); } // create default files array @@ -537,7 +528,7 @@ public final class HTTPDFileHandler { // send an image to client targetDate = new Date(System.currentTimeMillis()); nocache = true; - final String mimeType = mimeTable.getProperty(targetExt, "text/html"); + final String mimeType = MimeTable.ext2mime(targetExt, "text/html"); final ByteBuffer result = RasterPlotter.exportImage(yp.getImage(), targetExt); // write the array to the client @@ -551,7 +542,7 @@ public final class HTTPDFileHandler { // send an image to client targetDate = new Date(System.currentTimeMillis()); nocache = true; - final String mimeType = mimeTable.getProperty(targetExt, "text/html"); + final String mimeType = MimeTable.ext2mime(targetExt, "text/html"); // generate an byte array from the generated image int width = i.getWidth(null); if (width < 0) width = 96; // bad hack @@ -721,7 +712,7 @@ public final class HTTPDFileHandler { // we have found a file that can be written to the client // if this file uses templates, then we use the template // re-write - method to create an result - String mimeType = mimeTable.getProperty(targetExt,"text/html"); + String mimeType = MimeTable.ext2mime(targetExt, "text/html"); final boolean zipContent = requestHeader.acceptGzip() && HTTPDemon.shallTransportZipped("." + conProp.getProperty("EXT","")); if (path.endsWith("html") || path.endsWith("htm") || diff --git a/source/de/anomic/search/MediaSnippet.java b/source/de/anomic/search/MediaSnippet.java index 64ab6f781..4b482399d 100644 --- a/source/de/anomic/search/MediaSnippet.java +++ b/source/de/anomic/search/MediaSnippet.java @@ -30,6 +30,8 @@ import java.util.Iterator; import java.util.Map; import java.util.TreeSet; +import de.anomic.data.MimeTable; + import net.yacy.document.Document; import net.yacy.document.parser.html.ImageEntry; import net.yacy.kelondro.data.meta.DigestURI; @@ -41,15 +43,40 @@ import net.yacy.repository.LoaderDispatcher; public class MediaSnippet implements Comparable, Comparator { public ContentDomain type; public DigestURI href, source; - public String name, attr; + public String name, attr, mime; public int ranking; + public int width, height; + public long fileSize; - public MediaSnippet(final ContentDomain type, final DigestURI href, final String name, final String attr, final int ranking, final DigestURI source) { + public MediaSnippet(final ContentDomain type, final DigestURI href, final String mime, final String name, final long fileSize, final String attr, final int ranking, final DigestURI source) { this.type = type; this.href = href; + this.mime = mime; + this.fileSize = fileSize; this.source = source; // the web page where the media resource appeared this.name = name; this.attr = attr; + this.width = -1; + this.height = -1; + int p = 0; + if ((p = attr.indexOf(" x ")) > 0) { + this.width = Integer.parseInt(attr.substring(0, p).trim()); + this.height = Integer.parseInt(attr.substring(p + 3).trim()); + } + this.ranking = ranking; // the smaller the better! small values should be shown first + if ((this.name == null) || (this.name.length() == 0)) this.name = "_"; + if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_"; + } + + public MediaSnippet(final ContentDomain type, final DigestURI href, final String mime, final String name, final long fileSize, final int width, final int height, final int ranking, final DigestURI source) { + this.type = type; + this.href = href; + this.fileSize = fileSize; + this.source = source; // the web page where the media resource appeared + this.name = name; + this.attr = width + " x " + height; + this.width = width; + this.height = height; this.ranking = ranking; // the smaller the better! small values should be shown first if ((this.name == null) || (this.name.length() == 0)) this.name = "_"; if ((this.attr == null) || (this.attr.length() == 0)) this.attr = "_"; @@ -115,10 +142,10 @@ public class MediaSnippet implements Comparable, Comparator, Comparator 0.4) { final DigestURI url = absolutePath(tagopts.getProperty("src", "")); - final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height); + final ImageEntry ie = new ImageEntry(url, tagopts.getProperty("alt", ""), width, height, -1); addImage(images, ie); } // i think that real pictures have witdth & height tags - thq @@ -190,7 +190,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { final String linktitle = tagopts.getProperty("title", ""); if (type.equalsIgnoreCase("shortcut icon")) { - final ImageEntry ie = new ImageEntry(newLink, linktitle, -1,-1); + final ImageEntry ie = new ImageEntry(newLink, linktitle, -1, -1, -1); images.put(ie.url().hash(), ie); this.favicon = newLink; } else if (!type.equalsIgnoreCase("stylesheet") && !type.equalsIgnoreCase("alternate stylesheet")) { @@ -225,7 +225,7 @@ public class ContentScraper extends AbstractScraper implements Scraper { final String type = (p < 0) ? "" : f.substring(p + 1); if (type.equals("png") || type.equals("gif") || type.equals("jpg") || type.equals("jpeg")) { // special handling of such urls: put them to the image urls - final ImageEntry ie = new ImageEntry(url, super.stripAll(new String(text)).trim(), -1, -1); + final ImageEntry ie = new ImageEntry(url, super.stripAll(new String(text)).trim(), -1, -1, -1); addImage(images, ie); } else { anchors.put(url, super.stripAll(new String(text)).trim()); diff --git a/source/net/yacy/document/parser/html/ImageEntry.java b/source/net/yacy/document/parser/html/ImageEntry.java index 623b44dd6..7aed5ef92 100644 --- a/source/net/yacy/document/parser/html/ImageEntry.java +++ b/source/net/yacy/document/parser/html/ImageEntry.java @@ -33,12 +33,14 @@ public class ImageEntry implements Comparable, Comparator, Comparator anchors = new HashMap(); final HashMap images = new HashMap(); // add this image to the map of images - images.put(sb.toString(), new ImageEntry(location, "", width, height)); + images.put(sb.toString(), new ImageEntry(location, "", width, height, -1)); return new Document( location, diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java index 43265c549..284f8a90a 100644 --- a/source/net/yacy/document/parser/images/genericImageParser.java +++ b/source/net/yacy/document/parser/images/genericImageParser.java @@ -110,7 +110,7 @@ public class genericImageParser extends AbstractParser implements Idiom { final HashMap anchors = new HashMap(); final HashMap images = new HashMap(); // add this image to the map of images - images.put(sb.toString(), new ImageEntry(location, "", width, height)); + images.put(sb.toString(), new ImageEntry(location, "", width, height, -1)); return new Document( location, diff --git a/source/net/yacy/document/parser/rssParser.java b/source/net/yacy/document/parser/rssParser.java index 01f89839f..23d808dcf 100644 --- a/source/net/yacy/document/parser/rssParser.java +++ b/source/net/yacy/document/parser/rssParser.java @@ -105,7 +105,7 @@ public class rssParser extends AbstractParser implements Idiom { if (feed.getImage() != null) { final DigestURI imgURL = new DigestURI(feed.getImage(), null); - images.put(imgURL.hash(), new ImageEntry(imgURL, feedTitle, -1, -1)); + images.put(imgURL.hash(), new ImageEntry(imgURL, feedTitle, -1, -1, -1)); } // loop through the feed items