diff --git a/.classpath b/.classpath index d3f97a459..289093dae 100644 --- a/.classpath +++ b/.classpath @@ -99,5 +99,6 @@ + diff --git a/.project b/.project index 2ef8b129c..cabb4c415 100644 --- a/.project +++ b/.project @@ -5,6 +5,11 @@ + + org.eclipse.wst.jsdt.core.javascriptValidator + + + org.eclipse.jdt.core.javabuilder @@ -18,5 +23,6 @@ org.eclipse.jdt.core.javanature + org.eclipse.wst.jsdt.core.jsNature diff --git a/build.xml b/build.xml index ed1f6ddc3..77afde304 100644 --- a/build.xml +++ b/build.xml @@ -182,6 +182,7 @@ + diff --git a/htroot/ViewImage.java b/htroot/ViewImage.java index bccbb1642..d6838dc8b 100644 --- a/htroot/ViewImage.java +++ b/htroot/ViewImage.java @@ -26,14 +26,19 @@ import java.awt.Dimension; import java.awt.Graphics2D; import java.awt.Image; import java.awt.MediaTracker; +import java.awt.Rectangle; import java.awt.image.BufferedImage; import java.awt.image.Raster; -import java.io.ByteArrayInputStream; -import java.io.File; +import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.net.MalformedURLException; +import java.io.InputStream; +import java.util.Iterator; import java.util.Map; +import javax.imageio.ImageIO; +import javax.imageio.ImageReader; +import javax.imageio.stream.ImageInputStream; + import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; import net.yacy.cora.federate.yacy.CacheStrategy; @@ -44,12 +49,11 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.storage.ConcurrentARC; import net.yacy.cora.util.ConcurrentLog; import net.yacy.data.URLLicense; -import net.yacy.document.ImageParser; -import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.peers.graphics.EncodedImage; import net.yacy.repository.Blacklist.BlacklistType; +import net.yacy.repository.LoaderDispatcher; import net.yacy.search.Switchboard; import net.yacy.server.serverObjects; import net.yacy.server.serverSwitch; @@ -58,17 +62,30 @@ public class ViewImage { private static Map iconcache = new ConcurrentARC(1000, Math.max(10, Math.min(32, WorkflowProcessor.availableCPU * 2))); - private static String defaulticon = "htroot/env/grafics/dfltfvcn.ico"; - private static byte[] defaulticonb; - static { - try { - defaulticonb = FileUtils.read(new File(defaulticon)); - } catch (final IOException e) { - } - } - - public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env) { + /** + * Try parsing image from post "url" parameter or from "code" parameter. + * When image format is not supported, return directly image data. When + * image could be parsed, try encoding to target format specified by header + * "EXT". + * + * @param header + * request header + * @param post + * post parameters + * @param env + * environment + * @return an {@link EncodedImage} instance encoded in format specified in + * post, or an InputStream pointing to original image data + * @throws IOException + * when specified url is malformed, or a read/write error + * occured, or input or target image format is not supported. + * Sould end in a HTTP 500 error whose processing is more + * consistent across browsers than a response with zero content + * bytes. 
+ */ + public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env) + throws IOException { final Switchboard sb = (Switchboard) env; @@ -85,25 +102,13 @@ public class ViewImage { || sb.verifyAuthentication(header); // handle access rights DigestURL url = null; - if ((urlString.length() > 0) && (auth)) - try { - url = new DigestURL(urlString); - } catch (final MalformedURLException e1) { - url = null; - } + if ((urlString.length() > 0) && (auth)) { + url = new DigestURL(urlString); + } if ((url == null) && (urlLicense.length() > 0)) { urlString = URLLicense.releaseLicense(urlLicense); - try { - url = new DigestURL(urlString); - } catch (final MalformedURLException e1) { - url = null; - urlString = null; - } - } - - if (urlString == null) { - return null; + url = new DigestURL(urlString); } // get the image as stream @@ -115,52 +120,81 @@ public class ViewImage { if (image != null) { encodedImage = new EncodedImage(image, ext, post.getBoolean("isStatic")); } else { - byte[] resourceb = null; - if (url != null) - try { - String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName - : ClientIdentification.yacyInternetCrawlerAgentName); - ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName); - resourceb = sb.loader.loadContent(sb.loader.request(url, false, true), CacheStrategy.IFEXIST, - BlacklistType.SEARCH, agent); - } catch (final IOException e) { - ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage()); - } - boolean okToCache = true; - if (resourceb == null) { - if (urlString.endsWith(".ico")) { - // load default favicon dfltfvcn.ico - // Should not do this here : we can be displaying search - // image result of '.ico' type and do not want to display a - // default - if (defaulticonb == null) - try { - resourceb = FileUtils.read(new File(sb.getAppPath(), defaulticon)); - okToCache = false; - } catch (final IOException e) { - return null; - } - else { - resourceb = defaulticonb; - okToCache = false; - } - } else { - return null; - } - } String urlExt = MultiProtocolURL.getFileExtension(url.getFileName()); if (ext != null && ext.equalsIgnoreCase(urlExt) && isBrowserRendered(urlExt)) { - return new ByteArrayInputStream(resourceb); + return openInputStream(post, sb.loader, auth, url); } - // read image - encodedImage = parseAndScale(post, auth, urlString, ext, okToCache, resourceb); + ImageInputStream imageInStream = null; + InputStream inStream = null; + /* + * When opening a file, the most efficient is to open + * ImageInputStream directly on file + */ + if (url.isFile()) { + imageInStream = ImageIO.createImageInputStream(url.getFSFile()); + } else { + inStream = openInputStream(post, sb.loader, auth, url); + imageInStream = ImageIO.createImageInputStream(inStream); + } + try { + // read image + encodedImage = parseAndScale(post, auth, urlString, ext, imageInStream); + } finally { + /* + * imageInStream.close() method doesn't close source input + * stream + */ + if (inStream != null) { + try { + inStream.close(); + } catch (IOException ignored) { + } + } + } } return encodedImage; } - + + /** + * Open input stream on image url using provided loader. All parameters must + * not be null. + * + * @param post + * post parameters. + * @param loader. + * Resources loader. + * @param auth + * true when user has credentials to load full images. + * @param url + * image url. + * @return an open input stream instance (don't forget to close it). 
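+ *         Callers are responsible for closing the returned stream. A minimal
+ *         usage sketch (illustrative only, mirroring the call in respond()) :
+ *         <pre>
+ *         InputStream inStream = openInputStream(post, sb.loader, auth, url);
+ *         try {
+ *             // consume the stream
+ *         } finally {
+ *             inStream.close();
+ *         }
+ *         </pre>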
+ * @throws IOException + * when a read/write error occured. + */ + private static InputStream openInputStream(final serverObjects post, final LoaderDispatcher loader, + final boolean auth, DigestURL url) throws IOException { + InputStream inStream = null; + if (url != null) { + try { + String agentName = post.get("agentName", auth ? ClientIdentification.yacyIntranetCrawlerAgentName + : ClientIdentification.yacyInternetCrawlerAgentName); + ClientIdentification.Agent agent = ClientIdentification.getAgent(agentName); + inStream = loader.openInputStream(loader.request(url, false, true), CacheStrategy.IFEXIST, + BlacklistType.SEARCH, agent); + } catch (final IOException e) { + ConcurrentLog.fine("ViewImage", "cannot load: " + e.getMessage()); + throw e; + } + } + if (inStream == null) { + throw new IOException("Input stream could no be open"); + } + return inStream; + } + /** * @param formatName * informal file format name. For example : "png". @@ -180,73 +214,177 @@ public class ViewImage { } /** - * Process resourceb byte array to try to produce an Image instance - * eventually scaled and cropped depending on post parameters + * Process source image to try to produce an EncodedImage instance + * eventually scaled and clipped depending on post parameters. When + * processed, imageInStream is closed. * * @param post * request post parameters. Must not be null. * @param auth * true when access rigths are OK. * @param urlString - * image source URL. Must not be null. + * image source URL as String. Must not be null. * @param ext - * image file extension. May be null. - * @param okToCache - * true when image can be cached - * @param resourceb - * byte array. Must not be null. - * @return an Image instance when parsing is OK, or null. + * target image file format. May be null. + * @param imageInStream + * open stream on image content. Must not be null. + * @return an EncodedImage instance. 
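+ *         The instance wraps either the raw source bytes (when no scaling,
+ *         clipping or transcoding is necessary) or the processed image
+ *         re-encoded to the target format.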
+ * @throws IOException + * when image could not be parsed or encoded to specified format */ protected static EncodedImage parseAndScale(serverObjects post, boolean auth, String urlString, String ext, - boolean okToCache, byte[] resourceb) { + ImageInputStream imageInStream) throws IOException { EncodedImage encodedImage = null; - Image image = ImageParser.parse(urlString, resourceb); - - if (image != null) { - int maxwidth = post.getInt("maxwidth", 0); - int maxheight = post.getInt("maxheight", 0); - final boolean quadratic = post.containsKey("quadratic"); - boolean isStatic = post.getBoolean("isStatic"); - if (!auth || maxwidth != 0 || maxheight != 0) { - - // find original size - int h = image.getHeight(null); - int w = image.getWidth(null); - - // in case of not-authorized access shrink the image to - // prevent - // copyright problems, so that images are not larger than - // thumbnails - Dimension maxDimensions = calculateMaxDimensions(auth, w, h, maxwidth, maxheight); - - // if a quadratic flag is set, we cut the image out to be in - // quadratic shape - if (quadratic && w != h) { - image = makeSquare(image, h, w); - h = image.getHeight(null); - w = image.getWidth(null); - } - - Dimension finalDimensions = calculateDimensions(w, h, maxDimensions); + // BufferedImage image = ImageIO.read(imageInStream); + Iterator readers = ImageIO.getImageReaders(imageInStream); + if (!readers.hasNext()) { + try { + /* When no reader can be found, we have to close the stream */ + imageInStream.close(); + } catch (IOException ignoredException) { + } + /* + * Throw an exception, wich will end in a HTTP 500 response, better + * handled by browsers than an empty image + */ + throw new IOException("Image format is not supported."); + } + ImageReader reader = readers.next(); + reader.setInput(imageInStream, true, true); + + int maxwidth = post.getInt("maxwidth", 0); + int maxheight = post.getInt("maxheight", 0); + final boolean quadratic = post.containsKey("quadratic"); + boolean isStatic = post.getBoolean("isStatic"); + BufferedImage image = null; + boolean returnRaw = true; + if (!auth || maxwidth != 0 || maxheight != 0) { + + // find original size + final int originWidth = reader.getWidth(0); + final int originHeigth = reader.getHeight(0); + + // in case of not-authorized access shrink the image to + // prevent + // copyright problems, so that images are not larger than + // thumbnails + Dimension maxDimensions = calculateMaxDimensions(auth, originWidth, originHeigth, maxwidth, maxheight); + + // if a quadratic flag is set, we cut the image out to be in + // quadratic shape + int w = originWidth; + int h = originHeigth; + if (quadratic && originWidth != originHeigth) { + Rectangle square = getMaxSquare(originHeigth, originWidth); + h = square.height; + w = square.width; + } - if (w != finalDimensions.width && h != finalDimensions.height) { - image = scale(finalDimensions.width, finalDimensions.height, image); + Dimension finalDimensions = calculateDimensions(w, h, maxDimensions); + if (originWidth != finalDimensions.width || originHeigth != finalDimensions.height) { + returnRaw = false; + image = readImage(reader); + if (quadratic && originWidth != originHeigth) { + image = makeSquare(image); } - - if ((finalDimensions.width == 16) && (finalDimensions.height == 16) && okToCache) { - // this might be a favicon, store image to cache for - // faster - // re-load later on - iconcache.put(urlString, image); + image = scale(finalDimensions.width, finalDimensions.height, image); + } + if (finalDimensions.width 
== 16 && finalDimensions.height == 16) { + // this might be a favicon, store image to cache for + // faster + // re-load later on + if (image == null) { + returnRaw = false; + image = readImage(reader); } + iconcache.put(urlString, image); + } + } + /* Image do not need to be scaled or cropped */ + if (returnRaw) { + if (!reader.getFormatName().equalsIgnoreCase(ext) || imageInStream.getFlushedPosition() != 0) { + /* + * image parsing and reencoding is only needed when source image + * and target formats differ, or when first bytes have been discarded + */ + returnRaw = false; + image = readImage(reader); } + } + if (returnRaw) { + byte[] imageData = readRawImage(imageInStream); + encodedImage = new EncodedImage(imageData, ext, isStatic); + } else { + /* + * An error can still occur when transcoding from buffered image to + * target ext : in that case return null + */ encodedImage = new EncodedImage(image, ext, isStatic); + if (encodedImage.getImage().length() == 0) { + throw new IOException("Image could not be encoded to format : " + ext); + } } + return encodedImage; } + /** + * Read image using specified reader and close ImageInputStream source. + * Input must have bean set before using + * {@link ImageReader#setInput(Object)} + * + * @param reader + * image reader. Must not be null. + * @return buffered image + * @throws IOException + * when an error occured + */ + private static BufferedImage readImage(ImageReader reader) throws IOException { + BufferedImage image; + try { + image = reader.read(0); + } finally { + reader.dispose(); + Object input = reader.getInput(); + if (input instanceof ImageInputStream) { + try { + ((ImageInputStream) input).close(); + } catch (IOException ignoredException) { + } + } + } + return image; + } + + /** + * Read image data without parsing. + * + * @param inStream + * image source. Must not be null. First bytes must not have been marked discarded ({@link ImageInputStream#getFlushedPosition()} must be zero) + * @return image data as bytes + * @throws IOException + * when a read/write error occured. + */ + private static byte[] readRawImage(ImageInputStream inStream) throws IOException { + byte[] buffer = new byte[4096]; + int l = 0; + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + inStream.seek(0); + try { + while ((l = inStream.read(buffer)) >= 0) { + outStream.write(buffer, 0, l); + } + return outStream.toByteArray(); + } finally { + try { + inStream.close(); + } catch (IOException ignored) { + } + } + } + /** * Calculate image dimensions from image original dimensions, max * dimensions, and target dimensions. @@ -317,9 +455,9 @@ public class ViewImage { * image to scale. Must not be null. 
* @return a scaled image */ - protected static Image scale(final int width, final int height, Image image) { + protected static BufferedImage scale(final int width, final int height, final BufferedImage image) { // compute scaled image - final Image scaled = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING); + Image scaled = image.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING); final MediaTracker mediaTracker = new MediaTracker(new Container()); mediaTracker.addImage(scaled, 0); try { @@ -328,45 +466,91 @@ public class ViewImage { } // make a BufferedImage out of that - final BufferedImage i = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + BufferedImage result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); try { - i.createGraphics().drawImage(scaled, 0, 0, width, height, null); - image = i; + result.createGraphics().drawImage(scaled, 0, 0, width, height, null); // check outcome - final Raster raster = i.getData(); - int[] pixel = new int[3]; + final Raster raster = result.getData(); + int[] pixel = new int[raster.getSampleModel().getNumBands()]; pixel = raster.getPixel(0, 0, pixel); - if (pixel[0] != 0 || pixel[1] != 0 || pixel[2] != 0) - image = i; } catch (final Exception e) { - // java.lang.ClassCastException: [I cannot be cast to [B + /* + * Exception may be caused by source image color model : try now to + * convert to RGB before scaling + */ + try { + BufferedImage converted = EncodedImage.convertToRGB(image); + scaled = converted.getScaledInstance(width, height, Image.SCALE_AREA_AVERAGING); + mediaTracker.addImage(scaled, 1); + try { + mediaTracker.waitForID(1); + } catch (final InterruptedException e2) { + } + result = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); + result.createGraphics().drawImage(scaled, 0, 0, width, height, null); + + // check outcome + final Raster raster = result.getData(); + int[] pixel = new int[result.getSampleModel().getNumBands()]; + pixel = raster.getPixel(0, 0, pixel); + } catch (Exception e2) { + result = image; + } + + ConcurrentLog.fine("ViewImage", "Image could not be scaled"); } - return image; + return result; } - + + /** + * + * @param h + * image height + * @param w + * image width + * @return max square area fitting inside dimensions + */ + protected static Rectangle getMaxSquare(final int h, final int w) { + Rectangle square; + if (w > h) { + final int offset = (w - h) / 2; + square = new Rectangle(offset, 0, h, h); + } else { + final int offset = (h - w) / 2; + square = new Rectangle(0, offset, w, w); + } + return square; + } + /** * Crop image to make a square * * @param image * image to crop - * @param h - * @param w * @return */ - protected static Image makeSquare(Image image, final int h, final int w) { + protected static BufferedImage makeSquare(BufferedImage image) { + final int w = image.getWidth(); + final int h = image.getHeight(); if (w > h) { - final BufferedImage dst = new BufferedImage(h, h, BufferedImage.TYPE_INT_RGB); + final BufferedImage dst = new BufferedImage(h, h, BufferedImage.TYPE_INT_ARGB); Graphics2D g = dst.createGraphics(); final int offset = (w - h) / 2; - g.drawImage(image, 0, 0, h - 1, h - 1, offset, 0, h + offset, h - 1, null); - g.dispose(); + try { + g.drawImage(image, 0, 0, h - 1, h - 1, offset, 0, h + offset, h - 1, null); + } finally { + g.dispose(); + } image = dst; } else { - final BufferedImage dst = new BufferedImage(w, w, BufferedImage.TYPE_INT_RGB); + final BufferedImage dst = new BufferedImage(w, w, 
BufferedImage.TYPE_INT_ARGB); Graphics2D g = dst.createGraphics(); final int offset = (h - w) / 2; - g.drawImage(image, 0, 0, w - 1, w - 1, 0, offset, w - 1, w + offset, null); - g.dispose(); + try { + g.drawImage(image, 0, 0, w - 1, w - 1, 0, offset, w - 1, w + offset, null); + } finally { + g.dispose(); + } image = dst; } return image; diff --git a/htroot/env/base.css b/htroot/env/base.css index c65877797..3e1a05c1e 100644 --- a/htroot/env/base.css +++ b/htroot/env/base.css @@ -389,7 +389,7 @@ h4.linktitle { padding-left: 20px; } -img.favicon{ +img.favicon, object.favicon { margin: 0px 4px 0px -20px; width: 16px; height: 16px; diff --git a/htroot/env/oldie.css b/htroot/env/oldie.css index a1542ac84..f2bac3083 100644 --- a/htroot/env/oldie.css +++ b/htroot/env/oldie.css @@ -1,6 +1,6 @@ /* Fixes for IE 5 and lower */ /* IE 5 and lower can't display favicons, so hide them */ -img.favicon { +img.favicon, object.favicon { display: none; } diff --git a/htroot/proxymsg/urlproxyheader.java b/htroot/proxymsg/urlproxyheader.java index 2e102a878..c7f64edb3 100644 --- a/htroot/proxymsg/urlproxyheader.java +++ b/htroot/proxymsg/urlproxyheader.java @@ -1,3 +1,4 @@ +package proxymsg; import net.yacy.cora.protocol.RequestHeader; import net.yacy.search.Switchboard; diff --git a/htroot/yacy/ui/css/base.css b/htroot/yacy/ui/css/base.css index 515dd0afe..ffa83613a 100644 --- a/htroot/yacy/ui/css/base.css +++ b/htroot/yacy/ui/css/base.css @@ -109,7 +109,7 @@ div.ys { img { vertical-align: top; } -img.favicon{ +img.favicon, object.favicon { width: 16px; height: 16px; vertical-align: middle; diff --git a/htroot/yacy/ui/css/widget.css b/htroot/yacy/ui/css/widget.css index e79d390ba..541a82763 100644 --- a/htroot/yacy/ui/css/widget.css +++ b/htroot/yacy/ui/css/widget.css @@ -27,7 +27,7 @@ a.favicon { color:#20A020; text-decoration:none; } -img.favicon{ +img.favicon, object.favicon { width: 16px; height: 16px; vertical-align: middle; diff --git a/htroot/yacysearchitem.html b/htroot/yacysearchitem.html index 6fc9c07d8..0a69ff421 100644 --- a/htroot/yacysearchitem.html +++ b/htroot/yacysearchitem.html @@ -1,7 +1,9 @@ #(content)#::

- + + + #[title]#

#(heuristic)#:: diff --git a/lib/imageio-bmp-3.2.jar b/lib/imageio-bmp-3.2.jar new file mode 100755 index 000000000..1946caefe Binary files /dev/null and b/lib/imageio-bmp-3.2.jar differ diff --git a/pom.xml b/pom.xml index baa8f332e..13eed6181 100644 --- a/pom.xml +++ b/pom.xml @@ -430,6 +430,11 @@ icu4j 56.1 + + com.twelvemonkeys.imageio + imageio-bmp + 3.2 + com.twelvemonkeys.imageio imageio-tiff diff --git a/readme.mediawiki b/readme.mediawiki index c3f8a887d..7848a276f 100644 --- a/readme.mediawiki +++ b/readme.mediawiki @@ -55,7 +55,7 @@ all these locations into one search result. You need java 1.7 or later to run YaCy, nothing else. Please download it from http://www.java.com -YaCy also runs on IcedTea6. +YaCy also runs on IcedTea7. See http://icedtea.classpath.org NO OTHER SOFTWARE IS REQUIRED! diff --git a/source/net/yacy/cora/protocol/http/HTTPClient.java b/source/net/yacy/cora/protocol/http/HTTPClient.java index 7a9161149..f2f757335 100644 --- a/source/net/yacy/cora/protocol/http/HTTPClient.java +++ b/source/net/yacy/cora/protocol/http/HTTPClient.java @@ -712,31 +712,62 @@ public class HTTPClient { } } - private static byte[] getByteArray(final HttpEntity entity, final int maxBytes) throws IOException { - final InputStream instream = entity.getContent(); - if (instream == null) { - return null; - } - try { - int i = maxBytes < 0 ? (int)entity.getContentLength() : Math.min(maxBytes, (int)entity.getContentLength()); - if (i < 0) { - i = 4096; - } - final ByteArrayBuffer buffer = new ByteArrayBuffer(i); - byte[] tmp = new byte[4096]; - int l, sum = 0; - while((l = instream.read(tmp)) != -1) { - sum += l; - if (maxBytes >= 0 && sum > maxBytes) throw new IOException("Download exceeded maximum value of " + maxBytes + " bytes"); - buffer.append(tmp, 0, l); - } - return buffer.toByteArray(); - } catch (final OutOfMemoryError e) { - throw new IOException(e.toString()); - } finally { - instream.close(); - } - } + /** + * Return entity content loaded as a byte array + * @param entity HTTP entity + * @param maxBytes maximum bytes to read. -1 means no maximum limit. + * @return content bytes or null when entity content is null. 
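+ * The entity content stream is always closed before this method returns,
+ * whether reading succeeds or fails. Illustrative call, as used by the
+ * crawler HTTPLoader :
+ * <pre>
+ * byte[] content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
+ * </pre>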
+ * @throws IOException when a read error occured or content length is over maxBytes + */ + public static byte[] getByteArray(final HttpEntity entity, int maxBytes) throws IOException { + final InputStream instream = entity.getContent(); + if (instream == null) { + return null; + } + try { + long contentLength = entity.getContentLength(); + /* + * When no maxBytes is specified, the default limit is + * Integer.MAX_VALUE as a byte array size can not be over + */ + if (maxBytes < 0) { + maxBytes = Integer.MAX_VALUE; + } + /* + * Content length may already be known now : check it before + * downloading + */ + if (contentLength > maxBytes) { + throw new IOException("Content to download exceed maximum value of " + maxBytes + " bytes"); + } + int initialSize = Math.min(maxBytes, (int) contentLength); + /* ContentLenght may be negative because unknown for now */ + if (initialSize < 0) { + initialSize = 4096; + } + final ByteArrayBuffer buffer = new ByteArrayBuffer(initialSize); + byte[] tmp = new byte[4096]; + int l = 0; + /* Sum is a long to enable check against Integer.MAX_VALUE */ + long sum = 0; + while ((l = instream.read(tmp)) != -1) { + sum += l; + /* + * Check total length while downloading as content lenght might + * not be known at beginning + */ + if (sum > maxBytes) { + throw new IOException("Download exceeded maximum value of " + maxBytes + " bytes"); + } + buffer.append(tmp, 0, l); + } + return buffer.toByteArray(); + } catch (final OutOfMemoryError e) { + throw new IOException(e.toString()); + } finally { + instream.close(); + } + } private void setHeaders(final HttpUriRequest httpUriRequest) { if (this.headers != null) { diff --git a/source/net/yacy/cora/util/HTTPInputStream.java b/source/net/yacy/cora/util/HTTPInputStream.java new file mode 100755 index 000000000..035edfad9 --- /dev/null +++ b/source/net/yacy/cora/util/HTTPInputStream.java @@ -0,0 +1,125 @@ +/** + * HTTPInputStream + * Copyright 2014 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany + * First published 26.11.2014 on http://yacy.net + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program in the file lgpl21.txt + * If not, see . + */ + +package net.yacy.cora.util; + +import java.io.IOException; +import java.io.InputStream; + +import net.yacy.cora.protocol.http.HTTPClient; + +/** + * A HTTP InputStream delegating to HTTPClient. Use it when streaming HTTP content to easily finish HTTP client when closing stream. + * @author luc + * + */ +public class HTTPInputStream extends InputStream { + + /** HTTP client */ + private HTTPClient httpClient; + + /** Encapsulated HTTP content stream */ + private InputStream contentStream; + + + /** + * Constructs from a httpClient. + * @param httpClient a httpClient with accessible stream content. 
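+ *        The client request must already have been executed so that its
+ *        content stream is available. Closing this HTTPInputStream calls
+ *        HTTPClient.finish() on the wrapped client so the HTTP connection
+ *        is properly closed.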
+ * @throws IOException when content stream can not be open on httpClient + */ + public HTTPInputStream(HTTPClient httpClient) throws IOException { + if(httpClient == null) { + throw new IllegalArgumentException("httpClient is null"); + } + this.httpClient = httpClient; + this.contentStream = httpClient.getContentstream(); + if(this.contentStream == null) { + throw new IOException("content stream is null"); + } + } + + /** + * Close properly HTTP connection with httpClient + */ + @Override + public void close() throws IOException { + httpClient.finish(); + } + + + @Override + public int read() throws IOException { + return contentStream.read(); + } + + + @Override + public int hashCode() { + return contentStream.hashCode(); + } + + @Override + public int read(byte[] b) throws IOException { + return contentStream.read(b); + } + + @Override + public boolean equals(Object obj) { + return contentStream.equals(obj); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return contentStream.read(b, off, len); + } + + @Override + public long skip(long n) throws IOException { + return contentStream.skip(n); + } + + @Override + public String toString() { + return contentStream.toString(); + } + + @Override + public int available() throws IOException { + return contentStream.available(); + } + + @Override + public synchronized void mark(int readlimit) { + contentStream.mark(readlimit); + } + + @Override + public synchronized void reset() throws IOException { + contentStream.reset(); + } + + @Override + public boolean markSupported() { + return contentStream.markSupported(); + } + + + +} diff --git a/source/net/yacy/crawler/retrieval/FileLoader.java b/source/net/yacy/crawler/retrieval/FileLoader.java index cf0b683e7..12e809914 100644 --- a/source/net/yacy/crawler/retrieval/FileLoader.java +++ b/source/net/yacy/crawler/retrieval/FileLoader.java @@ -137,7 +137,6 @@ public class FileLoader { // load the resource InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null); byte[] b = FileUtils.read(is); - is.close(); // create response with loaded content final CrawlProfile profile = this.sb.crawler.get(ASCII.getBytes(request.profileHandle())); diff --git a/source/net/yacy/crawler/retrieval/HTTPLoader.java b/source/net/yacy/crawler/retrieval/HTTPLoader.java index 1381548e5..4f427a1bf 100644 --- a/source/net/yacy/crawler/retrieval/HTTPLoader.java +++ b/source/net/yacy/crawler/retrieval/HTTPLoader.java @@ -24,7 +24,9 @@ package net.yacy.crawler.retrieval; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.federate.solr.FailCategory; @@ -34,7 +36,9 @@ import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.util.ConcurrentLog; +import net.yacy.cora.util.HTTPInputStream; import net.yacy.crawler.CrawlSwitchboard; +import net.yacy.crawler.data.Cache; import net.yacy.crawler.data.CrawlProfile; import net.yacy.crawler.data.Latency; import net.yacy.kelondro.io.ByteCount; @@ -75,6 +79,208 @@ public final class HTTPLoader { Latency.updateAfterLoad(entry.url(), System.currentTimeMillis() - start); return doc; } + + /** + * Open input stream on a requested HTTP resource. When resource is small, fully load it and returns a ByteArrayInputStream instance. 
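+ * Otherwise an open HTTPInputStream wrapping the live connection is returned
+ * (closing it finishes the underlying HTTP client). When caching is enabled
+ * for the crawl profile and the content is small and not local, the loaded
+ * bytes are also stored to the cache. Redirections are followed, up to
+ * retryCount times, when enabled in the configuration.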
+ * @param request + * @param profile crawl profile + * @param retryCount remaining redirect retries count + * @param maxFileSize max file size to load. -1 means no limit. + * @param blacklistType blacklist type to use + * @param agent agent identifier + * @return an open input stream. Don't forget to close it. + * @throws IOException when an error occured + */ + public InputStream openInputStream(final Request request, CrawlProfile profile, final int retryCount, + final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) + throws IOException { + if (retryCount < 0) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1); + throw new IOException( + "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$"); + } + DigestURL url = request.url(); + + final String host = url.getHost(); + if (host == null || host.length() < 2) { + throw new IOException("host is not well-formed: '" + host + "'"); + } + final String path = url.getFile(); + int port = url.getPort(); + final boolean ssl = url.getProtocol().equals("https"); + if (port < 0) + port = (ssl) ? 443 : 80; + + // check if url is in blacklist + final String hostlow = host.toLowerCase(); + if (blacklistType != null && Switchboard.urlBlacklist.isListed(blacklistType, hostlow, path)) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_LOAD_CONTEXT, + "url in blacklist", -1); + throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$"); + } + + // resolve yacy and yacyh domains + final AlternativeDomainNames yacyResolver = this.sb.peers; + if (yacyResolver != null) { + final String yAddress = yacyResolver.resolve(host); + if (yAddress != null) { + url = new DigestURL(url.getProtocol() + "://" + yAddress + path); + } + } + + // create a request header + final RequestHeader requestHeader = createRequestheader(request, agent); + + // HTTP-Client + final HTTPClient client = new HTTPClient(agent); + client.setRedirecting(false); // we want to handle redirection + // ourselves, so we don't index pages + // twice + client.setTimout(this.socketTimeout); + client.setHeader(requestHeader.entrySet()); + + // send request + client.GET(url, false); + final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); + final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); + String requestURLString = request.url().toNormalform(true); + + // check redirection + if (statusCode > 299 && statusCode < 310) { + + final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode, + responseHeader, requestURLString); + + if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) { + // we have two use cases here: loading from a crawl or just + // loading the url. 
Check this: + if (profile != null && !CrawlSwitchboard.DEFAULT_PROFILES.contains(profile.name())) { + // put redirect url on the crawler queue to repeat a + // double-check + request.redirectURL(redirectionUrl); + this.sb.crawlStacker.stackCrawl(request); + // in the end we must throw an exception (even if this is + // not an error, just to abort the current process + throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check"); + } + + // if we are already doing a shutdown we don't need to retry + // crawling + if (Thread.currentThread().isInterrupted()) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.FINAL_LOAD_CONTEXT, "server shutdown", statusCode); + throw new IOException( + "CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$"); + } + + // retry crawling with new url + request.redirectURL(redirectionUrl); + return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent); + } + // we don't want to follow redirects + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode); + throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine() + + "' for URL '" + requestURLString + "'$"); + } else if (statusCode == 200 || statusCode == 203) { + // the transfer is ok + + /* + * When content is not large (less than 1MB), we have better cache it if cache is enabled and url is not local + */ + long contentLength = client.getHttpResponse().getEntity().getContentLength(); + if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) { + byte[] content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize); + + try { + Cache.store(url, responseHeader, content); + } catch (final IOException e) { + this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e); + } + + return new ByteArrayInputStream(content); + } + /* + * Returns a HTTPInputStream delegating to + * client.getContentstream(). Close method will ensure client is + * properly closed. + */ + return new HTTPInputStream(client); + } else { + // if the response has not the right response type then reject file + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode); + throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine() + + "' for URL '" + requestURLString + "'$"); + } + } + + /** + * Extract redirect URL from response header. Status code is supposed to be between 299 and 310. Parameters must not be null. + * @return redirect URL + * @throws IOException when an error occured + */ + private DigestURL extractRedirectURL(final Request request, CrawlProfile profile, DigestURL url, + final HTTPClient client, final int statusCode, final ResponseHeader responseHeader, String requestURLString) + throws IOException { + // read redirection URL + String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION); + redirectionUrlString = redirectionUrlString == null ? 
"" : redirectionUrlString.trim(); + + if (redirectionUrlString.isEmpty()) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.TEMPORARY_NETWORK_FAILURE, + "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode); + throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + + "' for URL '" + requestURLString + "'$"); + } + + // normalize URL + final DigestURL redirectionUrl = DigestURL.newURL(request.url(), redirectionUrlString); + + // restart crawling with new url + this.log.info("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + + requestURLString); + this.log.info("CRAWLER ..Redirecting request to: " + redirectionUrl.toNormalform(false)); + + this.sb.webStructure.generateCitationReference(url, redirectionUrl); + + if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, + FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode); + } + return redirectionUrl; + } + + /** + * Create request header for loading content. + * @param request search request + * @param agent agent identification information + * @return a request header + * @throws IOException when an error occured + */ + private RequestHeader createRequestheader(final Request request, final ClientIdentification.Agent agent) + throws IOException { + final RequestHeader requestHeader = new RequestHeader(); + requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent); + DigestURL refererURL = null; + if (request.referrerhash() != null) { + refererURL = this.sb.getURL(request.referrerhash()); + } + if (refererURL != null) { + requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true)); + } + requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT)); + requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, + this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE)); + requestHeader.put(HeaderFramework.ACCEPT_CHARSET, + this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET)); + requestHeader.put(HeaderFramework.ACCEPT_ENCODING, + this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING)); + return requestHeader; + } private Response load(final Request request, CrawlProfile profile, final int retryCount, final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException { @@ -112,15 +318,7 @@ public final class HTTPLoader { Response response = null; // create a request header - final RequestHeader requestHeader = new RequestHeader(); - requestHeader.put(HeaderFramework.USER_AGENT, agent.userAgent); - DigestURL refererURL = null; - if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash()); - if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true)); - requestHeader.put(HeaderFramework.ACCEPT, this.sb.getConfig("crawler.http.accept", DEFAULT_ACCEPT)); - requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, this.sb.getConfig("crawler.http.acceptLanguage", DEFAULT_LANGUAGE)); - requestHeader.put(HeaderFramework.ACCEPT_CHARSET, this.sb.getConfig("crawler.http.acceptCharset", DEFAULT_CHARSET)); - requestHeader.put(HeaderFramework.ACCEPT_ENCODING, this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING)); + final RequestHeader requestHeader = 
createRequestheader(request, agent); // HTTP-Client final HTTPClient client = new HTTPClient(agent); @@ -137,27 +335,8 @@ public final class HTTPLoader { // check redirection if (statusCode > 299 && statusCode < 310) { - // read redirection URL - String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION); - redirectionUrlString = redirectionUrlString == null ? "" : redirectionUrlString.trim(); - - if (redirectionUrlString.isEmpty()) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode); - throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine() + "' for URL '" + requestURLString + "'$"); - } - - // normalize URL - final DigestURL redirectionUrl = DigestURL.newURL(request.url(), redirectionUrlString); - - // restart crawling with new url - this.log.info("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL " + requestURLString); - this.log.info("CRAWLER ..Redirecting request to: " + redirectionUrl.toNormalform(false)); - - this.sb.webStructure.generateCitationReference(url, redirectionUrl); - - if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) { - this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode); - } + final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode, + responseHeader, requestURLString); if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) { // we have two use cases here: loading from a crawl or just loading the url. 
Check this: diff --git a/source/net/yacy/crawler/retrieval/Response.java b/source/net/yacy/crawler/retrieval/Response.java index 4e1acb6ef..642994a4a 100644 --- a/source/net/yacy/crawler/retrieval/Response.java +++ b/source/net/yacy/crawler/retrieval/Response.java @@ -69,6 +69,9 @@ public class Response { private byte[] content; private int status; // tracker indexing status, see status defs below private final boolean fromCache; + + /** Maximum file size to put in cache for crawler */ + public static final long CRAWLER_MAX_SIZE_TO_CACHE = 10 * 1024L * 1024L; /** * doctype calculation by file extension @@ -387,7 +390,7 @@ public class Response { public String shallStoreCacheForCrawler() { // check storage size: all files will be handled in RAM before storage, so they must not exceed // a given size, which we consider as 1MB - if (size() > 10 * 1024L * 1024L) return "too_large_for_caching_" + size(); + if (size() > CRAWLER_MAX_SIZE_TO_CACHE) return "too_large_for_caching_" + size(); // check status code if (!validResponseStatus()) { diff --git a/source/net/yacy/crawler/retrieval/SMBLoader.java b/source/net/yacy/crawler/retrieval/SMBLoader.java index 67e3fdee1..8de78c8b0 100644 --- a/source/net/yacy/crawler/retrieval/SMBLoader.java +++ b/source/net/yacy/crawler/retrieval/SMBLoader.java @@ -155,7 +155,6 @@ public class SMBLoader { // load the resource InputStream is = url.getInputStream(ClientIdentification.yacyInternetCrawlerAgent, null, null); byte[] b = FileUtils.read(is); - is.close(); // create response with loaded content final CrawlProfile profile = this.sb.crawler.get(request.profileHandle().getBytes()); diff --git a/source/net/yacy/document/ImageParser.java b/source/net/yacy/document/ImageParser.java index aaff10437..b66518a37 100644 --- a/source/net/yacy/document/ImageParser.java +++ b/source/net/yacy/document/ImageParser.java @@ -20,82 +20,43 @@ package net.yacy.document; -import java.awt.Container; import java.awt.Image; -import java.awt.MediaTracker; +import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.IOException; import javax.imageio.ImageIO; import net.yacy.cora.util.ConcurrentLog; -import net.yacy.document.parser.images.bmpParser; -import net.yacy.document.parser.images.icoParser; public class ImageParser { - public static final Image parse(final String filename, final byte[] source) { - final MediaTracker mediaTracker = new MediaTracker(new Container()); - Image image = null; - if (((filename.endsWith(".ico")) || (filename.endsWith(".bmp"))) && (bmpParser.isBMP(source))) { - // parse image with BMP parser - image = bmpParser.parse(source).getImage(); - if (image == null) { - if (ConcurrentLog.isFine("IMAGEPARSER")) { - ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : bmpParser failed for " + filename); - } - return null; - } - } else if ((filename.endsWith(".ico")) && (icoParser.isICO(source))) { - // parse image with ICO parser - icoParser icoparser; - try { - icoparser = new icoParser(source); - image = icoparser.getImage(0); - } catch (final Throwable e) { - if (ConcurrentLog.isFine("IMAGEPARSER")) { - ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : could not parse image " + filename, e); - } - } - if (image == null) { - if (ConcurrentLog.isFine("IMAGEPARSER")) { - ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : icoParser failed for " + filename); - } - return null; - } - } else { - try { - image = ImageIO.read(new ByteArrayInputStream(source)); - } catch(IOException e) { - if (ConcurrentLog.isFine("IMAGEPARSER")) { 
- ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : could not parse image " + filename, e); - } - } - if (image == null) { - if (ConcurrentLog.isFine("IMAGEPARSER")) { - ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : ImageIO failed for " + filename); - } - return null; - } - } - if (image == null) { - return null; - } - - final int handle = image.hashCode(); - mediaTracker.addImage(image, handle); - try { - mediaTracker.waitForID(handle); - - if (mediaTracker.isErrorID(handle)) { // true if status ERRORD during loading (happens on not supported formats too) - mediaTracker.removeImage(image, handle); - image = null; // return null to indicate source not handled - } - } catch (final InterruptedException e) { - return null; - } - - return image; - } + /** + * @param filename source image file url + * @param source image content as bytes + * @return an Image instance parsed from image content bytes, or null if no parser can handle image format or an error occured + */ + public static final Image parse(final String filename, final byte[] source) { + BufferedImage image = null; + try { + image = ImageIO.read(new ByteArrayInputStream(source)); + /* + * With ImageIO.read, image is already loaded as a complete BufferedImage, no need to wait + * full loading with a MediaTracker + */ + } catch (IOException e) { + if (ConcurrentLog.isFine("IMAGEPARSER")) { + ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : could not parse image " + filename, e); + } + } + if (image == null) { + if (ConcurrentLog.isFine("IMAGEPARSER")) { + ConcurrentLog.fine("IMAGEPARSER", "IMAGEPARSER.parse : ImageIO failed for " + filename); + } + return null; + } + + return image; + } } diff --git a/source/net/yacy/document/parser/images/bmpParser.java b/source/net/yacy/document/parser/images/bmpParser.java index 33a3fc9cf..694a83f99 100644 --- a/source/net/yacy/document/parser/images/bmpParser.java +++ b/source/net/yacy/document/parser/images/bmpParser.java @@ -32,8 +32,17 @@ import java.io.IOException; import javax.imageio.ImageIO; +import com.twelvemonkeys.imageio.plugins.bmp.BMPImageReader; + import net.yacy.cora.util.ConcurrentLog; +/** + * + * @deprecated use ImageIO {@link BMPImageReader} from github.com/haraldk/TwelveMonkeys + * library (imageio-bmp-3.2.jar), which as better BMP format + * variants support + */ +@Deprecated public class bmpParser { // this is a implementation of http://de.wikipedia.org/wiki/Windows_Bitmap diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java index 74e04f779..78d645fd9 100644 --- a/source/net/yacy/document/parser/images/genericImageParser.java +++ b/source/net/yacy/document/parser/images/genericImageParser.java @@ -46,6 +46,13 @@ import java.util.Set; import javax.imageio.ImageIO; +import com.drew.imaging.jpeg.JpegMetadataReader; +import com.drew.lang.GeoLocation; +import com.drew.metadata.Directory; +import com.drew.metadata.Metadata; +import com.drew.metadata.Tag; +import com.drew.metadata.exif.GpsDirectory; + import net.yacy.cora.document.id.AnchorURL; import net.yacy.cora.document.id.DigestURL; import net.yacy.cora.document.id.MultiProtocolURL; @@ -55,16 +62,8 @@ import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.VocabularyScraper; import net.yacy.document.parser.html.ImageEntry; -import net.yacy.document.parser.images.bmpParser.IMAGEMAP; import net.yacy.kelondro.util.FileUtils; -import com.drew.imaging.jpeg.JpegMetadataReader; -import 
com.drew.lang.GeoLocation; -import com.drew.metadata.Directory; -import com.drew.metadata.Metadata; -import com.drew.metadata.Tag; -import com.drew.metadata.exif.GpsDirectory; - /** * Parser for images, bmp and jpeg and all supported by the Java Image I/O API * by default java ImageIO supports bmp, gif, jpg, jpeg, png, wbmp (tif if jai-imageio is in classpath/registered) @@ -75,11 +74,9 @@ public class genericImageParser extends AbstractParser implements Parser { public genericImageParser() { super("Generic Image Parser"); - SUPPORTED_EXTENSIONS.add("bmp"); SUPPORTED_EXTENSIONS.add("jpe"); // not listed in ImageIO extension but sometimes uses for jpeg SUPPORTED_EXTENSIONS.addAll(Arrays.asList(ImageIO.getReaderFileSuffixes())); - SUPPORTED_MIME_TYPES.add("image/bmp"); SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently SUPPORTED_MIME_TYPES.addAll(Arrays.asList(ImageIO.getReaderMIMETypes())); } @@ -102,21 +99,7 @@ public class genericImageParser extends AbstractParser implements Parser { String ext = MultiProtocolURL.getFileExtension(filename); double gpslat = 0; double gpslon = 0; - if (mimeType.equals("image/bmp") || ext.equals("bmp")) { - byte[] b; - try { - b = FileUtils.read(source); - } catch (final IOException e) { - ConcurrentLog.logException(e); - throw new Parser.Failure(e.getMessage(), location); - } - if (bmpParser.isBMP(b)) { - final IMAGEMAP imap = bmpParser.parse(b); - ii = parseJavaImage(location, imap.getImage()); - } else { - throw new Parser.Failure("Not supported by bmpParser", location); - } - } else if (mimeType.equals("image/jpeg") || ext.equals("jpg") || ext.equals("jpeg") || ext.equals("jpe")) { + if (mimeType.equals("image/jpeg") || ext.equals("jpg") || ext.equals("jpeg") || ext.equals("jpe")) { // use the exif parser from // http://www.drewnoakes.com/drewnoakes.com/code/exif/ // javadoc is at: http://www.drewnoakes.com/drewnoakes.com/code/exif/javadoc/ diff --git a/source/net/yacy/document/parser/images/icoParser.java b/source/net/yacy/document/parser/images/icoParser.java index 4ff965387..3036a0219 100644 --- a/source/net/yacy/document/parser/images/icoParser.java +++ b/source/net/yacy/document/parser/images/icoParser.java @@ -32,9 +32,17 @@ import java.io.IOException; import javax.imageio.ImageIO; -import net.yacy.cora.util.ConcurrentLog; +import com.twelvemonkeys.imageio.plugins.bmp.ICOImageReader; +import net.yacy.cora.util.ConcurrentLog; +/** +* +* @deprecated use ImageIO {@link ICOImageReader} from github.com/haraldk/TwelveMonkeys +* library (imageio-bmp-3.2.jar), which as better BMP format +* variants support, and support PNG encoded icons. 
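+* Once that jar is on the classpath (it is added to lib/ and pom.xml in this
+* change), its readers are registered with ImageIO automatically, so ICO files
+* can be read through the standard ImageIO API as now done in ImageParser and
+* ViewImage.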
+*/ +@Deprecated public class icoParser { // this is a implementation of http://msdn2.microsoft.com/en-us/library/ms997538(d=printer).aspx diff --git a/source/net/yacy/http/servlets/YaCyDefaultServlet.java b/source/net/yacy/http/servlets/YaCyDefaultServlet.java index af07f20df..5ad95e9d0 100644 --- a/source/net/yacy/http/servlets/YaCyDefaultServlet.java +++ b/source/net/yacy/http/servlets/YaCyDefaultServlet.java @@ -866,7 +866,7 @@ public class YaCyDefaultServlet extends HttpServlet { if (height < 0) { height = 96; // bad hack } - final BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + final BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); bi.createGraphics().drawImage(i, 0, 0, width, height, null); result = RasterPlotter.exportImage(bi, targetExt); } @@ -882,15 +882,7 @@ public class YaCyDefaultServlet extends HttpServlet { } if (tmp instanceof InputStream) { - final InputStream is = (InputStream) tmp; - final String mimeType = Classification.ext2mime(targetExt, TEXT_HTML.asString()); - response.setContentType(mimeType); - response.setStatus(HttpServletResponse.SC_OK); - byte[] buffer = new byte[4096]; - int l, size = 0; - while ((l = is.read(buffer)) > 0) {response.getOutputStream().write(buffer, 0, l); size += l;} - response.setContentLength(size); - is.close(); + writeInputStream(response, targetExt, (InputStream)tmp); return; } @@ -990,6 +982,35 @@ public class YaCyDefaultServlet extends HttpServlet { } } } + + + /** + * Write input stream content to response and close input stream. + * @param response servlet response. Must not be null. + * @param targetExt response file format + * @param tmp + * @throws IOException when a read/write error occured. + */ + private void writeInputStream(HttpServletResponse response, String targetExt, InputStream inStream) + throws IOException { + final String mimeType = Classification.ext2mime(targetExt, TEXT_HTML.asString()); + response.setContentType(mimeType); + response.setStatus(HttpServletResponse.SC_OK); + byte[] buffer = new byte[4096]; + int l, size = 0; + try { + while ((l = inStream.read(buffer)) > 0) { + response.getOutputStream().write(buffer, 0, l); + size += l; + } + response.setContentLength(size); + } finally { + try { + inStream.close(); + } catch(IOException ignored) { + } + } + } private static String appendPath(String proplist, String path) { if (proplist.length() == 0) return path; diff --git a/source/net/yacy/kelondro/util/FileUtils.java b/source/net/yacy/kelondro/util/FileUtils.java index 37e7b94d9..4e78cb69f 100644 --- a/source/net/yacy/kelondro/util/FileUtils.java +++ b/source/net/yacy/kelondro/util/FileUtils.java @@ -268,8 +268,24 @@ public final class FileUtils { copy(new ByteArrayInputStream(source), dest); } + /** + * Read fully source stream and close it. + * @param source must not be null + * @return source content as a byte array. 
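+ * The source stream is closed in all cases before this method returns, even
+ * when reading fails.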
+ * @throws IOException when a read/write error occured + */ public static byte[] read(final InputStream source) throws IOException { - return read(source, -1); + byte[] content; + try { + content = read(source, -1); + } finally { + /* source input stream must be closed here in all cases */ + try { + source.close(); + } catch(IOException ignoredException) { + } + } + return content; } public static byte[] read(final InputStream source, final int count) throws IOException { diff --git a/source/net/yacy/peers/graphics/EncodedImage.java b/source/net/yacy/peers/graphics/EncodedImage.java index a2bca1929..7e08c61be 100644 --- a/source/net/yacy/peers/graphics/EncodedImage.java +++ b/source/net/yacy/peers/graphics/EncodedImage.java @@ -22,6 +22,7 @@ package net.yacy.peers.graphics; import java.awt.Image; import java.awt.image.BufferedImage; +import java.awt.image.ColorConvertOp; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -33,9 +34,30 @@ public class EncodedImage { private ByteBuffer image; private String extension; private boolean isStatic; + + /** + * Instanciates an encoded image with raw image data. + * Image ByteBuffer will be empty when encoding format is not supported. + * @param imageData the image data encode in format specified. Must not be null. + * @param format the image format of imageData. Must not be null. + * @param isStatic shall be true if the image will never change, false if not + * @throws IllegalArgumentException when imageData or format parameter is null + */ + public EncodedImage(final byte[] imageData, final String format, final boolean isStatic) { + if(imageData == null) { + throw new IllegalArgumentException("imageData parameter is null"); + } + if(format == null) { + throw new IllegalArgumentException("format parameter is null"); + } + this.image = new ByteBuffer(imageData); + this.extension = format; + this.isStatic = isStatic; + } /** - * set an encoded image; prefer this over methods with Image-source objects because png generation is faster when done from RasterPlotter sources + * set an encoded image; prefer this over methods with Image-source objects because png generation is faster when done from RasterPlotter sources. + * Image ByteBuffer will be empty when encoding format is not supported. * @param sourceImage the image * @param targetExt the target extension of the image when converted into a file * @param isStatic shall be true if the image will never change, false if not @@ -47,7 +69,7 @@ public class EncodedImage { } /** - * set an encoded image from a buffered image + * set an encoded image from a buffered image. Image ByteBuffer will be empty when encoding format is not supported. * @param sourceImage the image * @param targetExt the target extension of the image when converted into a file * @param isStatic shall be true if the image will never change, false if not @@ -55,12 +77,21 @@ public class EncodedImage { public EncodedImage(final BufferedImage bi, final String targetExt, final boolean isStatic) { this.extension = targetExt; this.image = RasterPlotter.exportImage(bi, targetExt); + if(this.image == null || this.image.length() == 0) { + /* + * Buffered image rendering to targetExt format might fail because + * no image writer support source image color model. 
Let's try + * converting source image to RGB before rendering + */ + BufferedImage converted = convertToRGB(bi); + this.image = RasterPlotter.exportImage(converted, targetExt); + } this.isStatic = isStatic; } /** - * set an encoded image from a buffered image + * set an encoded image from a buffered image. Image ByteBuffer will be empty when encoding format is not supported. * @param sourceImage the image * @param targetExt the target extension of the image when converted into a file * @param isStatic shall be true if the image will never change, false if not @@ -78,7 +109,7 @@ public class EncodedImage { if (height < 0) { height = 96; // bad hack } - final BufferedImage sourceImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); + final BufferedImage sourceImage = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); sourceImage.createGraphics().drawImage(i, 0, 0, width, height, null); this.image = RasterPlotter.exportImage(sourceImage, targetExt); } @@ -101,7 +132,7 @@ public class EncodedImage { } /** - * get the encoded image + * get the encoded image data (empty when encoding format is not supported) * @return the bytes of the image encoded into the target extension format */ public ByteBuffer getImage() { @@ -123,4 +154,25 @@ public class EncodedImage { public boolean isStatic() { return this.isStatic; } + + /** + * If source source image colorspace is not RGB or ARGB, convert it to RGB or ARGB when alpha channel is present. + * @param image source image. Must not be null. + * @return converted image or source image when already RGB. + */ + public static BufferedImage convertToRGB(BufferedImage image) { + BufferedImage converted = image; + if(image.getType() != BufferedImage.TYPE_INT_RGB && image.getType() != BufferedImage.TYPE_INT_ARGB) { + int targetType; + if(image.getColorModel() != null && image.getColorModel().hasAlpha()) { + targetType = BufferedImage.TYPE_INT_ARGB; + } else { + targetType = BufferedImage.TYPE_INT_RGB; + } + BufferedImage target = new BufferedImage(image.getWidth(), image.getHeight(), targetType); + ColorConvertOp convertOP = new ColorConvertOp(null); + converted = convertOP.filter(image, target); + } + return converted; + } } diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 1da658f65..da52e15ab 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -26,8 +26,10 @@ package net.yacy.repository; +import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.util.Arrays; import java.util.Date; @@ -209,7 +211,82 @@ public final class LoaderDispatcher { } // check if we have the page in the cache - if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) { + Response response = loadFromCache(request, cacheStrategy, agent, url, crawlProfile); + if(response != null) { + return response; + } + + // check case where we want results from the cache exclusively, and never from the Internet (offline mode) + if (cacheStrategy == CacheStrategy.CACHEONLY) { + // we had a chance to get the content from the cache .. its over. We don't have it. + throw new IOException("cache only strategy"); + } + + // now forget about the cache, nothing there. 
Try to load the content from the Internet + + // check access time: this is a double-check (we checked possibly already in the balancer) + // to make sure that we don't DoS the target by mistake + checkAccessTime(agent, url); + + // now it's for sure that we will access the target. Remember the access time + if (host != null) { + if (accessTime.size() > accessTimeMaxsize) accessTime.clear(); // prevent a memory leak here + accessTime.put(host, System.currentTimeMillis()); + } + + // load resource from the internet + if (protocol.equals("http") || protocol.equals("https")) { + response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, agent); + } else if (protocol.equals("ftp")) { + response = this.ftpLoader.load(request, true); + } else if (protocol.equals("smb")) { + response = this.smbLoader.load(request, true); + } else if (protocol.equals("file")) { + response = this.fileLoader.load(request, true); + } else { + throw new IOException("Unsupported protocol '" + protocol + "' in url " + url); + } + if (response == null) { + throw new IOException("no response (NULL) for url " + url); + } + if (response.getContent() == null) { + throw new IOException("empty response (code " + response.getStatus() + ") for url " + url.toNormalform(true)); + } + + // we got something. Now check if we want to store that to the cache + // first check looks if we want to store the content to the cache + if (crawlProfile == null || !crawlProfile.storeHTCache()) { + // no caching wanted. Thats ok, do not write any message + return response; + } + // second check tells us if the protocol tells us something about caching + final String storeError = response.shallStoreCacheForCrawler(); + if (storeError == null) { + try { + Cache.store(url, response.getResponseHeader(), response.getContent()); + } catch (final IOException e) { + LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e); + } + } else { + LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (4): " + storeError); + } + return response; + } + + /** + * Try loading requested resource from cache according to cache strategy + * @param request request to resource + * @param cacheStrategy cache strategy to use + * @param agent agent identifier + * @param url resource url + * @param crawlProfile crawl profile + * @return a Response instance when resource could be loaded from cache, or null. 
+ * @throws IOException when an error occured + */ + private Response loadFromCache(final Request request, CacheStrategy cacheStrategy, ClientIdentification.Agent agent, + final DigestURL url, final CrawlProfile crawlProfile) throws IOException { + Response response = null; + if (cacheStrategy != CacheStrategy.NOCACHE && crawlProfile != null) { // we have passed a first test if caching is allowed // now see if there is a cache entry @@ -224,7 +301,7 @@ public final class LoaderDispatcher { DigestURL refererURL = null; if (request.referrerhash() != null) refererURL = this.sb.getURL(request.referrerhash()); if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true)); - final Response response = new Response( + response = new Response( request, requestHeader, cachedResponse, @@ -258,6 +335,38 @@ public final class LoaderDispatcher { LoaderDispatcher.log.warn("HTCACHE contained response header, but not content for url " + url.toNormalform(true)); } } + return response; + } + + /** + * Open an InputStream on a resource from the web, from ftp, from smb or a file + * @param request the request essentials + * @param cacheStratgy strategy according to NOCACHE, IFFRESH, IFEXIST, CACHEONLY + * @return an open ImageInputStream. Don't forget to close it once used! + * @throws IOException when url is malformed, blacklisted, or CacheStrategy is CACHEONLY and content is unavailable + */ + private InputStream openInputStreamInternal(final Request request, CacheStrategy cacheStrategy, final int maxFileSize, final BlacklistType blacklistType, ClientIdentification.Agent agent) throws IOException { + // get the protocol of the next URL + final DigestURL url = request.url(); + if (url.isFile() || url.isSMB()) { + cacheStrategy = CacheStrategy.NOCACHE; // load just from the file + // system + } + final String protocol = url.getProtocol(); + final String host = url.getHost(); + final CrawlProfile crawlProfile = request.profileHandle() == null ? null : this.sb.crawler.get(UTF8.getBytes(request.profileHandle())); + + // check if url is in blacklist + if (blacklistType != null && host != null && Switchboard.urlBlacklist.isListed(blacklistType, host.toLowerCase(), url.getFile())) { + this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), crawlProfile, FailCategory.FINAL_LOAD_CONTEXT, "url in blacklist", -1); + throw new IOException("DISPATCHER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.$"); + } + + // check if we have the page in the cache + Response cachedResponse = loadFromCache(request, cacheStrategy, agent, url, crawlProfile); + if(cachedResponse != null) { + return new ByteArrayInputStream(cachedResponse.getContent()); + } // check case where we want results from the cache exclusively, and never from the Internet (offline mode) if (cacheStrategy == CacheStrategy.CACHEONLY) { @@ -269,21 +378,7 @@ public final class LoaderDispatcher { // check access time: this is a double-check (we checked possibly already in the balancer) // to make sure that we don't DoS the target by mistake - if (!url.isLocal()) { - final Long lastAccess = accessTime.get(host); - long wait = 0; - if (lastAccess != null) wait = Math.max(0, agent.minimumDelta + lastAccess.longValue() - System.currentTimeMillis()); - if (wait > 0) { - // force a sleep here. 
Instead just sleep we clean up the accessTime map - final long untilTime = System.currentTimeMillis() + wait; - cleanupAccessTimeTable(untilTime); - if (System.currentTimeMillis() < untilTime) { - long frcdslp = untilTime - System.currentTimeMillis(); - LoaderDispatcher.log.info("Forcing sleep of " + frcdslp + " ms for host " + host); - try {Thread.sleep(frcdslp);} catch (final InterruptedException ee) {} - } - } - } + checkAccessTime(agent, url); // now it's for sure that we will access the target. Remember the access time if (host != null) { @@ -292,44 +387,52 @@ public final class LoaderDispatcher { } // load resource from the internet - Response response = null; + InputStream inStream = null; if (protocol.equals("http") || protocol.equals("https")) { - response = this.httpLoader.load(request, crawlProfile, maxFileSize, blacklistType, agent); - } else if (protocol.equals("ftp")) { - response = this.ftpLoader.load(request, true); - } else if (protocol.equals("smb")) { - response = this.smbLoader.load(request, true); - } else if (protocol.equals("file")) { - response = this.fileLoader.load(request, true); + inStream = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent); + } else if (protocol.equals("ftp") || protocol.equals("smb") || protocol.equals("file")) { + // may also open directly stream with ftp loader + inStream = url.getInputStream(agent, null, null); } else { throw new IOException("Unsupported protocol '" + protocol + "' in url " + url); } - if (response == null) { - throw new IOException("no response (NULL) for url " + url); - } - if (response.getContent() == null) { - throw new IOException("empty response (code " + response.getStatus() + ") for url " + url.toNormalform(true)); + if (inStream == null) { + throw new IOException("Unable to open content stream"); } - // we got something. Now check if we want to store that to the cache - // first check looks if we want to store the content to the cache - if (crawlProfile == null || !crawlProfile.storeHTCache()) { - // no caching wanted. Thats ok, do not write any message - return response; - } - // second check tells us if the protocol tells us something about caching - final String storeError = response.shallStoreCacheForCrawler(); - if (storeError == null) { - try { - Cache.store(url, response.getResponseHeader(), response.getContent()); - } catch (final IOException e) { - LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (3): " + e.getMessage(), e); - } - } else { - LoaderDispatcher.log.warn("cannot write " + response.url() + " to Cache (4): " + storeError); - } - return response; + return inStream; } + + + /** + * Check access time: this is a double-check (we checked possibly already in the balancer) + * to make sure that we don't DoS the target by mistake + * @param agent agent identifier + * @param url target url + */ + private void checkAccessTime(ClientIdentification.Agent agent, final DigestURL url) { + if (!url.isLocal()) { + String host = url.getHost(); + final Long lastAccess = accessTime.get(host); + long wait = 0; + if (lastAccess != null) + wait = Math.max(0, agent.minimumDelta + lastAccess.longValue() - System.currentTimeMillis()); + if (wait > 0) { + // force a sleep here. 
Instead just sleep we clean up the + // accessTime map + final long untilTime = System.currentTimeMillis() + wait; + cleanupAccessTimeTable(untilTime); + if (System.currentTimeMillis() < untilTime) { + long frcdslp = untilTime - System.currentTimeMillis(); + LoaderDispatcher.log.info("Forcing sleep of " + frcdslp + " ms for host " + host); + try { + Thread.sleep(frcdslp); + } catch (final InterruptedException ee) { + } + } + } + } + } private int protocolMaxFileSize(final DigestURL url) { if (url.isHTTP() || url.isHTTPS()) @@ -357,6 +460,53 @@ public final class LoaderDispatcher { // read resource body (if it is there) return entry.getContent(); } + + /** + * Open url as InputStream from the web or the cache + * @param request must be not null + * @param cacheStrategy cache strategy to use + * @param blacklistType black list + * @param agent agent identification for HTTP requests + * @return an open InputStream on content. Don't forget to close it once used. + * @throws IOException when url is malformed or blacklisted + */ + public InputStream openInputStream(final Request request, final CacheStrategy cacheStrategy, + BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException { + final int maxFileSize = protocolMaxFileSize(request.url()); + InputStream stream = null; + + Semaphore check = this.loaderSteering.get(request.url()); + if (check != null && cacheStrategy != CacheStrategy.NOCACHE) { + // a loading process is going on for that url + long t = System.currentTimeMillis(); + try { + check.tryAcquire(5, TimeUnit.SECONDS); + } catch (final InterruptedException e) { + } + ConcurrentLog.info("LoaderDispatcher", + "waited " + (System.currentTimeMillis() - t) + " ms for " + request.url().toNormalform(true)); + // now the process may have terminated and we run a normal loading + // which may be successful faster because of a cache hit + } + + this.loaderSteering.put(request.url(), new Semaphore(0)); + try { + stream = openInputStreamInternal(request, cacheStrategy, maxFileSize, blacklistType, agent); + } catch(IOException ioe) { + /* Do not re encapsulate eventual IOException in an IOException */ + throw ioe; + } catch (final Throwable e) { + throw new IOException(e); + } finally { + // release the semaphore anyway + check = this.loaderSteering.remove(request.url()); + if (check != null) { + check.release(1000); // don't block any other + } + } + + return stream; + } public Document[] loadDocuments(final Request request, final CacheStrategy cacheStrategy, final int maxFileSize, BlacklistType blacklistType, final ClientIdentification.Agent agent) throws IOException, Parser.Failure { diff --git a/source/net/yacy/visualization/RasterPlotter.java b/source/net/yacy/visualization/RasterPlotter.java index 9ccae13e8..2134188bc 100644 --- a/source/net/yacy/visualization/RasterPlotter.java +++ b/source/net/yacy/visualization/RasterPlotter.java @@ -906,11 +906,18 @@ public class RasterPlotter { return dest; } + /** + * Encode buffered image using specified format to a new ByteBuffer + * @param image image to encode + * @param targetExt format name. For example "png". + * @return a ByteBuffer instance containing encoded data, or empty if an error occured or target format is not supported. 
+ */ public static ByteBuffer exportImage(final BufferedImage image, final String targetExt) { // generate an byte array from the given image final ByteBuffer baos = new ByteBuffer(); ImageIO.setUseCache(false); // because we write into ram here try { + /* When no ImageIO writer is found image might no be written*/ ImageIO.write(image, targetExt, baos); return baos; } catch (final IOException e) { diff --git a/test/ViewImagePerfTest.java b/test/ViewImagePerfTest.java index 7b8043cf7..5a13e30a3 100755 --- a/test/ViewImagePerfTest.java +++ b/test/ViewImagePerfTest.java @@ -1,8 +1,15 @@ import java.io.File; -import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import javax.imageio.ImageIO; +import javax.imageio.stream.ImageInputStream; import net.yacy.cora.util.ConcurrentLog; import net.yacy.peers.graphics.EncodedImage; @@ -36,145 +43,191 @@ import net.yacy.server.serverObjects; * @author luc * */ -public class ViewImagePerfTest { - - /** Default image */ - private static final String DEFAULT_IMG_RESOURCE = "/viewImageTest/test/JPEG_example_JPG_RIP_100.jpg"; - - /** Default render max width (JPEG_example_JPG_RIP_100.jpg width / 10) */ - private static final int DEFAULT_MAX_WIDTH = 31; +public class ViewImagePerfTest extends ViewImageTest { - /** Default render max height (JPEG_example_JPG_RIP_100.jpg height / 10) */ - private static final int DEFAULT_MAX_HEIGHT = 23; + /** Default minimum measurement time */ + private static final int DEFAULT_MIN_MEASURE_TIME = 10; - /** Default encoding format */ - private static final String DEFAUL_EXT = "png"; + /** Minimum measurement time */ + private int minMeasureTime; /** - * @param testFile - * file to load - * @return testFile content as a bytes array - * @throws IOException - * when an error occured while loading + * @param args + * main parameters : args[7] may contain minimum measurement time + * in secondes. Default : 10. */ - private static byte[] getBytes(File testFile) throws IOException { - InputStream inStream = new FileInputStream(testFile); - byte[] res = new byte[inStream.available()]; - try { - inStream.read(res); - } finally { - inStream.close(); - } - return res; + public ViewImagePerfTest(String args[]) { + this.minMeasureTime = getMinMeasurementTime(args); } /** + * * @param args - * first item may contain file URL - * @return file to be used : specified as first in args or default one + * main parameters : args[7] may contain minimum measurement time + * in secondes. Default : 10. + * @return extension to use for encoding */ - private static File getTestFile(String args[]) { - String fileURL; - if (args != null && args.length > 0) { - fileURL = args[0]; + protected int getMinMeasurementTime(String args[]) { + int time; + if (args != null && args.length > 7) { + time = Integer.parseInt(args[7]); } else { - URL defaultURL = ViewImagePerfTest.class.getResource(DEFAULT_IMG_RESOURCE); - if (defaultURL == null) { - throw new IllegalArgumentException("File not found : " + DEFAULT_IMG_RESOURCE); - } - fileURL = defaultURL.getFile(); + time = DEFAULT_MIN_MEASURE_TIME; } - return new File(fileURL); + return time; } /** - * Build post parameters to use with ViewImage + * Process inFile image, update processedFiles list and failures map, and + * append measurements to results_perfs.txt. 
All parameters must not be + * null. * - * @param args - * main parameters : second and third items may respectively - * contain max width and max height - * @return a serverObjects instance + * @param ext + * output encoding image format + * @param outDir + * output directory + * @param post + * ViewImage post parameters + * @param failures + * map failed file urls to eventual exception + * @param inFile + * file image to process + * @throws IOException + * when an read/write error occured */ - private static serverObjects makePostParams(String args[]) { - serverObjects post = new serverObjects(); - int maxWidth = DEFAULT_MAX_WIDTH; - if (args != null && args.length > 1) { - maxWidth = Integer.parseInt(args[1]); + @Override + protected void processFile(String ext, File outDir, serverObjects post, Map failures, + File inFile) throws IOException { + /* Delete eventual previous result file */ + System.out + .println("Measuring ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext); + File outFile = new File(outDir, inFile.getName() + "." + ext); + if (outFile.exists()) { + outFile.delete(); } - post.put("maxwidth", String.valueOf(maxWidth)); - int maxHeight = DEFAULT_MAX_HEIGHT; - if (args != null && args.length > 2) { - maxHeight = Integer.parseInt(args[2]); + String urlString = inFile.getAbsolutePath(); + EncodedImage img = null; + Exception error = null; + long beginTime = System.nanoTime(), time, minTime = Long.MAX_VALUE, maxTime = 0, meanTime = 0, totalTime = 0; + int step = 0; + for (step = 0; (totalTime / 1000000000) < this.minMeasureTime; step++) { + beginTime = System.nanoTime(); + ImageInputStream inStream = ImageIO.createImageInputStream(inFile); + try { + img = ViewImage.parseAndScale(post, true, urlString, ext, inStream); + } catch (Exception e) { + error = e; + } + time = System.nanoTime() - beginTime; + minTime = Math.min(minTime, time); + maxTime = Math.max(maxTime, time); + totalTime += time; + } + if (step > 0) { + meanTime = totalTime / step; + } else { + meanTime = totalTime; + } + PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results_perfs.txt"), true)); + try { + writeMessage("Measured ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext, + resultsWriter); + if(img == null) { + writeMessage("Image could not be rendered! 
Measurement show time needed to read and parse image data until error detection.", resultsWriter); + } + writeMessage("Render total time (ms) : " + (totalTime) / 1000000 + " on " + step + " steps.", + resultsWriter); + writeMessage("Render mean time (ms) : " + (meanTime) / 1000000, resultsWriter); + writeMessage("Render min time (ms) : " + (minTime) / 1000000, resultsWriter); + writeMessage("Render max time (ms) : " + (maxTime) / 1000000, resultsWriter); + } finally { + resultsWriter.close(); } - post.put("maxheight", String.valueOf(maxHeight)); - /* Make it square by default */ - post.put("quadratic", ""); - return post; - } - /** - * - * @param args - * main parameters : fourth item may contain extension - * @return extension to use for encoding - */ - private static String getEncodingExt(String args[]) { - String ext = DEFAUL_EXT; - if (args != null && args.length > 3) { - ext = args[3]; + if (img == null) { + failures.put(urlString, error); + } else { + FileOutputStream outFileStream = null; + try { + outFileStream = new FileOutputStream(outFile); + img.getImage().writeTo(outFileStream); + } finally { + if (outFileStream != null) { + outFileStream.close(); + } + img.getImage().close(); + } } - return ext; } /** - * Test image is parsed and rendered again and again until 20 seconds - * elapsed. Then measured statistics are displayed. + * Test image(s) (default : classpath resource folder /viewImageTest/test/) + * are parsed and rendered again and again until specified time (default : + * 10 seconds) elapsed. Then rendered image is written to outDir for visual + * check and measured statistics are displayed. * * @param args * may be empty or contain parameters to override defaults : *
    - *
 * <ul>
- * <li>args[0] : input image file URL. Default : viewImageTest/test/JPEG_example_JPG_RIP_100.jpg</li>
- * <li>args[1] : max width (in pixels) for rendered image. Default : default image width divided by 10.</li>
- * <li>args[2] : max height (in pixels) for rendered image. Default : default image height divided by 10.</li>
- * <li>args[3] : output format name. Default : "png".</li>
+ * <li>args[0] : input image file URL or folder containing image files URL. Default : classpath resource /viewImageTest/test/</li>
+ * <li>args[1] : output format name (for example : "jpg") for rendered image. Default : "png".</li>
+ * <li>args[2] : output folder URL. Default : "[system tmp dir]/ViewImageTest".</li>
+ * <li>args[3] : max width (in pixels) for rendered image. May be set to zero to specify no max width. Default : no value.</li>
+ * <li>args[4] : max height (in pixels) for rendered image. May be set to zero to specify no max height. Default : no value.</li>
+ * <li>args[5] : set to "quadratic" to render square output images. May be set to any other string to specify no quadratic shape. Default : false.</li>
+ * <li>args[6] : set to "recursive" to process sub-folders recursively. Default : false.</li>
+ * <li>args[7] : minimum measurement time in seconds. Default : 10.</li>
 * </ul>
* @throws IOException * when a read/write error occured */ public static void main(String args[]) throws IOException { - File imgFile = getTestFile(args); - byte[] resourceb = getBytes(imgFile); - String ext = getEncodingExt(args); - serverObjects post = makePostParams(args); + ViewImagePerfTest test = new ViewImagePerfTest(args); + File inFile = test.getInputURL(args); + String ext = test.getEncodingExt(args); + File outDir = test.getOuputDir(args); + boolean recursive = test.isRecursive(args); + serverObjects post = test.makePostParams(args); + outDir.mkdirs(); + + File[] inFiles; + if (inFile.isFile()) { + inFiles = new File[1]; + inFiles[0] = inFile; + System.out.println( + "Measuring ViewImage render with file : " + inFile.getAbsolutePath() + " encoded To : " + ext); + } else if (inFile.isDirectory()) { + inFiles = inFile.listFiles(); + System.out.println("Measuring ViewImage render with files in folder : " + inFile.getAbsolutePath() + + " encoded To : " + ext); + } else { + inFiles = new File[0]; + } + if (inFiles.length == 0) { + throw new IllegalArgumentException(inFile.getAbsolutePath() + " is not a valid file or folder url."); + } - String urlString = imgFile.getAbsolutePath(); + System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath()); - System.out.println("Measuring ViewImage render with file : " + urlString + " encoded To : " + ext); + List processedFiles = new ArrayList(); + Map failures = new TreeMap<>(); try { - /* Max test total time (s) */ - int maxTotalTime = 20; - long beginTime, time, minTime = Long.MAX_VALUE, maxTime = 0, meanTime = 0, totalTime = 0; - int step = 0; - for (step = 0; (totalTime / 1000000000) < maxTotalTime; step++) { - beginTime = System.nanoTime(); - EncodedImage img = ViewImage.parseAndScale(post, true, urlString, ext, false, resourceb); - time = System.nanoTime() - beginTime; - minTime = Math.min(minTime, time); - maxTime = Math.max(maxTime, time); - totalTime += time; - if (img == null) { - throw new IOException("Image render failed"); - } - } - meanTime = totalTime / step; - System.out.println("Render total time (ms) : " + (totalTime) / 1000000 + " on " + step + " steps."); - System.out.println("Render mean time (ms) : " + (meanTime) / 1000000); - System.out.println("Render min time (ms) : " + (minTime) / 1000000); - System.out.println("Render max time (ms) : " + (maxTime) / 1000000); + long time = System.nanoTime(); + test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures); + time = System.nanoTime() - time; + test.displayResults(processedFiles, failures, time, outDir); } finally { ConcurrentLog.shutdown(); } diff --git a/test/ViewImageTest.java b/test/ViewImageTest.java index 84319969a..a80a7d588 100755 --- a/test/ViewImageTest.java +++ b/test/ViewImageTest.java @@ -1,15 +1,17 @@ import java.io.File; -import java.io.FileFilter; -import java.io.FileInputStream; import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; -import java.io.InputStream; +import java.io.PrintWriter; import java.net.URL; -import java.util.HashMap; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.TreeMap; -import org.apache.commons.io.filefilter.FileFileFilter; +import javax.imageio.ImageIO; +import javax.imageio.stream.ImageInputStream; import net.yacy.cora.util.ConcurrentLog; import net.yacy.peers.graphics.EncodedImage; @@ -51,24 +53,6 @@ public class ViewImageTest { /** Default output encoding format */ 
private static final String DEFAULT_OUT_EXT = "png"; - /** - * @param testFile - * file to load - * @return testFile content as a bytes array - * @throws IOException - * when an error occured while loading - */ - private static byte[] getBytes(File testFile) throws IOException { - InputStream inStream = new FileInputStream(testFile); - byte[] res = new byte[inStream.available()]; - try { - inStream.read(res); - } finally { - inStream.close(); - } - return res; - } - /** * @param args * main parameters. first item may contain input file or folder @@ -76,7 +60,7 @@ public class ViewImageTest { * @return file or folder to be used : specified as first in args or default * one */ - private static File getInputURL(String args[]) { + protected File getInputURL(String args[]) { String fileURL; if (args != null && args.length > 0) { fileURL = args[0]; @@ -98,7 +82,7 @@ public class ViewImageTest { * @throws IllegalArgumentException * when args[2] is not set and default is not found */ - private static File getOuputDir(String[] args) { + protected File getOuputDir(String[] args) { File outDir; if (args.length > 2) { outDir = new File(args[2]); @@ -107,7 +91,7 @@ public class ViewImageTest { if (tmpDir == null) { throw new IllegalArgumentException("No destination dir specified, and default not found"); } - outDir = new File(tmpDir + File.separator + ViewImageTest.class.getCanonicalName()); + outDir = new File(tmpDir + File.separator + this.getClass().getCanonicalName()); } return outDir; } @@ -117,10 +101,12 @@ public class ViewImageTest { * * @param args * main parameters : args[3] and args[4] may respectively contain - * max width and max height + * max width and max height. Set it to zero so there is no max + * width and no max height when processing. args[5] may be set to + * "quadratic" to render output images as squares. * @return a serverObjects instance */ - private static serverObjects makePostParams(String args[]) { + protected serverObjects makePostParams(String args[]) { serverObjects post = new serverObjects(); if (args != null && args.length > 3) { int maxWidth = Integer.parseInt(args[3]); @@ -132,51 +118,198 @@ public class ViewImageTest { post.put("maxheight", String.valueOf(maxHeight)); } + boolean quadratic = isQuadratic(args); + if (quadratic) { + post.put("quadratic", ""); + } + return post; } /** * * @param args - * main parameters : fourth item may contain extension + * main parameters : second item may contain extension * @return extension to use for encoding */ - private static String getEncodingExt(String args[]) { + protected String getEncodingExt(String args[]) { String ext = DEFAULT_OUT_EXT; - if (args != null && args.length > 3) { - ext = args[3]; + if (args != null && args.length > 1) { + ext = args[1]; } return ext; } /** - * Display detailed results. All parametrers required not to be null. * - * @param inFiles - * input image files + * @param args + * main parameters. args[5] may be set to "quadratic" + * @return true when image are supposed to be rendered as squares. + */ + protected boolean isQuadratic(String args[]) { + boolean recursive = false; + if (args != null && args.length > 5) { + recursive = "quadratic".equals(args[5]); + } + return recursive; + } + + /** + * + * @param args + * main parameters. 
args[6] may be set to "recursive" + * @return true when folders are supposed to processed recursively + */ + protected boolean isRecursive(String args[]) { + boolean recursive = false; + if (args != null && args.length > 6) { + recursive = "recursive".equals(args[6]); + } + return recursive; + } + + /** + * Write same message to both system standard output and to outWriter. + * + * @param message + * message to write + * @param outWriter + * PrintWriter writer. Must not be null. + * @throws IOException + * in case of write error + */ + protected void writeMessage(String message, PrintWriter outWriter) throws IOException { + System.out.println(message); + outWriter.println(message); + } + + /** + * Display detailed results and produce a results.txt file in outDir. All + * parametrers required not to be null. + * + * @param processedFiles + * all processed image files * @param failures - * map input file url which failed with eventual cause exception + * map input file url which failed with eventual cause error + * @param time + * total processing time in nanoseconds + * @param outDir + * directory to write results file + * @throws IOException + * when a write error occured writing the results file */ - private static void displayResults(File[] inFiles, Map failures) { - if (failures.size() > 0) { - if (failures.size() == inFiles.length) { - System.out.println("No input files could be processed :"); + protected void displayResults(List processedFiles, Map failures, long time, File outDir) + throws IOException { + PrintWriter resultsWriter = new PrintWriter(new FileWriter(new File(outDir, "results.txt"))); + try { + writeMessage(processedFiles.size() + " files processed in " + (time / 1000000) + " ms", resultsWriter); + if (failures.size() > 0) { + if (failures.size() == processedFiles.size()) { + writeMessage("No input files could be processed :", resultsWriter); + } else { + writeMessage("Some input files could not be processed :", resultsWriter); + } + for (Entry entry : failures.entrySet()) { + writeMessage(entry.getKey(), resultsWriter); + if (entry.getValue() != null) { + writeMessage("cause : " + entry.getValue(), resultsWriter); + } + } } else { - System.out.println("Some input files could not be processed :"); + if (processedFiles.size() > 0) { + writeMessage("All input files were successfully processed.", resultsWriter); + } else { + writeMessage("No input file was provided.", resultsWriter); + } } - for (Entry entry : failures.entrySet()) { - System.out.println(entry.getKey()); - if (entry.getValue() != null) { - System.out.println("cause : " + entry.getValue()); + } finally { + resultsWriter.close(); + } + } + + /** + * Process inFiles and update processedFiles list and failures map. All + * parameters must not be null. 
+ * + * @param ext + * output encoding image format + * @param recursive + * when true, also process inFiles directories + * @param outDir + * output directory + * @param post + * ViewImage post parameters + * @param inFiles + * files or directories to process + * @param processedFiles + * list of processed files + * @param failures + * map failed file urls to eventual exception + * @throws IOException + * when an read/write error occured + */ + protected void processFiles(String ext, boolean recursive, File outDir, serverObjects post, File[] inFiles, + List processedFiles, Map failures) throws IOException { + for (File inFile : inFiles) { + if (inFile.isDirectory()) { + if (recursive) { + File subDir = new File(outDir, inFile.getName()); + subDir.mkdirs(); + processFiles(ext, recursive, subDir, post, inFile.listFiles(), processedFiles, failures); } + } else { + processedFiles.add(inFile); + processFile(ext, outDir, post, failures, inFile); } + } + } + + /** + * Process inFile image and update processedFiles list and failures map. All + * parameters must not be null. + * @param ext output encoding image format + * @param outDir output directory + * @param post ViewImage post parameters + * @param failures map failed file urls to eventual exception + * @param inFile file image to process + * @throws IOException when an read/write error occured + */ + protected void processFile(String ext, File outDir, serverObjects post, Map failures, File inFile) + throws IOException { + /* Delete eventual previous result file */ + File outFile = new File(outDir, inFile.getName() + "." + ext); + if (outFile.exists()) { + outFile.delete(); + } + + ImageInputStream inStream = ImageIO.createImageInputStream(inFile); + String urlString = inFile.getAbsolutePath(); + EncodedImage img = null; + Throwable error = null; + try { + img = ViewImage.parseAndScale(post, true, urlString, ext, inStream); + } catch (Throwable e) { + error = e; + } + + if (img == null) { + failures.put(urlString, error); } else { - System.out.println("All input files were successfully processed."); + FileOutputStream outFileStream = null; + try { + outFileStream = new FileOutputStream(outFile); + img.getImage().writeTo(outFileStream); + } finally { + if (outFileStream != null) { + outFileStream.close(); + } + img.getImage().close(); + } } } /** - * Test image(s) (default : JPEG_example_JPG_RIP_100.jpg) are parsed and + * Test image(s) (default : classpath resource folder /viewImageTest/test/) are parsed and * rendered to an output foler. Result can then be checked with program of * your choice. * @@ -184,80 +317,61 @@ public class ViewImageTest { * may be empty or contain parameters to override defaults : *
    *
 * <ul>
- * <li>args[0] : input image file URL or folder containing image files URL. Default : viewImageTest/test/JPEG_example_JPG_RIP_100.jpg</li>
- * <li>args[1] : output format name (for example : "jpg") for rendered image</li>
- * <li>args[2] : ouput folder URL</li>
- * <li>args[3] : max width (in pixels) for rendered image. Default : no value.</li>
- * <li>args[4] : max height (in pixels) for rendered image. Default : no value.</li>
+ * <li>args[0] : input image file URL or folder containing image files URL. Default : classpath resource /viewImageTest/test/</li>
+ * <li>args[1] : output format name (for example : "jpg") for rendered image. Default : "png".</li>
+ * <li>args[2] : output folder URL. Default : "[system tmp dir]/ViewImageTest".</li>
+ * <li>args[3] : max width (in pixels) for rendered image. May be set to zero to specify no max width. Default : no value.</li>
+ * <li>args[4] : max height (in pixels) for rendered image. May be set to zero to specify no max height. Default : no value.</li>
+ * <li>args[5] : set to "quadratic" to render square output images. May be set to any other string to specify no quadratic shape. Default : false.</li>
+ * <li>args[6] : set to "recursive" to process sub-folders recursively. Default : false.</li>
 * </ul>
* @throws IOException * when a read/write error occured */ public static void main(String args[]) throws IOException { - File inURL = getInputURL(args); - String ext = getEncodingExt(args); - File outDir = getOuputDir(args); - serverObjects post = makePostParams(args); + ViewImageTest test = new ViewImageTest(); + File inFile = test.getInputURL(args); + String ext = test.getEncodingExt(args); + File outDir = test.getOuputDir(args); + boolean recursive = test.isRecursive(args); + serverObjects post = test.makePostParams(args); outDir.mkdirs(); File[] inFiles; - if (inURL.isFile()) { + if (inFile.isFile()) { inFiles = new File[1]; - inFiles[0] = inURL; - System.out.println("Testing ViewImage rendering with input file : " + inURL.getAbsolutePath() + inFiles[0] = inFile; + System.out.println("Testing ViewImage rendering with input file : " + inFile.getAbsolutePath() + " encoded To : " + ext); - } else if (inURL.isDirectory()) { - FileFilter filter = FileFileFilter.FILE; - inFiles = inURL.listFiles(filter); - System.out.println("Testing ViewImage rendering with input files in folder : " + inURL.getAbsolutePath() + } else if (inFile.isDirectory()) { + inFiles = inFile.listFiles(); + System.out.println("Testing ViewImage rendering with input files in folder : " + inFile.getAbsolutePath() + " encoded To : " + ext); } else { inFiles = new File[0]; } if (inFiles.length == 0) { - throw new IllegalArgumentException(inURL.getAbsolutePath() + " is not a valid file or folder url."); + throw new IllegalArgumentException(inFile.getAbsolutePath() + " is not a valid file or folder url."); } + System.out.println("Rendered images will be written in dir : " + outDir.getAbsolutePath()); - Map failures = new HashMap(); + List processedFiles = new ArrayList(); + Map failures = new TreeMap<>(); try { - for (File inFile : inFiles) { - /* Delete eventual previous result file */ - File outFile = new File(outDir, inFile.getName() + "." + ext); - if (outFile.exists()) { - outFile.delete(); - } - - byte[] resourceb = getBytes(inFile); - String urlString = inFile.getAbsolutePath(); - EncodedImage img = null; - Exception error = null; - try { - img = ViewImage.parseAndScale(post, true, urlString, ext, false, resourceb); - } catch (Exception e) { - error = e; - } - - if (img == null) { - failures.put(urlString, error); - } else { - FileOutputStream outFileStream = null; - try { - outFileStream = new FileOutputStream(outFile); - img.getImage().writeTo(outFileStream); - } finally { - if (outFileStream != null) { - outFileStream.close(); - } - img.getImage().close(); - } - } - } - displayResults(inFiles, failures); + long time = System.nanoTime(); + test.processFiles(ext, recursive, outDir, post, inFiles, processedFiles, failures); + time = System.nanoTime() - time; + test.displayResults(processedFiles, failures, time, outDir); } finally { ConcurrentLog.shutdown(); } diff --git a/test/viewImageTest/ViewImageTest.html b/test/viewImageTest/ViewImageTest.html index 547152cc7..efdfd96da 100644 --- a/test/viewImageTest/ViewImageTest.html +++ b/test/viewImageTest/ViewImageTest.html @@ -70,7 +70,8 @@ td { .png Portable Network Graphics RFC 2083 - Pngsuite + Pngsuite, + imagetestsuite
Animated Portable Network Graphics APNG Specification - APNG tests + APNG + tests
.jpg, .jpeg, .jpe, .jif, .jfif, .jfi Joint Photographic Experts Group - + imagetestsuite
.gif Graphics Interchange Format - + imagetestsuite
.bmp, .dib Windows Bitmap - + bmpsuite, + The Bitmap Test + suite
libtiff, TwelveMonkeys + href="https://github.com/haraldk/TwelveMonkeys/tree/master/imageio/imageio-tiff/src/test/resources/tiff">TwelveMonkeys, + imagetestsuite
- VTF: VTF: sample source image/vnd.valve.source.texture .vtf @@ -930,7 +936,7 @@ td { title="Browser render VTF sample in full size"> Browser VTF render failed + alt="Browser VTF render failed" />
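
For reference, a minimal usage sketch of the stream-based rendering path this patch introduces: an ImageInputStream is opened directly on the source and handed to ViewImage.parseAndScale, and the resulting EncodedImage is written out. It assumes the signatures shown in the diff above; the class name, input path and scaling bounds are illustrative placeholders, not part of the patch.

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;

    import javax.imageio.ImageIO;
    import javax.imageio.stream.ImageInputStream;

    import net.yacy.peers.graphics.EncodedImage;
    import net.yacy.server.serverObjects;

    // ViewImage lives in the default package under htroot/, so it needs no import here.
    public class StreamRenderSketch {

        public static void main(String[] args) throws Exception {
            File inFile = new File(args[0]); // source image file (placeholder)
            String targetExt = "png";        // target encoding format

            // Optional scaling bounds, set the same way the tests in this patch set them
            serverObjects post = new serverObjects();
            post.put("maxwidth", "96");
            post.put("maxheight", "96");

            // Open an ImageInputStream directly on the file instead of first reading
            // the whole resource into a byte array
            ImageInputStream inStream = ImageIO.createImageInputStream(inFile);
            if (inStream == null) {
                throw new IOException("No ImageInputStream provider available for " + inFile);
            }
            EncodedImage img;
            try {
                img = ViewImage.parseAndScale(post, true, inFile.getAbsolutePath(), targetExt, inStream);
            } finally {
                inStream.close(); // the tests leave this open; closing here is defensive
            }

            // An empty ByteBuffer means the target encoding format is not supported
            if (img != null && img.getImage().length() > 0) {
                FileOutputStream out = new FileOutputStream(inFile.getName() + "." + targetExt);
                try {
                    img.getImage().writeTo(out);
                } finally {
                    out.close();
                    img.getImage().close();
                }
            }
        }
    }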
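
Similarly, a short sketch of EncodedImage.convertToRGB used on its own. This is the same fallback the EncodedImage constructor applies above when RasterPlotter.exportImage returns an empty buffer: normalize the color model to RGB (or ARGB when an alpha channel is present) so a standard ImageIO writer can encode the image. Class name and input path are again placeholders.

    import java.awt.image.BufferedImage;
    import java.io.File;

    import javax.imageio.ImageIO;

    import net.yacy.peers.graphics.EncodedImage;

    public class ConvertToRgbSketch {

        public static void main(String[] args) throws Exception {
            // Placeholder input: for instance a palette-based GIF or a grayscale image
            BufferedImage source = ImageIO.read(new File(args[0]));
            if (source == null) {
                throw new IllegalArgumentException("No registered reader could decode " + args[0]);
            }

            // Images already of TYPE_INT_RGB or TYPE_INT_ARGB are returned unchanged;
            // other color models are converted, keeping alpha when present
            BufferedImage rgb = EncodedImage.convertToRGB(source);

            System.out.println("source type = " + source.getType() + ", converted type = " + rgb.getType());
            ImageIO.write(rgb, "png", new File(args[0] + ".rgb.png"));
        }
    }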