Added protection against out-of-memory (OOM) cases in the image parser. See also bugs.yacy.net/view.php?id=54

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7942 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 52a2b3f110
commit 0819e1d397

@ -9,7 +9,7 @@
// $LastChangedBy$ // $LastChangedBy$
// //
// LICENSE // LICENSE
// //
// This program is free software; you can redistribute it and/or modify // This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by // it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or // the Free Software Foundation; either version 2 of the License, or
@ -33,24 +33,16 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;import java.net.MalformedURLException; import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Properties;
import java.util.Iterator; import java.util.Iterator;
import java.util.Properties;
import java.util.Set; import java.util.Set;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import com.drew.imaging.jpeg.JpegProcessingException;
import com.drew.imaging.jpeg.JpegSegmentReader;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.ExifReader;
import com.drew.metadata.iptc.IptcReader;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.document.AbstractParser; import net.yacy.document.AbstractParser;
@ -61,6 +53,15 @@ import net.yacy.document.parser.images.bmpParser.IMAGEMAP;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils; import net.yacy.kelondro.util.FileUtils;
import com.drew.imaging.jpeg.JpegProcessingException;
import com.drew.imaging.jpeg.JpegSegmentReader;
import com.drew.metadata.Directory;
import com.drew.metadata.Metadata;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.ExifReader;
import com.drew.metadata.iptc.IptcReader;
public class genericImageParser extends AbstractParser implements Parser { public class genericImageParser extends AbstractParser implements Parser {
/** /**
@ -82,17 +83,17 @@ public class genericImageParser extends AbstractParser implements Parser {
SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
SUPPORTED_MIME_TYPES.add("image/bmp"); SUPPORTED_MIME_TYPES.add("image/bmp");
} }
public genericImageParser() { public genericImageParser() {
super("Generic Image Parser"); super("Generic Image Parser");
} }
public Document[] parse( public Document[] parse(
final MultiProtocolURI location, final MultiProtocolURI location,
final String mimeType, final String mimeType,
final String documentCharset, final String documentCharset,
final InputStream sourceStream) throws Parser.Failure, InterruptedException { final InputStream sourceStream) throws Parser.Failure, InterruptedException {
ImageInfo ii = null; ImageInfo ii = null;
String title = null; String title = null;
String author = null; String author = null;
@ -103,11 +104,11 @@ public class genericImageParser extends AbstractParser implements Parser {
byte[] b; byte[] b;
try { try {
b = FileUtils.read(sourceStream); b = FileUtils.read(sourceStream);
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
throw new Parser.Failure(e.getMessage(), location); throw new Parser.Failure(e.getMessage(), location);
} }
IMAGEMAP imap = bmpParser.parse(b); final IMAGEMAP imap = bmpParser.parse(b);
ii = parseJavaImage(location, imap.getImage()); ii = parseJavaImage(location, imap.getImage());
} else if (mimeType.equals("image/jpg") || } else if (mimeType.equals("image/jpg") ||
location.getFileExtension().equals("jpg") || location.getFileExtension().equals("jpg") ||
@ -120,75 +121,77 @@ public class genericImageParser extends AbstractParser implements Parser {
byte[] b; byte[] b;
try { try {
b = FileUtils.read(sourceStream); b = FileUtils.read(sourceStream);
} catch (IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
throw new Parser.Failure(e.getMessage(), location); throw new Parser.Failure(e.getMessage(), location);
} }
ii = parseJavaImage(location, new ByteArrayInputStream(b)); ii = parseJavaImage(location, new ByteArrayInputStream(b));
JpegSegmentReader segmentReader; JpegSegmentReader segmentReader;
try { try {
segmentReader = new JpegSegmentReader(new ByteArrayInputStream(b)); segmentReader = new JpegSegmentReader(new ByteArrayInputStream(b));
byte[] exifSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APP1); final byte[] exifSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APP1);
byte[] iptcSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APPD); final byte[] iptcSegment = segmentReader.readSegment(JpegSegmentReader.SEGMENT_APPD);
Metadata metadata = new Metadata(); final Metadata metadata = new Metadata();
new ExifReader(exifSegment).extract(metadata); new ExifReader(exifSegment).extract(metadata);
new IptcReader(iptcSegment).extract(metadata); new IptcReader(iptcSegment).extract(metadata);
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
final
Iterator<Directory> directories = metadata.getDirectoryIterator(); Iterator<Directory> directories = metadata.getDirectoryIterator();
HashMap<String, String> props = new HashMap<String, String>(); final HashMap<String, String> props = new HashMap<String, String>();
while (directories.hasNext()) { while (directories.hasNext()) {
Directory directory = directories.next(); final Directory directory = directories.next();
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
final
Iterator<Tag> tags = directory.getTagIterator(); Iterator<Tag> tags = directory.getTagIterator();
while (tags.hasNext()) { while (tags.hasNext()) {
Tag tag = tags.next(); final Tag tag = tags.next();
try { try {
props.put(tag.getTagName(), tag.getDescription()); props.put(tag.getTagName(), tag.getDescription());
ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n"); ii.info.append(tag.getTagName() + ": " + tag.getDescription() + " .\n");
} catch (MetadataException e) { } catch (final MetadataException e) {
//Log.logException(e); //Log.logException(e);
} }
} }
title = props.get("Image Description"); title = props.get("Image Description");
if (title == null || title.length() == 0) title = props.get("Headline"); if (title == null || title.length() == 0) title = props.get("Headline");
if (title == null || title.length() == 0) title = props.get("Object Name"); if (title == null || title.length() == 0) title = props.get("Object Name");
author = props.get("Artist"); author = props.get("Artist");
if (author == null || author.length() == 0) author = props.get("Writer/Editor"); if (author == null || author.length() == 0) author = props.get("Writer/Editor");
if (author == null || author.length() == 0) author = props.get("By-line"); if (author == null || author.length() == 0) author = props.get("By-line");
if (author == null || author.length() == 0) author = props.get("Credit"); if (author == null || author.length() == 0) author = props.get("Credit");
if (author == null || author.length() == 0) author = props.get("Make"); if (author == null || author.length() == 0) author = props.get("Make");
keywords = props.get("Keywords"); keywords = props.get("Keywords");
if (keywords == null || keywords.length() == 0) keywords = props.get("Category"); if (keywords == null || keywords.length() == 0) keywords = props.get("Category");
if (keywords == null || keywords.length() == 0) keywords = props.get("Supplemental Category(s)"); if (keywords == null || keywords.length() == 0) keywords = props.get("Supplemental Category(s)");
description = props.get("Caption/Abstract"); description = props.get("Caption/Abstract");
if (description == null || description.length() == 0) description = props.get("Country/Primary Location"); if (description == null || description.length() == 0) description = props.get("Country/Primary Location");
if (description == null || description.length() == 0) description = props.get("Province/State"); if (description == null || description.length() == 0) description = props.get("Province/State");
if (description == null || description.length() == 0) description = props.get("Copyright Notice"); if (description == null || description.length() == 0) description = props.get("Copyright Notice");
} }
} catch (JpegProcessingException e) { } catch (final JpegProcessingException e) {
//Log.logException(e); //Log.logException(e);
// just ignore // just ignore
} }
} else { } else {
ii = parseJavaImage(location, sourceStream); ii = parseJavaImage(location, sourceStream);
} }
final HashSet<String> languages = new HashSet<String>(); final HashSet<String> languages = new HashSet<String>();
final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>(); final HashMap<MultiProtocolURI, Properties> anchors = new HashMap<MultiProtocolURI, Properties>();
final HashMap<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>(); final HashMap<MultiProtocolURI, ImageEntry> images = new HashMap<MultiProtocolURI, ImageEntry>();
// add this image to the map of images // add this image to the map of images
String infoString = ii.info.toString(); final String infoString = ii.info.toString();
images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1)); images.put(ii.location, new ImageEntry(location, "", ii.width, ii.height, -1));
if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName()); if (title == null || title.length() == 0) title = MultiProtocolURI.unescape(location.getFileName());
return new Document[]{new Document( return new Document[]{new Document(
location, location,
mimeType, mimeType,
@ -208,15 +211,15 @@ public class genericImageParser extends AbstractParser implements Parser {
images, images,
false)}; // images false)}; // images
} }
public Set<String> supportedMimeTypes() { public Set<String> supportedMimeTypes() {
return SUPPORTED_MIME_TYPES; return SUPPORTED_MIME_TYPES;
} }
public Set<String> supportedExtensions() { public Set<String> supportedExtensions() {
return SUPPORTED_EXTENSIONS; return SUPPORTED_EXTENSIONS;
} }
public static ImageInfo parseJavaImage( public static ImageInfo parseJavaImage(
final MultiProtocolURI location, final MultiProtocolURI location,
final InputStream sourceStream) throws Parser.Failure { final InputStream sourceStream) throws Parser.Failure {
@ -230,17 +233,20 @@ public class genericImageParser extends AbstractParser implements Parser {
} catch (final IOException e) { } catch (final IOException e) {
Log.logException(e); Log.logException(e);
throw new Parser.Failure(e.getMessage(), location); throw new Parser.Failure(e.getMessage(), location);
} catch (final Throwable e) { // may appear in case of an OutOfMemoryError
Log.logException(e);
throw new Parser.Failure(e.getMessage(), location);
} }
if (image == null) throw new Parser.Failure("ImageIO returned NULL", location); if (image == null) throw new Parser.Failure("ImageIO returned NULL", location);
return parseJavaImage(location, image); return parseJavaImage(location, image);
} }
public static ImageInfo parseJavaImage( public static ImageInfo parseJavaImage(
final MultiProtocolURI location, final MultiProtocolURI location,
final BufferedImage image) { final BufferedImage image) {
ImageInfo ii = new ImageInfo(location); final ImageInfo ii = new ImageInfo(location);
ii.image = image; ii.image = image;
// scan the image // scan the image
ii.height = ii.image.getHeight(); ii.height = ii.image.getHeight();
ii.width = ii.image.getWidth(); ii.width = ii.image.getWidth();
@ -262,16 +268,16 @@ public class genericImageParser extends AbstractParser implements Parser {
String [] propNames = ii.image.getPropertyNames(); String [] propNames = ii.image.getPropertyNames();
if (propNames == null) propNames = new String[0]; if (propNames == null) propNames = new String[0];
ii.info.append("\n"); ii.info.append("\n");
for (String propName: propNames) { for (final String propName: propNames) {
ii.info.append(propName).append(" = ").append(ii.image.getProperty(propName)).append(" .\n"); ii.info.append(propName).append(" = ").append(ii.image.getProperty(propName)).append(" .\n");
} }
// append also properties that we measured // append also properties that we measured
ii.info.append("width").append(": ").append(Integer.toString(ii.width)).append(" .\n"); ii.info.append("width").append(": ").append(Integer.toString(ii.width)).append(" .\n");
ii.info.append("height").append(": ").append(Integer.toString(ii.height)).append(" .\n"); ii.info.append("height").append(": ").append(Integer.toString(ii.height)).append(" .\n");
return ii; return ii;
} }
public static class ImageInfo { public static class ImageInfo {
public MultiProtocolURI location; public MultiProtocolURI location;
public BufferedImage image; public BufferedImage image;
@ -286,26 +292,26 @@ public class genericImageParser extends AbstractParser implements Parser {
this.width = -1; this.width = -1;
} }
} }
public static void main(final String[] args) { public static void main(final String[] args) {
File image = new File(args[0]); final File image = new File(args[0]);
genericImageParser parser = new genericImageParser(); final genericImageParser parser = new genericImageParser();
MultiProtocolURI uri; MultiProtocolURI uri;
try { try {
uri = new MultiProtocolURI("http://localhost/" + image.getName()); uri = new MultiProtocolURI("http://localhost/" + image.getName());
Document[] document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", new FileInputStream(image)); final Document[] document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", new FileInputStream(image));
System.out.println(document[0].toString()); System.out.println(document[0].toString());
} catch (MalformedURLException e) { } catch (final MalformedURLException e) {
e.printStackTrace(); e.printStackTrace();
} catch (FileNotFoundException e) { } catch (final FileNotFoundException e) {
e.printStackTrace(); e.printStackTrace();
} catch (Parser.Failure e) { } catch (final Parser.Failure e) {
e.printStackTrace(); e.printStackTrace();
} catch (InterruptedException e) { } catch (final InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
} }

Loading…
Cancel
Save