@ -9,7 +9,7 @@
// $LastChangedBy$
//
// LICENSE
//
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
@ -33,24 +33,16 @@ import java.io.File;
import java.io.FileInputStream ;
import java.io.FileNotFoundException ;
import java.io.IOException ;
import java.io.InputStream ; import java.net.MalformedURLException ;
import java.io.InputStream ;
import java.net.MalformedURLException ;
import java.util.HashMap ;
import java.util.HashSet ;
import java.util.Properties ;
import java.util.Iterator ;
import java.util.Properties ;
import java.util.Set ;
import javax.imageio.ImageIO ;
import com.drew.imaging.jpeg.JpegProcessingException ;
import com.drew.imaging.jpeg.JpegSegmentReader ;
import com.drew.metadata.Directory ;
import com.drew.metadata.Metadata ;
import com.drew.metadata.MetadataException ;
import com.drew.metadata.Tag ;
import com.drew.metadata.exif.ExifReader ;
import com.drew.metadata.iptc.IptcReader ;
import net.yacy.cora.document.MultiProtocolURI ;
import net.yacy.cora.document.UTF8 ;
import net.yacy.document.AbstractParser ;
@ -61,6 +53,15 @@ import net.yacy.document.parser.images.bmpParser.IMAGEMAP;
import net.yacy.kelondro.logging.Log ;
import net.yacy.kelondro.util.FileUtils ;
import com.drew.imaging.jpeg.JpegProcessingException ;
import com.drew.imaging.jpeg.JpegSegmentReader ;
import com.drew.metadata.Directory ;
import com.drew.metadata.Metadata ;
import com.drew.metadata.MetadataException ;
import com.drew.metadata.Tag ;
import com.drew.metadata.exif.ExifReader ;
import com.drew.metadata.iptc.IptcReader ;
public class genericImageParser extends AbstractParser implements Parser {
/ * *
@ -82,17 +83,17 @@ public class genericImageParser extends AbstractParser implements Parser {
SUPPORTED_MIME_TYPES . add ( "image/jpg" ) ; // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
SUPPORTED_MIME_TYPES . add ( "image/bmp" ) ;
}
/**
 * Creates a new image parser, passing "Generic Image Parser" to the
 * {@code AbstractParser} constructor.
 */
public genericImageParser() {
    // Exactly one explicit super(...) invocation is permitted, and it must be
    // the first statement of the constructor body.
    super("Generic Image Parser");
}
public Document [ ] parse (
final MultiProtocolURI location ,
final String mimeType ,
final String documentCharset ,
final MultiProtocolURI location ,
final String mimeType ,
final String documentCharset ,
final InputStream sourceStream ) throws Parser . Failure , InterruptedException {
ImageInfo ii = null ;
String title = null ;
String author = null ;
@ -103,11 +104,11 @@ public class genericImageParser extends AbstractParser implements Parser {
byte [ ] b ;
try {
b = FileUtils . read ( sourceStream ) ;
} catch ( IOException e ) {
} catch ( final IOException e ) {
Log . logException ( e ) ;
throw new Parser . Failure ( e . getMessage ( ) , location ) ;
}
IMAGEMAP imap = bmpParser . parse ( b ) ;
final IMAGEMAP imap = bmpParser . parse ( b ) ;
ii = parseJavaImage ( location , imap . getImage ( ) ) ;
} else if ( mimeType . equals ( "image/jpg" ) | |
location . getFileExtension ( ) . equals ( "jpg" ) | |
@ -120,75 +121,77 @@ public class genericImageParser extends AbstractParser implements Parser {
byte [ ] b ;
try {
b = FileUtils . read ( sourceStream ) ;
} catch ( IOException e ) {
} catch ( final IOException e ) {
Log . logException ( e ) ;
throw new Parser . Failure ( e . getMessage ( ) , location ) ;
}
ii = parseJavaImage ( location , new ByteArrayInputStream ( b ) ) ;
JpegSegmentReader segmentReader ;
try {
segmentReader = new JpegSegmentReader ( new ByteArrayInputStream ( b ) ) ;
byte [ ] exifSegment = segmentReader . readSegment ( JpegSegmentReader . SEGMENT_APP1 ) ;
byte [ ] iptcSegment = segmentReader . readSegment ( JpegSegmentReader . SEGMENT_APPD ) ;
Metadata metadata = new Metadata ( ) ;
final byte [ ] exifSegment = segmentReader . readSegment ( JpegSegmentReader . SEGMENT_APP1 ) ;
final byte [ ] iptcSegment = segmentReader . readSegment ( JpegSegmentReader . SEGMENT_APPD ) ;
final Metadata metadata = new Metadata ( ) ;
new ExifReader ( exifSegment ) . extract ( metadata ) ;
new IptcReader ( iptcSegment ) . extract ( metadata ) ;
@SuppressWarnings ( "unchecked" )
final
Iterator < Directory > directories = metadata . getDirectoryIterator ( ) ;
HashMap < String , String > props = new HashMap < String , String > ( ) ;
final HashMap < String , String > props = new HashMap < String , String > ( ) ;
while ( directories . hasNext ( ) ) {
Directory directory = directories . next ( ) ;
final Directory directory = directories . next ( ) ;
@SuppressWarnings ( "unchecked" )
final
Iterator < Tag > tags = directory . getTagIterator ( ) ;
while ( tags . hasNext ( ) ) {
Tag tag = tags . next ( ) ;
final Tag tag = tags . next ( ) ;
try {
props . put ( tag . getTagName ( ) , tag . getDescription ( ) ) ;
ii . info . append ( tag . getTagName ( ) + ": " + tag . getDescription ( ) + " .\n" ) ;
} catch ( MetadataException e ) {
} catch ( final MetadataException e ) {
//Log.logException(e);
}
}
title = props . get ( "Image Description" ) ;
if ( title = = null | | title . length ( ) = = 0 ) title = props . get ( "Headline" ) ;
if ( title = = null | | title . length ( ) = = 0 ) title = props . get ( "Object Name" ) ;
author = props . get ( "Artist" ) ;
if ( author = = null | | author . length ( ) = = 0 ) author = props . get ( "Writer/Editor" ) ;
if ( author = = null | | author . length ( ) = = 0 ) author = props . get ( "By-line" ) ;
if ( author = = null | | author . length ( ) = = 0 ) author = props . get ( "Credit" ) ;
if ( author = = null | | author . length ( ) = = 0 ) author = props . get ( "Make" ) ;
keywords = props . get ( "Keywords" ) ;
if ( keywords = = null | | keywords . length ( ) = = 0 ) keywords = props . get ( "Category" ) ;
if ( keywords = = null | | keywords . length ( ) = = 0 ) keywords = props . get ( "Supplemental Category(s)" ) ;
description = props . get ( "Caption/Abstract" ) ;
if ( description = = null | | description . length ( ) = = 0 ) description = props . get ( "Country/Primary Location" ) ;
if ( description = = null | | description . length ( ) = = 0 ) description = props . get ( "Province/State" ) ;
if ( description = = null | | description . length ( ) = = 0 ) description = props . get ( "Copyright Notice" ) ;
}
} catch ( JpegProcessingException e ) {
} catch ( final JpegProcessingException e ) {
//Log.logException(e);
// just ignore
}
} else {
ii = parseJavaImage ( location , sourceStream ) ;
}
}
final HashSet < String > languages = new HashSet < String > ( ) ;
final HashMap < MultiProtocolURI , Properties > anchors = new HashMap < MultiProtocolURI , Properties > ( ) ;
final HashMap < MultiProtocolURI , ImageEntry > images = new HashMap < MultiProtocolURI , ImageEntry > ( ) ;
// add this image to the map of images
String infoString = ii . info . toString ( ) ;
final String infoString = ii . info . toString ( ) ;
images . put ( ii . location , new ImageEntry ( location , "" , ii . width , ii . height , - 1 ) ) ;
if ( title = = null | | title . length ( ) = = 0 ) title = MultiProtocolURI . unescape ( location . getFileName ( ) ) ;
return new Document [ ] { new Document (
location ,
mimeType ,
@ -208,15 +211,15 @@ public class genericImageParser extends AbstractParser implements Parser {
images ,
false ) } ; // images
}
/**
 * Returns the MIME types handled by this parser.
 *
 * @return the {@code SUPPORTED_MIME_TYPES} set itself (not a copy)
 */
public Set<String> supportedMimeTypes() {
    return SUPPORTED_MIME_TYPES;
}
/**
 * Returns the file extensions handled by this parser.
 *
 * @return the {@code SUPPORTED_EXTENSIONS} set itself (not a copy)
 */
public Set<String> supportedExtensions() {
    return SUPPORTED_EXTENSIONS;
}
public static ImageInfo parseJavaImage (
final MultiProtocolURI location ,
final InputStream sourceStream ) throws Parser . Failure {
@ -230,17 +233,20 @@ public class genericImageParser extends AbstractParser implements Parser {
} catch ( final IOException e ) {
Log . logException ( e ) ;
throw new Parser . Failure ( e . getMessage ( ) , location ) ;
} catch ( final Throwable e ) { // may appear in case of an OutOfMemoryError
Log . logException ( e ) ;
throw new Parser . Failure ( e . getMessage ( ) , location ) ;
}
if ( image = = null ) throw new Parser . Failure ( "ImageIO returned NULL" , location ) ;
return parseJavaImage ( location , image ) ;
}
public static ImageInfo parseJavaImage (
final MultiProtocolURI location ,
final BufferedImage image ) {
ImageInfo ii = new ImageInfo ( location ) ;
final ImageInfo ii = new ImageInfo ( location ) ;
ii . image = image ;
// scan the image
ii . height = ii . image . getHeight ( ) ;
ii . width = ii . image . getWidth ( ) ;
@ -262,16 +268,16 @@ public class genericImageParser extends AbstractParser implements Parser {
String [ ] propNames = ii . image . getPropertyNames ( ) ;
if ( propNames = = null ) propNames = new String [ 0 ] ;
ii . info . append ( "\n" ) ;
for ( String propName : propNames ) {
for ( final String propName : propNames ) {
ii . info . append ( propName ) . append ( " = " ) . append ( ii . image . getProperty ( propName ) ) . append ( " .\n" ) ;
}
// append also properties that we measured
ii . info . append ( "width" ) . append ( ": " ) . append ( Integer . toString ( ii . width ) ) . append ( " .\n" ) ;
ii . info . append ( "height" ) . append ( ": " ) . append ( Integer . toString ( ii . height ) ) . append ( " .\n" ) ;
return ii ;
}
public static class ImageInfo {
public MultiProtocolURI location ;
public BufferedImage image ;
@ -286,26 +292,26 @@ public class genericImageParser extends AbstractParser implements Parser {
this . width = - 1 ;
}
}
/**
 * Command-line test driver: parses the image file named by the first
 * argument and prints the first resulting document to standard output.
 *
 * The file is addressed through a synthetic {@code http://localhost/<name>}
 * URI, and its MIME type is derived as {@code "image/" + extension}.
 * Failures are reported by printing the stack trace.
 *
 * @param args command-line arguments; {@code args[0]} is the path of the image file
 */
public static void main(final String[] args) {
    if (args.length < 1) {
        // without this guard a missing argument fails with ArrayIndexOutOfBoundsException
        System.err.println("usage: genericImageParser <image-file>");
        return;
    }
    final File image = new File(args[0]);
    final genericImageParser parser = new genericImageParser();
    InputStream in = null;
    try {
        final MultiProtocolURI uri = new MultiProtocolURI("http://localhost/" + image.getName());
        in = new FileInputStream(image);
        final Document[] document = parser.parse(uri, "image/" + uri.getFileExtension(), "UTF-8", in);
        System.out.println(document[0].toString());
    } catch (final MalformedURLException e) {
        e.printStackTrace();
    } catch (final FileNotFoundException e) {
        e.printStackTrace();
    } catch (final Parser.Failure e) {
        e.printStackTrace();
    } catch (final InterruptedException e) {
        e.printStackTrace();
    } finally {
        // release the file handle on every path; closing an already closed stream is harmless
        if (in != null) {
            try {
                in.close();
            } catch (final IOException e) {
                e.printStackTrace();
            }
        }
    }
}
}