- added bmp parser to the document parsers

- image parser that implement the document parser interface return itself in the list of images of the document which should cause that the parsed images contribute to the image search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6493 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 1dff620181
commit d2938c44a1

@ -8,8 +8,6 @@ import java.awt.Toolkit;
import net.yacy.document.parser.images.bmpParser;
import net.yacy.document.parser.images.icoParser;
public class ImageParser {
public static final Image parse(final String filename, final byte[] source) {
@ -17,9 +15,7 @@ public class ImageParser {
Image image;
if (((filename.endsWith(".ico")) || (filename.endsWith(".bmp"))) && (bmpParser.isBMP(source))) {
// parse image with BMP parser
bmpParser bmpparser;
bmpparser = new bmpParser(source);
image = bmpparser.getImage();
image = bmpParser.parse(source).getImage();
if (image == null) return null;
} else if ((filename.endsWith(".ico")) && (icoParser.isICO(source))) {
// parse image with ICO parser

@ -60,6 +60,7 @@ import net.yacy.document.parser.vcfParser;
import net.yacy.document.parser.vsdParser;
import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser;
import net.yacy.document.parser.images.bmpParser;
import net.yacy.document.parser.images.genericImageParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
@ -83,6 +84,7 @@ public final class TextParser {
private static final Set<String> denyExtension = new TreeSet<String>(insensitiveCollator);
static {
initParser(new bmpParser());
initParser(new bzipParser());
initParser(new csvParser());
initParser(new docParser());

@ -1,4 +1,4 @@
// ymageBMPParser.java
// bmpParser.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 15.07.2007 on http://yacy.net
//
@ -25,16 +25,27 @@
package net.yacy.document.parser.images;
import java.awt.image.BufferedImage;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import javax.imageio.ImageIO;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Idiom;
import net.yacy.document.ParserException;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
public class bmpParser {
public class bmpParser extends AbstractParser implements Idiom {
// this is a implementation of http://de.wikipedia.org/wiki/Windows_Bitmap
@ -49,16 +60,95 @@ public class bmpParser {
//private static int BI_RLE4 = 2;
//private static int BI_BITFIELDS = 3;
private final IMAGEMAP imagemap;
//boolean debugmode = false;
public static final Set<String> SUPPORTED_MIME_TYPES = new HashSet<String>();
public static final Set<String> SUPPORTED_EXTENSIONS = new HashSet<String>();
static {
SUPPORTED_EXTENSIONS.add("bmp");
SUPPORTED_MIME_TYPES.add("image/bmp");
}
public bmpParser() {
super("BMP Image Parser");
}
public Set<String> supportedMimeTypes() {
return SUPPORTED_MIME_TYPES;
}
public Set<String> supportedExtensions() {
return SUPPORTED_EXTENSIONS;
}
public static final boolean isBMP(final byte[] source) {
// check the file magic
return (source != null) && (source.length >= 2) && (source[0] == 'B') && (source[1] == 'M');
}
public bmpParser(final byte[] source) {
@Override
public Document parse(
final DigestURI location,
final String mimeType,
final String documentCharset,
final InputStream sourceStream) throws ParserException, InterruptedException {
BufferedImage image = null;
try {
image = ImageIO.read(sourceStream);
} catch (final EOFException e) {
throw new ParserException(e.getMessage(), location);
} catch (final IOException e) {
throw new ParserException(e.getMessage(), location);
}
if (image == null) throw new ParserException("ImageIO returned NULL", location);
// scan the image
int height = image.getHeight();
int width = image.getWidth();
/*
Raster raster = image.getData();
int[] pixel = raster.getPixel(0, 0, (int[])null);
long[] average = new long[pixel.length];
for (int i = 0; i < average.length; i++) average[i] = 0L;
int pc = 0;
for (int x = width / 4; x < 3 * width / 4; x = x + 2) {
for (int y = height / 4; y < 3 * height / 4; y = y + 2) {
pixel = raster.getPixel(x, y, pixel);
for (int i = 0; i < average.length; i++) average[i] += pixel[i];
pc++;
}
}
*/
// get image properties
String [] propNames = image.getPropertyNames();
if (propNames == null) propNames = new String[0];
StringBuilder sb = new StringBuilder(propNames.length * 80);
for (String propName: propNames) {
sb.append(propName).append(" = ").append(image.getProperty(propName)).append(" .\n");
}
final HashSet<String> languages = new HashSet<String>();
final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
final HashMap<String, ImageEntry> images = new HashMap<String, ImageEntry>();
// add this image to the map of images
images.put(sb.toString(), new ImageEntry(location, "", width, height));
return new Document(
location,
mimeType,
"UTF-8",
languages,
new String[]{}, // keywords
"", // title
"", // author
new String[]{}, // sections
"", // description
sb.toString().getBytes(), // content text
anchors, // anchors
images); // images
}
public static IMAGEMAP parse(final byte[] source) {
// read info-header
final int bfOffBits = DWORD(source, FILEHEADER_offset + 10);
@ -69,13 +159,9 @@ public class bmpParser {
assert bfOffBits == INFOHEADER_offset + 40 + colortable.colorbytes : "bfOffBits = " + bfOffBits + ", colorbytes = " + colortable.colorbytes;
assert infoheader.biSizeImage <= source.length - bfOffBits : "bfOffBits = " + bfOffBits + ", biSizeImage = " + infoheader.biSizeImage + ", source.length = " + source.length;
imagemap = new IMAGEMAP(source, bfOffBits, infoheader.biWidth, infoheader.biHeight, infoheader.biCompression, infoheader.biBitCount, colortable);
return new IMAGEMAP(source, bfOffBits, infoheader.biWidth, infoheader.biHeight, infoheader.biCompression, infoheader.biBitCount, colortable);
}
public BufferedImage getImage() {
return imagemap.image;
}
public static final int DWORD(final byte[] b, final int offset) {
if (offset + 3 >= b.length) return 0;
int ret = (b[offset + 3] & 0xff);
@ -148,7 +234,7 @@ public class bmpParser {
public static class IMAGEMAP {
public BufferedImage image;
private BufferedImage image;
public IMAGEMAP(final byte[] s, final int offset, final int width, final int height, final int compression, final int bitcount, final COLORTABLE colortable) {
// parse picture content
@ -168,7 +254,6 @@ public class bmpParser {
}
}
private void parseBMP1(final byte[] s, final int offset, final int width, final int height, final COLORTABLE colortable) {
int n = 0;
int b;
@ -245,6 +330,11 @@ public class bmpParser {
if (r == 0) return 0;
return 4 - r;
}
public BufferedImage getImage() {
return this.image;
}
}
public static void main(final String[] args) {
@ -264,13 +354,10 @@ public class bmpParser {
Log.logException(e);
}
final bmpParser parser = new bmpParser(file);
try {
ImageIO.write(parser.getImage(), "PNG", out);
ImageIO.write(parse(file).getImage(), "PNG", out);
} catch (final IOException e) {
Log.logException(e);
}
}
}

@ -1,4 +1,4 @@
// pngParser.java
// genericImageParser.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 16.10.2009 on http://yacy.net
//
@ -81,10 +81,10 @@ public class genericImageParser extends AbstractParser implements Idiom {
}
if (image == null) throw new ParserException("ImageIO returned NULL", location);
/*
// scan the image
int height = image.getHeight();
int width = image.getWidth();
/*
Raster raster = image.getData();
int[] pixel = raster.getPixel(0, 0, (int[])null);
long[] average = new long[pixel.length];
@ -109,6 +109,8 @@ public class genericImageParser extends AbstractParser implements Idiom {
final HashSet<String> languages = new HashSet<String>();
final HashMap<DigestURI, String> anchors = new HashMap<DigestURI, String>();
final HashMap<String, ImageEntry> images = new HashMap<String, ImageEntry>();
// add this image to the map of images
images.put(sb.toString(), new ImageEntry(location, "", width, height));
return new Document(
location,

@ -92,7 +92,7 @@ public class icoParser {
public BufferedImage getImage(final int index) {
if (imagemaps == null || index >= imagemaps.length) return null;
return imagemaps[index].image;
return imagemaps[index].getImage();
}
public static void main(final String[] args) {

Loading…
Cancel
Save