diff --git a/htroot/Table_API_p.html b/htroot/Table_API_p.html
index 08e11e82b..720a0f110 100644
--- a/htroot/Table_API_p.html
+++ b/htroot/Table_API_p.html
@@ -85,7 +85,7 @@ To see a list of all APIs, please visit the
#[type]#
#(isCrawlerStart)#::
- ::
+ ::
-
+
#(/isCrawlerStart)#
#[comment]#
diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index 3e2caa3fa..6bd73f704 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -57,7 +57,6 @@ import net.yacy.document.SentenceReader;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.htmlParser;
import net.yacy.document.parser.html.Evaluation.Element;
-import net.yacy.document.parser.images.genericImageParser;
import net.yacy.kelondro.io.CharBuffer;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.ISO639;
@@ -552,23 +551,16 @@ public class ContentScraper extends AbstractScraper implements Scraper {
href = CharacterCoding.html2unicode(href);
AnchorURL url;
if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
- final String ext = MultiProtocolURL.getFileExtension(url.getFileName());
- if (genericImageParser.SUPPORTED_EXTENSIONS.contains(ext)) {
- // special handling of such urls: put them to the image urls
- final ImageEntry ie = new ImageEntry(url, recursiveParse(url, tag.content.getChars()), -1, -1, -1);
- this.images.add(ie);
- } else {
- if (followDenied()) {
- String rel = tag.opts.getProperty("rel", EMPTY_STRING);
- if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow";
- tag.opts.put("rel", rel);
- }
- tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html in tag text like "test"
- tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
- url.setAll(tag.opts);
- recursiveParse(url, tag.content.getChars());
- this.anchors.add(url);
+ if (followDenied()) {
+ String rel = tag.opts.getProperty("rel", EMPTY_STRING);
+ if (rel.length() == 0) rel = "nofollow"; else if (rel.indexOf("nofollow") < 0) rel += ",nofollow";
+ tag.opts.put("rel", rel);
}
+ tag.opts.put("text", stripAllTags(tag.content.getChars())); // strip any inline html in tag text like "test"
+ tag.opts.put("href", url.toNormalform(true)); // we must assign this because the url may have resolved backpaths and may not be absolute
+ url.setAll(tag.opts);
+ recursiveParse(url, tag.content.getChars());
+ this.anchors.add(url);
}
this.evaluationScores.match(Element.apath, href);
}
diff --git a/source/net/yacy/document/parser/html/TransformerWriter.java b/source/net/yacy/document/parser/html/TransformerWriter.java
index 2f361395d..18bdb0ba0 100644
--- a/source/net/yacy/document/parser/html/TransformerWriter.java
+++ b/source/net/yacy/document/parser/html/TransformerWriter.java
@@ -291,8 +291,7 @@ public final class TransformerWriter extends Writer {
}
if (this.transformer != null && this.transformer.isTag0(tagname)) {
// this single tag is collected at once here
- char[] b = new char[0];
- b = this.transformer.transformTag0(tag, quotechar);
+ char[] b = this.transformer.transformTag0(tag, quotechar);
return b;
} else if ((this.scraper != null && this.scraper.isTag1(tagname)) ||
(this.transformer != null && this.transformer.isTag1(tagname))) {
diff --git a/source/net/yacy/document/parser/images/genericImageParser.java b/source/net/yacy/document/parser/images/genericImageParser.java
index c6370f00b..74e04f779 100644
--- a/source/net/yacy/document/parser/images/genericImageParser.java
+++ b/source/net/yacy/document/parser/images/genericImageParser.java
@@ -65,30 +65,25 @@ import com.drew.metadata.Metadata;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.GpsDirectory;
+/**
+ * Parser for images: bmp, jpeg and every other format supported by the Java Image I/O API.
+ * By default Java ImageIO supports bmp, gif, jpg, jpeg, png and wbmp (plus tif if jai-imageio is in the classpath/registered).
+ * See http://download.java.net/media/jai-imageio/javadoc/1.1/overview-summary.html
+ */
public class genericImageParser extends AbstractParser implements Parser {
- /**
- * a list of mime types that are supported by this parser class
- * @see #getSupportedMimeTypes()
- */
- public static final Set SUPPORTED_MIME_TYPES = new HashSet();
- public static final Set SUPPORTED_EXTENSIONS = new HashSet();
- static {
+ public genericImageParser() {
+ super("Generic Image Parser");
+
SUPPORTED_EXTENSIONS.add("bmp");
- // by default java ImageIO supports bmp, gif, jpg, jpeg, png, wbmp (tif if jai-imageio is in classpath/registered)
- // http://download.java.net/media/jai-imageio/javadoc/1.1/overview-summary.html
SUPPORTED_EXTENSIONS.add("jpe"); // not listed in ImageIO extension but sometimes uses for jpeg
SUPPORTED_EXTENSIONS.addAll(Arrays.asList(ImageIO.getReaderFileSuffixes()));
SUPPORTED_MIME_TYPES.add("image/bmp");
- SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
+ SUPPORTED_MIME_TYPES.add("image/jpg"); // this is in fact a 'wrong' mime type. We leave it here because that is a common error that occurs in the internet frequently
SUPPORTED_MIME_TYPES.addAll(Arrays.asList(ImageIO.getReaderMIMETypes()));
}
- public genericImageParser() {
- super("Generic Image Parser");
- }
-
@Override
public Document[] parse(
final AnchorURL location,
@@ -130,7 +125,8 @@ public class genericImageParser extends AbstractParser implements Parser {
try {
b = FileUtils.read(source);
// check jpeg file signature (magic number FF D8 FF)
- if ((b[0] != (byte) 0xFF) // cast to signed byte (-1)
+ if (b.length < 3
+ || (b[0] != (byte) 0xFF) // cast to signed byte (-1)
|| (b[1] != (byte) 0xD8) //cast to signed byte (-40)
|| (b[2] != (byte) 0xFF)) {
throw new Parser.Failure("File has no jpeg signature", location);
@@ -232,7 +228,7 @@ public class genericImageParser extends AbstractParser implements Parser {
return SUPPORTED_EXTENSIONS;
}
- public static ImageInfo parseJavaImage(
+ private ImageInfo parseJavaImage(
final AnchorURL location,
final InputStream sourceStream) throws Parser.Failure {
BufferedImage image = null;
@@ -247,7 +243,7 @@ public class genericImageParser extends AbstractParser implements Parser {
return parseJavaImage(location, image);
}
- public static ImageInfo parseJavaImage(
+ private ImageInfo parseJavaImage(
final AnchorURL location,
final BufferedImage image) {
final ImageInfo ii = new ImageInfo(location);
@@ -284,7 +280,7 @@ public class genericImageParser extends AbstractParser implements Parser {
return ii;
}
- public static class ImageInfo {
+ private class ImageInfo {
public AnchorURL location;
public BufferedImage image;
public StringBuilder info;