diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index be114593d..bf6c049a3 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -150,14 +150,23 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
public void scrapeTag0(final String tagname, final Properties tagopts) {
if (tagname.equalsIgnoreCase("img")) {
- int width = -1, height = -1;
try {
- width = Integer.parseInt(tagopts.getProperty("width", "-1"));
- height = Integer.parseInt(tagopts.getProperty("height", "-1"));
+ final int width = Integer.parseInt(tagopts.getProperty("width", "-1"));
+ final int height = Integer.parseInt(tagopts.getProperty("height", "-1"));
+ if (width > 15 && height > 15) {
+ final float ratio = (float) Math.min(width, height) / Math.max(width, height);
+ if (ratio > 0.4) {
+ final yacyURL url = absolutePath(tagopts.getProperty("src", ""));
+ final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt", ""), width, height);
+ addImage(images, ie);
+ }
+// I think that real pictures have width & height attributes - thq
+// } else if (width < 0 && height < 0) { // add or to ignore !?
+// final yacyURL url = absolutePath(tagopts.getProperty("src", ""));
+// final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt", ""), width, height);
+// addImage(images, ie);
+ }
} catch (final NumberFormatException e) {}
- final yacyURL url = absolutePath(tagopts.getProperty("src", ""));
- final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt",""), width, height);
- addImage(images, ie);
}
if (tagname.equalsIgnoreCase("base")) try {
root = new yacyURL(tagopts.getProperty("href", ""), null);