diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java index be114593d..bf6c049a3 100644 --- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java +++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java @@ -150,14 +150,23 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen public void scrapeTag0(final String tagname, final Properties tagopts) { if (tagname.equalsIgnoreCase("img")) { - int width = -1, height = -1; try { - width = Integer.parseInt(tagopts.getProperty("width", "-1")); - height = Integer.parseInt(tagopts.getProperty("height", "-1")); + final int width = Integer.parseInt(tagopts.getProperty("width", "-1")); + final int height = Integer.parseInt(tagopts.getProperty("height", "-1")); + if (width > 15 && height > 15) { + final float ratio = (float) Math.min(width, height) / Math.max(width, height); + if (ratio > 0.4) { + final yacyURL url = absolutePath(tagopts.getProperty("src", "")); + final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt", ""), width, height); + addImage(images, ie); + } +// i think that real pictures have witdth & height tags - thq +// } else if (width < 0 && height < 0) { // add or to ignore !? +// final yacyURL url = absolutePath(tagopts.getProperty("src", "")); +// final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt", ""), width, height); +// addImage(images, ie); + } } catch (final NumberFormatException e) {} - final yacyURL url = absolutePath(tagopts.getProperty("src", "")); - final htmlFilterImageEntry ie = new htmlFilterImageEntry(url, tagopts.getProperty("alt",""), width, height); - addImage(images, ie); } if (tagname.equalsIgnoreCase("base")) try { root = new yacyURL(tagopts.getProperty("href", ""), null);