On image search, prefer the MIME type to sort out non-image documents

Generalize the hack that prevents URLs with just an image file extension from being returned as image hits, improving http://mantis.tokeek.de/view.php?id=528
10 years ago · 370ba9da71
parent cd31633369
commit 370ba9da71
1 changed files with 7 additions and 2 deletions
--- a/source/net/yacy/search/query/SearchEvent.java
+++ b/source/net/yacy/search/query/SearchEvent.java
@ -1580,8 +1580,13 @@ public final class SearchEvent {
        SolrDocument doc = ms.getNode();
        // there can be two different kinds of image hits: either the document itself is an image or images are embedded in the links of text documents.
        String mime = (String) doc.getFirstValue(CollectionSchema.content_type.getSolrFieldName());
-        boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
-        if (!fakeImageHost && (Response.docType(ms.url()) == Response.DT_IMAGE || Response.docType(mime) == Response.DT_IMAGE)) {
+
+        // boolean fakeImageHost = ms.url().getHost() != null && ms.url().getHost().indexOf("wikipedia") > 0; // pages with image extension from wikipedia do not contain image files but html files... I know this is a bad hack, but many results come from wikipedia and we must handle that
+        // generalize the above hack (for URLs that have an image file extension but are actually HTML pages with an HTML mime type)
+        char docType = Response.docType(mime); // first look at mime (as some html pages have img extension (like wikipedia)
+        if (docType == Response.DT_UNKNOWN) docType = Response.docType(ms.url()); // try extension if mime wasn't successful
+
+        if (docType == Response.DT_IMAGE) {
            String id = ASCII.String(ms.hash());
            if (!imageViewed.containsKey(id) && !containsSpare(id)) imageSpareGood.put(id, new ImageResult(ms.url(), ms.url(), "", ms.title(), 0, 0, 0));
        } else {