diff --git a/defaults/yacy.init b/defaults/yacy.init index 1e1033578..1174be8df 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -796,6 +796,11 @@ search.excludehosth= # the cases of nocache, iffresh and ifexist causes an index deletion search.verify.delete = true +# images may be treated either as documents that are shown in search results or as objects +# that are only visible in special search environments, like image search +search.excludeintext.image = true +crawler.load.image = true + # remote search details remotesearch.maxcount = 10 remotesearch.maxtime = 3000 diff --git a/source/net/yacy/crawler/CrawlStacker.java b/source/net/yacy/crawler/CrawlStacker.java index 0351f0620..4c3cd42dc 100644 --- a/source/net/yacy/crawler/CrawlStacker.java +++ b/source/net/yacy/crawler/CrawlStacker.java @@ -336,9 +336,10 @@ public final class CrawlStacker { // check availability of parser and maxfilesize String warning = null; + boolean loadImages = Switchboard.getSwitchboard().getConfigBool("crawler.load.image", true); if ((maxFileSize >= 0 && entry.size() > maxFileSize) || entry.url().getContentDomain() == ContentDomain.APP || - entry.url().getContentDomain() == ContentDomain.IMAGE || + (!loadImages && entry.url().getContentDomain() == ContentDomain.IMAGE) || entry.url().getContentDomain() == ContentDomain.AUDIO || entry.url().getContentDomain() == ContentDomain.VIDEO || entry.url().getContentDomain() == ContentDomain.CTRL) { diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index dc0fc71ed..07689f518 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2512,10 +2512,12 @@ public final class Switchboard extends serverSwitch { ) { // get the hyperlinks final Map hl = Document.getHyperlinks(documents); + boolean loadImages = getConfigBool("crawler.load.image", true); + if (loadImages) hl.putAll(Document.getImagelinks(documents)); // add all media links also to the crawl 
stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links if (response.profile().directDocByURL()) { - hl.putAll(Document.getImagelinks(documents)); + if (!loadImages) hl.putAll(Document.getImagelinks(documents)); hl.putAll(Document.getApplinks(documents)); hl.putAll(Document.getVideolinks(documents)); hl.putAll(Document.getAudiolinks(documents));