diff --git a/htroot/CrawlStartSite_p.html b/htroot/CrawlStartSite_p.html index 39eaeee41..cb522780e 100644 --- a/htroot/CrawlStartSite_p.html +++ b/htroot/CrawlStartSite_p.html @@ -95,7 +95,7 @@
allow query-strings (urls with a '?' in the path) - + diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java index 5a3966c57..bcad9fd42 100644 --- a/htroot/Crawler_p.java +++ b/htroot/Crawler_p.java @@ -184,7 +184,7 @@ public class Crawler_p { env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth)); if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8; - final boolean directDocByURL = "on".equals(post.get("directDocByURL", "off")); + final boolean directDocByURL = "on".equals(post.get("directDocByURL", "on")); // catch also all linked media documents without loading them env.setConfig("crawlingDirectDocByURL", directDocByURL); // recrawl diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java index 7acda2996..050790c6a 100644 --- a/source/net/yacy/search/Switchboard.java +++ b/source/net/yacy/search/Switchboard.java @@ -2355,10 +2355,12 @@ public final class Switchboard extends serverSwitch final Map hl = Document.getHyperlinks(documents); // add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links - hl.putAll(Document.getImagelinks(documents)); - hl.putAll(Document.getApplinks(documents)); - hl.putAll(Document.getVideolinks(documents)); - hl.putAll(Document.getAudiolinks(documents)); + if (response.profile().directDocByURL()) { + hl.putAll(Document.getImagelinks(documents)); + hl.putAll(Document.getApplinks(documents)); + hl.putAll(Document.getVideolinks(documents)); + hl.putAll(Document.getAudiolinks(documents)); + } // insert those hyperlinks to the crawler MultiProtocolURI nextUrl;