allow query strings (URLs containing a '?' after the path)
-
+
diff --git a/htroot/Crawler_p.java b/htroot/Crawler_p.java
index 5a3966c57..bcad9fd42 100644
--- a/htroot/Crawler_p.java
+++ b/htroot/Crawler_p.java
@@ -184,7 +184,7 @@ public class Crawler_p {
env.setConfig("crawlingDepth", Integer.toString(newcrawlingdepth));
if ((crawlOrder) && (newcrawlingdepth > 8)) newcrawlingdepth = 8;
- final boolean directDocByURL = "on".equals(post.get("directDocByURL", "off"));
+ final boolean directDocByURL = "on".equals(post.get("directDocByURL", "on")); // also catch all linked media documents without loading them
env.setConfig("crawlingDirectDocByURL", directDocByURL);
// recrawl
diff --git a/source/net/yacy/search/Switchboard.java b/source/net/yacy/search/Switchboard.java
index 7acda2996..050790c6a 100644
--- a/source/net/yacy/search/Switchboard.java
+++ b/source/net/yacy/search/Switchboard.java
@@ -2355,10 +2355,12 @@ public final class Switchboard extends serverSwitch
final Map hl = Document.getHyperlinks(documents);
// add all media links also to the crawl stack. They will be re-sorted to the NOLOAD queue and indexed afterwards as pure links
- hl.putAll(Document.getImagelinks(documents));
- hl.putAll(Document.getApplinks(documents));
- hl.putAll(Document.getVideolinks(documents));
- hl.putAll(Document.getAudiolinks(documents));
+ if (response.profile().directDocByURL()) {
+ hl.putAll(Document.getImagelinks(documents));
+ hl.putAll(Document.getApplinks(documents));
+ hl.putAll(Document.getVideolinks(documents));
+ hl.putAll(Document.getAudiolinks(documents));
+ }
// insert those hyperlinks to the crawler
MultiProtocolURI nextUrl;