From 9d2159582f5f2417787b79aa4a6c5057fe2e56ad Mon Sep 17 00:00:00 2001
From: f1ori
Date: Wed, 15 Dec 2010 19:20:00 +0000
Subject: [PATCH] * fix system update if urls are in blacklist (for example for very general blacklists like *.de)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7375 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
 htroot/DictionaryLoader_p.java                       |  4 ++--
 htroot/Load_RSS_p.java                               |  2 +-
 htroot/ViewFile.java                                 |  2 +-
 htroot/api/ymarks/get_treeview.java                  |  2 +-
 htroot/api/ymarks/import_ymark.java                  |  2 +-
 source/de/anomic/crawler/CrawlQueues.java            |  2 +-
 source/de/anomic/crawler/RSSLoader.java              |  2 +-
 source/de/anomic/crawler/ZURL.java                   |  2 +-
 .../anomic/crawler/retrieval/HTTPLoader.java         | 10 +++++-----
 .../anomic/http/server/HTTPDFileHandler.java         | 14 ++++++-------
 source/de/anomic/search/Switchboard.java             |  4 ++--
 source/de/anomic/search/TextSnippet.java             |  2 +-
 source/de/anomic/yacy/graphics/OSMTile.java          |  2 +-
 .../importer/OAIListFriendsLoader.java               |  2 +-
 .../yacy/document/importer/OAIPMHLoader.java         |  2 +-
 .../net/yacy/repository/LoaderDispatcher.java        | 20 +++++++++----------
 16 files changed, 37 insertions(+), 37 deletions(-)

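The hunks below thread a new boolean checkBlacklist parameter through the loader call chain, so that internal downloads (dictionary files, OAI imports, and the release archive fetched by the system update) are no longer rejected when a very broad blacklist entry such as *.de matches their host; regular crawling keeps passing true and stays blacklist-protected. The following sketch is not part of the patch: it only illustrates, against the signatures introduced below, how a caller would opt out of the blacklist check for such an internal download. The helper method and the URL string are invented for the example; sb is assumed to be an initialized Switchboard.

    // Illustration only - not part of the patch.
    private static byte[] fetchIgnoringBlacklist(final Switchboard sb, final String urlString) throws IOException {
        final DigestURI url = new DigestURI(urlString);               // urlString is a placeholder
        final Request request = sb.loader.request(url, false, true);
        // last argument is the new checkBlacklist flag: false skips the Switchboard.urlBlacklist
        // test, so a catch-all pattern like *.de can no longer abort the download
        final Response response = sb.loader.load(request, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, false);
        return response == null ? null : response.getContent();
    }
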
diff --git a/htroot/DictionaryLoader_p.java b/htroot/DictionaryLoader_p.java
index 3cfff407e..0036ec087 100644
--- a/htroot/DictionaryLoader_p.java
+++ b/htroot/DictionaryLoader_p.java
@@ -63,7 +63,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geon0Load")) {
             // load from the net
             try {
-                Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+                Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEON0.url), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, false);
                 byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEON0.file());
                 LibraryProvider.geoLoc.addLocalization(LibraryProvider.Dictionary.GEON0.nickname, new GeonamesLocalization(LibraryProvider.Dictionary.GEON0.file()));
@@ -103,7 +103,7 @@ public class DictionaryLoader_p {
         if (post.containsKey("geo1Load")) {
             // load from the net
             try {
-                Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+                Response response = sb.loader.load(sb.loader.request(new DigestURI(LibraryProvider.Dictionary.GEODB1.url), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, false);
                 byte[] b = response.getContent();
                 FileUtils.copy(b, LibraryProvider.Dictionary.GEODB1.file());
                 LibraryProvider.geoLoc.removeLocalization(LibraryProvider.Dictionary.GEODB0.nickname);
diff --git a/htroot/Load_RSS_p.java b/htroot/Load_RSS_p.java
index 310cb40fe..03b5c94e0 100644
--- a/htroot/Load_RSS_p.java
+++ b/htroot/Load_RSS_p.java
@@ -255,7 +255,7 @@ public class Load_RSS_p {
         RSSReader rss = null;
         if (url != null) try {
             prop.put("url", url.toNormalform(true, false));
-            Response response = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+            Response response = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, true);
             byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (IOException e) {
diff --git a/htroot/ViewFile.java b/htroot/ViewFile.java
index 089bc169d..ab48a4ab2 100644
--- a/htroot/ViewFile.java
+++ b/htroot/ViewFile.java
@@ -169,7 +169,7 @@ public class ViewFile {

         Response response = null;
         try {
-            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.CACHEONLY, Long.MAX_VALUE);
+            response = sb.loader.load(sb.loader.request(url, true, false), authorized ? CrawlProfile.CacheStrategy.IFEXIST : CrawlProfile.CacheStrategy.CACHEONLY, Long.MAX_VALUE, true);
         } catch (IOException e) {
             prop.put("error", "4");
             prop.put("error_errorText", "error loading resource: " + e.getMessage());
diff --git a/htroot/api/ymarks/get_treeview.java b/htroot/api/ymarks/get_treeview.java
index e8e172899..0e6040fd3 100644
--- a/htroot/api/ymarks/get_treeview.java
+++ b/htroot/api/ymarks/get_treeview.java
@@ -186,7 +186,7 @@ public class get_treeview {
             try {
                 final DigestURI u = new DigestURI(post.get(ROOT).substring(2));
                 Response response = null;
-                response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE);
+                response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE, true);
                 final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                 if(document != null) {
                     if(isWordCount) {
diff --git a/htroot/api/ymarks/import_ymark.java b/htroot/api/ymarks/import_ymark.java
index 0294bd2cd..4fca4d7e7 100644
--- a/htroot/api/ymarks/import_ymark.java
+++ b/htroot/api/ymarks/import_ymark.java
@@ -92,7 +92,7 @@ public class import_ymark {
                 try {
                     if(!bmk.containsKey(YMarkTables.BOOKMARK.TAGS.key()) || bmk.get(YMarkTables.BOOKMARK.TAGS.key()).equals(YMarkTables.BOOKMARK.TAGS.deflt())) {
                         final DigestURI u = new DigestURI(bmk.get(YMarkTables.BOOKMARK.URL.key()));
-                        Response response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE);
+                        Response response = sb.loader.load(sb.loader.request(u, true, false), CrawlProfile.CacheStrategy.IFEXIST, Long.MAX_VALUE, true);
                         final Document document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                         if(document != null) {
                             bmk.put(YMarkTables.BOOKMARK.TAGS.key(), sb.tables.bookmarks.autoTag(document, bmk_user, 3));
diff --git a/source/de/anomic/crawler/CrawlQueues.java b/source/de/anomic/crawler/CrawlQueues.java
index fe40aab89..499121918 100644
--- a/source/de/anomic/crawler/CrawlQueues.java
+++ b/source/de/anomic/crawler/CrawlQueues.java
@@ -592,7 +592,7 @@ public class CrawlQueues {
             final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
             final Map mp = sb.crawler.profilesActiveCrawls.get(request.profileHandle().getBytes());
             CrawlProfile e = mp == null ? null : new CrawlProfile(mp);
-            Response response = sb.loader.load(request, e == null ? CrawlProfile.CacheStrategy.IFEXIST : e.cacheStrategy(), maxFileSize);
+            Response response = sb.loader.load(request, e == null ? CrawlProfile.CacheStrategy.IFEXIST : e.cacheStrategy(), maxFileSize, true);
             if (response == null) {
                 request.setStatus("error", WorkflowJob.STATUS_FINISHED);
                 if (log.isFine()) log.logFine("problem loading " + request.url().toString() + ": no content (possibly caused by cache policy)");
diff --git a/source/de/anomic/crawler/RSSLoader.java b/source/de/anomic/crawler/RSSLoader.java
index 5c02a3043..f1f8637eb 100644
--- a/source/de/anomic/crawler/RSSLoader.java
+++ b/source/de/anomic/crawler/RSSLoader.java
@@ -56,7 +56,7 @@ public class RSSLoader extends Thread {
     public void run() {
         RSSReader rss = null;
         try {
-            Response response = sb.loader.load(sb.loader.request(urlf, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+            Response response = sb.loader.load(sb.loader.request(urlf, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, true);
             byte[] resource = response == null ? null : response.getContent();
             rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource);
         } catch (MalformedURLException e) {
diff --git a/source/de/anomic/crawler/ZURL.java b/source/de/anomic/crawler/ZURL.java
index 11ce978c5..3766aef94 100755
--- a/source/de/anomic/crawler/ZURL.java
+++ b/source/de/anomic/crawler/ZURL.java
@@ -175,7 +175,7 @@ public class ZURL implements Iterable {
     public ZURL.Entry get(final byte[] urlhash) {
         try {
             if (urlIndex == null) return null;
-            //System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " get " + urlhash);
+            // System.out.println("*** DEBUG ZURL " + this.urlIndex.filename() + " get " + urlhash);
             final Row.Entry entry = urlIndex.get(urlhash);
             if (entry == null) return null;
             return new Entry(entry);
diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java
index 96b538816..36c784b4b 100644
--- a/source/de/anomic/crawler/retrieval/HTTPLoader.java
+++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java
@@ -68,14 +68,14 @@ public final class HTTPLoader {
         this.socketTimeout = (int) sb.getConfigLong("crawler.clientTimeout", 10000);
     }

-    public Response load(final Request entry, long maxFileSize) throws IOException {
+    public Response load(final Request entry, long maxFileSize, boolean checkBlacklist) throws IOException {
         long start = System.currentTimeMillis();
-        Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize);
+        Response doc = load(entry, DEFAULT_CRAWLING_RETRY_COUNT, maxFileSize, checkBlacklist);
         Latency.update(entry.url(), System.currentTimeMillis() - start);
         return doc;
     }

-    private Response load(final Request request, final int retryCount, final long maxFileSize) throws IOException {
+    private Response load(final Request request, final int retryCount, final long maxFileSize, final boolean checkBlacklist) throws IOException {

         if (retryCount < 0) {
             sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "redirection counter exceeded");
@@ -93,7 +93,7 @@ public final class HTTPLoader {

         // check if url is in blacklist
         final String hostlow = host.toLowerCase();
-        if (Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
+        if (checkBlacklist && Switchboard.urlBlacklist.isListed(Blacklist.BLACKLIST_CRAWLER, hostlow, path)) {
             sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "url in blacklist");
             throw new IOException("CRAWLER Rejecting URL '" + request.url().toString() + "'. URL is in blacklist.");
         }
@@ -164,7 +164,7 @@ public final class HTTPLoader {

                 // retry crawling with new url
                 request.redirectURL(redirectionUrl);
-                return load(request, retryCount - 1, maxFileSize);
+                return load(request, retryCount - 1, maxFileSize, checkBlacklist);
             } else {
                 // no redirection url provided
                 sb.crawlQueues.errorURL.push(request, sb.peers.mySeed().hash.getBytes(), new Date(), 1, "no redirection url provided");
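With the gate above, HTTPLoader only consults Switchboard.urlBlacklist when checkBlacklist is true, and the recursive redirect handling forwards the same flag. The two lines below are not new code; they merely juxtapose, with simplified variable names, the flag values used at the call sites in this patch:

    // crawler path (CrawlQueues, above): blacklist still enforced
    Response crawled = sb.loader.load(request, cacheStrategy, maxFileSize, true);

    // internal download path (LoaderDispatcher, below): blacklist skipped
    Response fetched = sb.loader.load(request, cacheStrategy, maxFileSize, false);
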
  • " + list[i] + "/
  • \n"); + aBuffer.append("
  • " + list[i] + "/
  • \n"); } else { if (list[i].endsWith("html") || (list[i].endsWith("htm"))) { scraper = ContentScraper.parseResource(f); @@ -485,12 +485,12 @@ public final class HTTPDFileHandler { size = (sz / 1024 / 1024) + " MB"; } aBuffer.append("
  • "); - if (headline != null && headline.length() > 0) aBuffer.append("" + headline + "
    "); - aBuffer.append("" + list[i] + "
    "); - if (author != null && author.length() > 0) aBuffer.append("Author: " + author + "
    "); - if (publisher != null && publisher.length() > 0) aBuffer.append("Publisher: " + publisher + "
    "); - if (description != null && description.length() > 0) aBuffer.append("Description: " + description + "
    "); - aBuffer.append(DateFormatter.formatShortDay(new Date(f.lastModified())) + ", " + size + ((images > 0) ? ", " + images + " images" : "") + ((links > 0) ? ", " + links + " links" : "") + "
  • \n"); + if (headline != null && headline.length() > 0) aBuffer.append("" + headline + "
    "); + aBuffer.append("" + list[i] + "
    "); + if (author != null && author.length() > 0) aBuffer.append("Author: " + author + "
    "); + if (publisher != null && publisher.length() > 0) aBuffer.append("Publisher: " + publisher + "
    "); + if (description != null && description.length() > 0) aBuffer.append("Description: " + description + "
    "); + aBuffer.append(DateFormatter.formatShortDay(new Date(f.lastModified())) + ", " + size + ((images > 0) ? ", " + images + " images" : "") + ((links > 0) ? ", " + links + " links" : "") + "
    \n"); } } aBuffer.append(" \n\n\n"); diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 2f91f57cf..bcefcd0b0 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -2015,7 +2015,7 @@ public final class Switchboard extends serverSwitch { @Override public void run() { try { - final Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE); + final Response response = loader.load(request, CacheStrategy.IFFRESH, Long.MAX_VALUE, true); if (response == null) throw new IOException("response == null"); if (response.getContent() == null) throw new IOException("content == null"); if (response.getResponseHeader() == null) throw new IOException("header == null"); @@ -2364,7 +2364,7 @@ public final class Switchboard extends serverSwitch { // if we have an url then try to load the rss RSSReader rss = null; try { - Response response = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); + Response response = sb.loader.load(sb.loader.request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, true); byte[] resource = response == null ? null : response.getContent(); //System.out.println("BLEKKO: " + new String(resource)); rss = resource == null ? null : RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, resource); diff --git a/source/de/anomic/search/TextSnippet.java b/source/de/anomic/search/TextSnippet.java index e1c1cdf3b..51b5360c7 100644 --- a/source/de/anomic/search/TextSnippet.java +++ b/source/de/anomic/search/TextSnippet.java @@ -176,7 +176,7 @@ public class TextSnippet implements Comparable, Comparator m; for (Map.Entry oaiFriend: listFriends.entrySet()) try { if (!oaiFriend.getValue().exists()) { - Response response = loader == null ? null : loader.load(loader.request(new DigestURI(oaiFriend.getKey()), false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE); + Response response = loader == null ? 
diff --git a/source/net/yacy/document/importer/OAIPMHLoader.java b/source/net/yacy/document/importer/OAIPMHLoader.java
index 23c181c32..d453da3d4 100644
--- a/source/net/yacy/document/importer/OAIPMHLoader.java
+++ b/source/net/yacy/document/importer/OAIPMHLoader.java
@@ -48,7 +48,7 @@ public class OAIPMHLoader {
         this.source = source;

         // load the file from the net
-        Response response = loader.load(loader.request(source, false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE);
+        Response response = loader.load(loader.request(source, false, true), CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, true);
        byte[] b = response.getContent();
        this.resumptionToken = new ResumptionToken(source, b);
        //System.out.println("*** ResumptionToken = " + this.resumptionToken.toString());
diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java
index 6ad4ca8c3..1271e9f78 100644
--- a/source/net/yacy/repository/LoaderDispatcher.java
+++ b/source/net/yacy/repository/LoaderDispatcher.java
@@ -135,7 +135,7 @@ public final class LoaderDispatcher {

    public void load(final DigestURI url, CrawlProfile.CacheStrategy cacheStratgy, long maxFileSize, File targetFile) throws IOException {

-        byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize).getContent();
+        byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize, false).getContent();
        if (b == null) throw new IOException("load == null");
        File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@@ -146,7 +146,7 @@
        tmp.renameTo(targetFile);
    }

-    public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
+    public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize, boolean checkBlacklist) throws IOException {
        String url = request.url().toNormalform(true, false);
        Semaphore check = this.loaderSteering.get(url);
        if (check != null) {
@@ -158,7 +158,7 @@

        try {
            this.loaderSteering.put(url, new Semaphore(0));
-            Response response = loadInternal(request, cacheStrategy, maxFileSize);
+            Response response = loadInternal(request, cacheStrategy, maxFileSize, checkBlacklist);
            check = this.loaderSteering.remove(url);
            if (check != null) check.release(1000);
            return response;
@@ -177,7 +177,7 @@
     * @return the loaded entity in a Response object
     * @throws IOException
     */
-    private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
+    private Response loadInternal(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize, boolean checkBlacklist) throws IOException {
        // get the protocol of the next URL
        final DigestURI url = request.url();
        if (url.isFile() || url.isSMB()) cacheStrategy = CrawlProfile.CacheStrategy.NOCACHE; // load just from the file system
@@ -261,7 +261,7 @@

        // load resource from the internet
        Response response = null;
-        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, maxFileSize);
+        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, maxFileSize, checkBlacklist);
        if (protocol.equals("ftp")) response = ftpLoader.load(request, true);
        if (protocol.equals("smb")) response = smbLoader.load(request, true);
        if (protocol.equals("file")) response = fileLoader.load(request, true);
@@ -300,7 +300,7 @@
    public byte[] loadContent(final Request request, CrawlProfile.CacheStrategy cacheStrategy) throws IOException {
        // try to download the resource using the loader
        final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
-        final Response entry = load(request, cacheStrategy, maxFileSize);
+        final Response entry = load(request, cacheStrategy, maxFileSize, false);
        if (entry == null) return null; // not found in web

        // read resource body (if it is there)
@@ -310,7 +310,7 @@
    public Document[] loadDocuments(final Request request, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, long maxFileSize) throws IOException, Parser.Failure {

        // load resource
-        final Response response = load(request, cacheStrategy, maxFileSize);
+        final Response response = load(request, cacheStrategy, maxFileSize, false);
        final DigestURI url = request.url();
        if (response == null) throw new IOException("no Response for url " + url);
@@ -324,7 +324,7 @@
    public ContentScraper parseResource(final DigestURI location, CrawlProfile.CacheStrategy cachePolicy) throws IOException {
        // load page
        final long maxFileSize = this.sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
-        Response r = this.load(request(location, true, false), cachePolicy, maxFileSize);
+        Response r = this.load(request(location, true, false), cachePolicy, maxFileSize, false);
        byte[] page = (r == null) ? null : r.getContent();
        if (page == null) throw new IOException("no response from url " + location.toString());
@@ -343,7 +343,7 @@
     * @throws IOException
     */
    public final Map loadLinks(DigestURI url, CrawlProfile.CacheStrategy cacheStrategy) throws IOException {
-        Response response = load(request(url, true, false), cacheStrategy, Long.MAX_VALUE);
+        Response response = load(request(url, true, false), cacheStrategy, Long.MAX_VALUE, false);
        if (response == null) throw new IOException("response == null");
        ResponseHeader responseHeader = response.getResponseHeader();
        byte[] resource = response.getContent();
@@ -401,7 +401,7 @@
            if (this.cache != null && this.cache.exists()) return;
            try {
                // load from the net
-                Response response = load(request(new DigestURI(this.url), false, true), this.cacheStrategy, this.maxFileSize);
+                Response response = load(request(new DigestURI(this.url), false, true), this.cacheStrategy, this.maxFileSize, true);
                byte[] b = response.getContent();
                if (this.cache != null) FileUtils.copy(b, this.cache);
            } catch (MalformedURLException e) {} catch (IOException e) {}
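
The overload load(url, cacheStrategy, maxFileSize, targetFile) at the top of the LoaderDispatcher diff is the download-to-file path used, among others, by the system update; it now forwards checkBlacklist = false, which is what makes the update work again behind a catch-all blacklist. A usage sketch, not taken from the patch: the mirror URL, the target path and the helper method are invented placeholders, and sb is assumed to be an initialized Switchboard.

    // Illustration only - not part of the patch.
    private static void downloadRelease(final Switchboard sb) throws IOException {
        final DigestURI releaseUrl = new DigestURI("http://mirror.example.de/yacy_release.tar.gz"); // placeholder
        final File target = new File("DATA/RELEASE/yacy_release.tar.gz");                           // placeholder
        // this overload calls load(request, cacheStrategy, maxFileSize, false) internally, so the
        // download succeeds even if mirror.example.de is covered by a blacklist entry like *.de
        sb.loader.load(releaseUrl, CrawlProfile.CacheStrategy.NOCACHE, Long.MAX_VALUE, target);
    }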