@@ -99,8 +99,9 @@ public final class LoaderDispatcher {
     public Response load(
             final DigestURI url,
             final boolean forText,
-            final boolean global) throws IOException {
-        return load(request(url, forText, global), forText);
+            final boolean global,
+            final long maxFileSize) throws IOException {
+        return load(request(url, forText, global), forText, maxFileSize);
     }
 
     /**
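Every public load() entry point now carries the size limit explicitly, so a caller that previously wrote loader.load(url, true, false) must pass a byte cap as well. A hypothetical call site (the URL and the 10 MB limit are illustrative, not from this patch):

    // Sketch of a caller of the new overload; values are examples only.
    final DigestURI url = new DigestURI("http://example.net/page.html");
    final Response response = loader.load(url, true, false, 10 * 1024 * 1024L); // forText, not global, 10 MB cap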
@@ -116,13 +117,14 @@ public final class LoaderDispatcher {
             final DigestURI url,
             final boolean forText,
             final boolean global,
-            int cacheStratgy) throws IOException {
-        return load(request(url, forText, global), forText, cacheStratgy);
+            CrawlProfile.CacheStrategy cacheStratgy,
+            long maxFileSize) throws IOException {
+        return load(request(url, forText, global), forText, cacheStratgy, maxFileSize);
     }
 
-    public void load(final DigestURI url, int cacheStratgy, File targetFile) throws IOException {
-        byte[] b = load(request(url, false, true), false, cacheStratgy).getContent();
+    public void load(final DigestURI url, CrawlProfile.CacheStrategy cacheStratgy, long maxFileSize, File targetFile) throws IOException {
+        byte[] b = load(request(url, false, true), false, cacheStratgy, maxFileSize).getContent();
         if (b == null) throw new IOException("load == null");
         File tmp = new File(targetFile.getAbsolutePath() + ".tmp");
@@ -164,14 +166,14 @@ public final class LoaderDispatcher {
                 0);
     }
 
-    public Response load(final Request request, final boolean acceptOnlyParseable) throws IOException {
+    public Response load(final Request request, final boolean acceptOnlyParseable, long maxFileSize) throws IOException {
         CrawlProfile.entry crawlProfile = sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle());
-        int cacheStrategy = CrawlProfile.CACHE_STRATEGY_IFEXIST;
+        CrawlProfile.CacheStrategy cacheStrategy = CrawlProfile.CacheStrategy.IFEXIST;
         if (crawlProfile != null) cacheStrategy = crawlProfile.cacheStrategy();
-        return load(request, acceptOnlyParseable, cacheStrategy);
+        return load(request, acceptOnlyParseable, cacheStrategy, maxFileSize);
     }
 
-    public Response load(final Request request, final boolean acceptOnlyParseable, int cacheStrategy) throws IOException {
+    public Response load(final Request request, final boolean acceptOnlyParseable, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
         // get the protocol of the next URL
         final String protocol = request.url().getProtocol();
         final String host = request.url().getHost();
@@ -183,7 +185,7 @@ public final class LoaderDispatcher {
         // check if we have the page in the cache
         CrawlProfile.entry crawlProfile = sb.crawler.profilesActiveCrawls.getEntry(request.profileHandle());
-        if (crawlProfile != null && cacheStrategy != CrawlProfile.CACHE_STRATEGY_NOCACHE) {
+        if (crawlProfile != null && cacheStrategy != CrawlProfile.CacheStrategy.NOCACHE) {
             // we have passed a first test if caching is allowed
             // now see if there is a cache entry
@@ -214,14 +216,14 @@ public final class LoaderDispatcher {
                     content);
 
             // check which caching strategy shall be used
-            if (cacheStrategy == CrawlProfile.CACHE_STRATEGY_IFEXIST || cacheStrategy == CrawlProfile.CACHE_STRATEGY_CACHEONLY) {
+            if (cacheStrategy == CrawlProfile.CacheStrategy.IFEXIST || cacheStrategy == CrawlProfile.CacheStrategy.CACHEONLY) {
                 // well, just take the cache and don't care about freshness of the content
                 log.logInfo("cache hit/useall for: " + request.url().toNormalform(true, false));
                 return response;
             }
 
             // now the cacheStrategy must be CACHE_STRATEGY_IFFRESH, that means we should do a proxy freshness test
-            assert cacheStrategy == CrawlProfile.CACHE_STRATEGY_IFFRESH : "cacheStrategy = " + cacheStrategy;
+            assert cacheStrategy == CrawlProfile.CacheStrategy.IFFRESH : "cacheStrategy = " + cacheStrategy;
             if (response.isFreshForProxy()) {
                 log.logInfo("cache hit/fresh for: " + request.url().toNormalform(true, false));
                 return response;
@@ -232,7 +234,7 @@ public final class LoaderDispatcher {
         }
 
         // check case where we want results from the cache exclusively, and never from the internet (offline mode)
-        if (cacheStrategy == CrawlProfile.CACHE_STRATEGY_CACHEONLY) {
+        if (cacheStrategy == CrawlProfile.CacheStrategy.CACHEONLY) {
             // we had a chance to get the content from the cache .. its over. We don't have it.
             return null;
         }
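With this hunk all four strategy values have appeared: NOCACHE, IFFRESH, IFEXIST and CACHEONLY, replacing the former CACHE_STRATEGY_* int constants. The enum declaration itself is not part of this diff; judging from the comparisons and the comments above, a minimal sketch of what CrawlProfile presumably nests would be:

    // Sketch only -- the real declaration in CrawlProfile is outside this diff.
    public static enum CacheStrategy {
        NOCACHE,   // never use the cache; always load from the net
        IFFRESH,   // use a cache entry only if it passes the proxy freshness test
        IFEXIST,   // use any existing cache entry, regardless of freshness
        CACHEONLY; // never go to the net; a cache miss yields null (offline mode)
    }

A side benefit of the enum over the int constants: the compiler now rejects out-of-range strategy values, which the assert in the IFFRESH branch previously had to catch at runtime.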
@@ -259,7 +261,7 @@ public final class LoaderDispatcher {
         // load resource from the internet
         Response response = null;
-        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, acceptOnlyParseable);
+        if ((protocol.equals("http") || (protocol.equals("https")))) response = httpLoader.load(request, acceptOnlyParseable, maxFileSize);
         if (protocol.equals("ftp")) response = ftpLoader.load(request, true);
         if (protocol.equals("smb")) response = smbLoader.load(request, true);
         if (response != null) {
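Only the HTTP/HTTPS branch receives maxFileSize; the FTP and SMB loaders keep their old signatures. How HTTPLoader applies the limit is not visible in this diff; a typical approach is to reject a response whose declared Content-Length exceeds the cap before reading the body, along these lines (plain java.net used for illustration, not YaCy's actual HTTP client):

    // Illustrative sketch only, not HTTPLoader's real code.
    private static void checkSizeLimit(final java.net.HttpURLConnection con, final long maxFileSize) throws java.io.IOException {
        final long contentLength = con.getContentLengthLong(); // -1 if the header is absent
        if (maxFileSize >= 0 && contentLength > maxFileSize) {
            con.disconnect();
            throw new java.io.IOException("content-length " + contentLength + " exceeds limit of " + maxFileSize + " bytes");
        }
    }

Responses without a Content-Length header would additionally need a counting stream that aborts once maxFileSize bytes have been read.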
@@ -302,7 +304,8 @@ public final class LoaderDispatcher {
         if (!fetchOnline) return null;
 
         // try to download the resource using the loader
-        final Response entry = load(url, forText, reindexing);
+        final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
+        final Response entry = load(url, forText, reindexing, maxFileSize);
         if (entry == null) return null; // not found in web
 
         // read resource body (if it is there)
@@ -321,7 +324,7 @@ public final class LoaderDispatcher {
      * @param global the domain of the search. If global == true then the content is re-indexed
      * @return the parsed document as {@link Document}
      */
-    public static Document retrieveDocument(final DigestURI url, final boolean fetchOnline, final int timeout, final boolean forText, final boolean global) {
+    public static Document retrieveDocument(final DigestURI url, final boolean fetchOnline, final int timeout, final boolean forText, final boolean global, long maxFileSize) {
 
         // load resource
         byte[] resContent = null;
@@ -336,7 +339,7 @@ public final class LoaderDispatcher {
                 // if not found try to download it
 
                 // download resource using the crawler and keep resource in memory if possible
-                final Response entry = Switchboard.getSwitchboard().loader.load(url, forText, global);
+                final Response entry = Switchboard.getSwitchboard().loader.load(url, forText, global, maxFileSize);
 
                 // getting resource metadata (e.g. the http headers for http resources)
                 if (entry != null) {
@@ -431,9 +434,10 @@ public final class LoaderDispatcher {
         }
     }
 
-    public static ContentScraper parseResource(final LoaderDispatcher loader, final DigestURI location, int cachePolicy) throws IOException {
+    public static ContentScraper parseResource(final LoaderDispatcher loader, final DigestURI location, CrawlProfile.CacheStrategy cachePolicy) throws IOException {
         // load page
-        Response r = loader.load(location, true, false, cachePolicy);
+        final long maxFileSize = loader.sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
+        Response r = loader.load(location, true, false, cachePolicy, maxFileSize);
         byte[] page = (r == null) ? null : r.getContent();
         if (page == null) throw new IOException("no response from url " + location.toString());
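Unlike the other entry points, parseResource does not expose maxFileSize to its callers: it resolves the limit itself from the crawler.http.maxFileSize setting (falling back to HTTPLoader.DEFAULT_MAXFILESIZE), so existing call sites only switch from an int policy to the enum. A hypothetical caller, the URL being illustrative:

    // Hypothetical call site; only the cachePolicy argument changes for existing callers.
    final ContentScraper scraper = LoaderDispatcher.parseResource(
            loader, new DigestURI("http://example.net/index.html"), CrawlProfile.CacheStrategy.IFEXIST);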
@@ -455,25 +459,27 @@ public final class LoaderDispatcher {
         }
     }
 
-    public void loadIfNotExistBackground(String url, File cache) {
-        new Loader(url, cache).start();
+    public void loadIfNotExistBackground(String url, File cache, long maxFileSize) {
+        new Loader(url, cache, maxFileSize).start();
     }
 
     private class Loader extends Thread {
 
         private String url;
         private File cache;
+        private long maxFileSize;
 
-        public Loader(String url, File cache) {
+        public Loader(String url, File cache, long maxFileSize) {
             this.url = url;
             this.cache = cache;
+            this.maxFileSize = maxFileSize;
         }
 
         public void run() {
             if (this.cache.exists()) return;
             try {
                 // load from the net
-                Response response = load(new DigestURI(this.url), false, true, CrawlProfile.CACHE_STRATEGY_NOCACHE);
+                Response response = load(new DigestURI(this.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, this.maxFileSize);
                 byte[] b = response.getContent();
                 FileUtils.copy(b, this.cache);
             } catch (MalformedURLException e) {} catch (IOException e) {}
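A hedged usage sketch of the extended background loader; the URL, cache file and 2 MB limit are examples, not values from this patch:

    // Hypothetical call site: fetch an icon into the local cache in the background;
    // Loader.run() returns immediately if the target file already exists.
    loader.loadIfNotExistBackground("http://example.net/favicon.ico",
            new File(iconCacheDir, "example.net.favicon.ico"), // iconCacheDir: assumed existing directory
            2 * 1024 * 1024L);

Note that run() swallows both MalformedURLException and IOException, so a failed background fetch leaves no trace beyond the missing cache file.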