@@ -29,7 +29,6 @@ package net.yacy.repository;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.net.MalformedURLException;
import java.util.Arrays;
@@ -39,6 +38,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.Document;
import net.yacy.document.TextParser;
import net.yacy.document.ParserException;
@@ -57,7 +57,6 @@ import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
import de.anomic.crawler.retrieval.SMBLoader;
import de.anomic.http.client.Cache;
import de.anomic.http.client.Client;
import de.anomic.http.server.HeaderFramework;
import de.anomic.http.server.RequestHeader;
import de.anomic.http.server.ResponseHeader;
@@ -98,38 +97,7 @@ public final class LoaderDispatcher {
public HashSet<String> getSupportedProtocols() {
return (HashSet<String>) this.supportedProtocols.clone();
}
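// Usage sketch (illustrative only, not part of the patch): check protocol support before
// handing a URL to one of the load(...) methods of this class. The URL is a placeholder.
private boolean exampleIsLoadable() throws IOException {
    final DigestURI url = new DigestURI("http://example.org/index.html"); // placeholder URL
    return getSupportedProtocols().contains(url.getProtocol());
}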

/**
* load a resource from the web, from ftp, from smb or a file
* @param url
* @param forText shows that this was a for-text crawling request
* @param global shows that this was a global crawling request
* @param cacheStratgy strategy according to CACHE_STRATEGY_NOCACHE,CACHE_STRATEGY_IFFRESH,CACHE_STRATEGY_IFEXIST,CACHE_STRATEGY_CACHEONLY
* @return the loaded entity in a Response object
* @throws IOException
*/
public Response load(
final DigestURI url,
final boolean forText,
final boolean global,
CrawlProfile.CacheStrategy cacheStratgy,
long maxFileSize) throws IOException {
return load(request(url, forText, global), cacheStratgy, maxFileSize);
}
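// Usage sketch (illustrative only, not part of the patch): the overload above simply
// delegates to the Request-based load(...), so a caller can use that form directly.
// The URL and size limit are placeholders; NOCACHE is the strategy already used
// elsewhere in this class.
private Response exampleLoadViaRequest() throws IOException {
    final DigestURI url = new DigestURI("http://example.org/page.html"); // placeholder URL
    return load(request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, 1024L * 1024L);
}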

public void load(final DigestURI url, CrawlProfile.CacheStrategy cacheStratgy, long maxFileSize, File targetFile) throws IOException {

byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize).getContent();
if (b == null) throw new IOException("load == null");
File tmp = new File(targetFile.getAbsolutePath() + ".tmp");

// transaction-safe writing
File parent = targetFile.getParentFile();
if (!parent.exists()) parent.mkdirs();
FileUtils.copy(b, tmp);
tmp.renameTo(targetFile);
}

/**
* generate a request object
* @param url the target url
@@ -160,7 +128,27 @@ public final class LoaderDispatcher {
0,
0);
}

public void load(final DigestURI url, CrawlProfile.CacheStrategy cacheStratgy, long maxFileSize, File targetFile) throws IOException {

byte[] b = load(request(url, false, true), cacheStratgy, maxFileSize).getContent();
if (b == null) throw new IOException("load == null");
File tmp = new File(targetFile.getAbsolutePath() + ".tmp");

// transaction-safe writing
File parent = targetFile.getParentFile();
if (!parent.exists()) parent.mkdirs();
FileUtils.copy(b, tmp);
tmp.renameTo(targetFile);
}
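// Usage sketch (illustrative only, not part of the patch): download a resource into a
// target file with the method above; the content is written to a ".tmp" file and then
// renamed into place. URL, size limit and path are placeholders.
private void exampleDownloadToFile() throws IOException {
    final DigestURI url = new DigestURI("http://example.org/data.xml"); // placeholder URL
    load(url, CrawlProfile.CacheStrategy.NOCACHE, 1024L * 1024L, new File("DATA/example/data.xml")); // placeholder path
}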

/**
* load a resource from the web, from ftp, from smb or a file
* @param request the request essentials
* @param cacheStratgy strategy according to CACHE_STRATEGY_NOCACHE,CACHE_STRATEGY_IFFRESH,CACHE_STRATEGY_IFEXIST,CACHE_STRATEGY_CACHEONLY
* @return the loaded entity in a Response object
* @throws IOException
*/
public Response load(final Request request, CrawlProfile.CacheStrategy cacheStrategy, long maxFileSize) throws IOException {
// get the protocol of the next URL
final String protocol = request.url().getProtocol();
@@ -272,132 +260,40 @@ public final class LoaderDispatcher {
}

/**
* load the url as resource from the web or the cache
* @param url
* @param fetchOnline
* @param socketTimeout
* @param forText
* load the url as byte[] content from the web or the cache
* @param request
* @param cacheStrategy
* @param timeout
* @return the content as {@link byte[]}
* @throws IOException
*/
public byte[] getResource(final DigestURI url, CrawlProfile.CacheStrategy cacheStrategy, final int socketTimeout, final boolean forText, final boolean reindexing) throws IOException {
public byte[] loadContent(final Request request, CrawlProfile.CacheStrategy cacheStrategy) throws IOException {
// try to download the resource using the loader
final long maxFileSize = sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
final Response entry = load(url, forText, reindexing, cacheStrategy, maxFileSize);
final Response entry = load(request, cacheStrategy, maxFileSize);
if (entry == null) return null; // not found in web

// read resource body (if it is there)
return entry.getContent();
}
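// Usage sketch (illustrative only, not part of the patch): fetch raw bytes with the
// Request-based loadContent(...) above. The file size limit is read from the
// configuration inside the method, so only a request and a cache strategy are needed.
// The URL is a placeholder.
private byte[] exampleLoadContent() throws IOException {
    final DigestURI url = new DigestURI("http://example.org/file.txt"); // placeholder URL
    return loadContent(request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE);
}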

/**
* Tries to load and parse a resource specified by its URL.
* If the resource is not stored in cache and fetchOnline is set, then
* this function tries to download the resource from the web.
*
* @param url the URL of the resource
* @param fetchOnline specifies if the resource should be loaded from the web if it's not available in the cache
* @param timeout
* @param forText
* @param global the domain of the search. If global == true then the content is re-indexed
* @return the parsed document as {@link Document}
*/
public static Document retrieveDocument(final DigestURI url, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, final boolean forText, final boolean global, long maxFileSize) {
public Document loadDocument(final Request request, final CrawlProfile.CacheStrategy cacheStrategy, final int timeout, long maxFileSize) throws IOException, ParserException {

// load resource
byte[] resContent = null;
ResponseHeader responseHeader = null;
try {
final Response entry = Switchboard.getSwitchboard().loader.load(url, forText, global, cacheStrategy, maxFileSize);
if (entry == null) {
Log.logFine("snippet fetch", "no Response for url " + url);
return null;
}
final Response response = load(request, cacheStrategy, maxFileSize);
if (response == null) throw new IOException("no Response for url " + request.url());

// read resource body (if it is there)
resContent = entry.getContent();

// read a fresh header
responseHeader = entry.getResponseHeader();

// if it is still not available, report an error
if (resContent == null || responseHeader == null) {
Log.logFine("snippet fetch", "no Content available for url " + url);
return null;
}
} catch (final Exception e) {
Log.logFine("snippet fetch", "error loading resource: " + e.getMessage() + " for url " + url);
return null;
}
// if it is still not available, report an error
if (response.getContent() == null || response.getResponseHeader() == null) throw new IOException("no Content available for url " + request.url());

// parse resource
Document document = null;
try {
document = parseDocument(url, resContent.length, new ByteArrayInputStream(resContent), responseHeader);
} catch (final ParserException e) {
Log.logFine("snippet fetch", "parser error " + e.getMessage() + " for url " + url);
return null;
} finally {
resContent = null;
}
return document;
}

/**
* Parse the resource
* @param url the URL of the resource
* @param contentLength the contentLength of the resource
* @param resourceStream the resource body as stream
* @param docInfo metadata about the resource
* @return the extracted data
* @throws ParserException
*/
public static Document parseDocument(final DigestURI url, final long contentLength, final InputStream resourceStream, ResponseHeader responseHeader) throws ParserException {
try {
if (resourceStream == null) return null;

// STEP 1: if no resource metadata is available, try to load it from cache
if (responseHeader == null) {
// try to get the header from the htcache directory
try {
responseHeader = Cache.getResponseHeader(url);
} catch (final Exception e) {
// ignore this. resource info loading failed
}
}

// STEP 2: if the metadata is still null try to download it from web
if ((responseHeader == null) && (url.getProtocol().startsWith("http"))) {
// TODO: we need a better solution here
// e.g. encapsulate this in the crawlLoader class

// getting URL mimeType
try {
responseHeader = Client.whead(url.toString());
} catch (final Exception e) {
// ignore this. http header download failed
}
}

// STEP 3: if the metadata is still null try to guess the mimeType of the resource
String supportError = TextParser.supports(url, responseHeader == null ? null : responseHeader.mime());
if (supportError != null) {
return null;
}
if (responseHeader == null) {
return TextParser.parseSource(url, null, null, contentLength, resourceStream);
}
return TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), contentLength, resourceStream);
} catch (final InterruptedException e) {
// interruption of thread detected
return null;
}
return response.parse();
}
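// Usage sketch (illustrative only, not part of the patch): load and parse a resource in
// one step via the Request-based loadDocument(...). URL, timeout and size limit are
// placeholders; the returned Document can be queried for hyperlinks as loadLinks() does.
private Document exampleLoadDocument() throws IOException, ParserException {
    final DigestURI url = new DigestURI("http://example.org/article.html"); // placeholder URL
    return loadDocument(request(url, true, false), CrawlProfile.CacheStrategy.NOCACHE, 10000, 1024L * 1024L);
}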

public static ContentScraper parseResource(final LoaderDispatcher loader, final DigestURI location, CrawlProfile.CacheStrategy cachePolicy) throws IOException {
public ContentScraper parseResource(final DigestURI location, CrawlProfile.CacheStrategy cachePolicy) throws IOException {
// load page
final long maxFileSize = loader.sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
Response r = loader.load(location, true, false, cachePolicy, maxFileSize);
final long maxFileSize = this.sb.getConfigLong("crawler.http.maxFileSize", HTTPLoader.DEFAULT_MAXFILESIZE);
Response r = this.load(request(location, true, false), cachePolicy, maxFileSize);
byte[] page = (r == null) ? null : r.getContent();
if (page == null) throw new IOException("no response from url " + location.toString());

@@ -409,6 +305,40 @@ public final class LoaderDispatcher {
return scraper;
}
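// Usage sketch (illustrative only, not part of the patch): obtain a ContentScraper for a
// page with the instance method above. The URL is a placeholder.
private ContentScraper exampleParseResource() throws IOException {
    return parseResource(new DigestURI("http://example.org/"), CrawlProfile.CacheStrategy.NOCACHE); // placeholder URL
}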

/**
* load all links from a resource
* @param url the url that shall be loaded
* @param cacheStrategy the cache strategy
* @return a map from URLs to the anchor texts of the urls
* @throws IOException
*/
public final Map<MultiProtocolURI, String> loadLinks(DigestURI url, CrawlProfile.CacheStrategy cacheStrategy) throws IOException {
Response response = load(request(url, true, false), cacheStrategy, Long.MAX_VALUE);
if (response == null) throw new IOException("response == null");
ResponseHeader responseHeader = response.getResponseHeader();
byte[] resource = response.getContent();
if (resource == null) throw new IOException("resource == null");
if (responseHeader == null) throw new IOException("responseHeader == null");

Document document = null;
String supportError = TextParser.supports(url, responseHeader.mime());
if (supportError != null) throw new IOException("no parser support: " + supportError);
try {
document = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), resource.length, new ByteArrayInputStream(resource));
if (document == null) throw new IOException("document == null");
} catch (final ParserException e) {
throw new IOException("parser error: " + e.getMessage());
} catch (InterruptedException e) {
throw new IOException("interrupted");
} finally {
resource = null;
}

Map<MultiProtocolURI, String> result = document.getHyperlinks();
document.close();
return result;
}
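// Usage sketch (illustrative only, not part of the patch): collect the outgoing links of a
// page together with their anchor texts via loadLinks(...) above. The URL is a placeholder.
private void exampleLoadLinks() throws IOException {
    final Map<MultiProtocolURI, String> links = loadLinks(new DigestURI("http://example.org/"), CrawlProfile.CacheStrategy.NOCACHE); // placeholder URL
    for (final Map.Entry<MultiProtocolURI, String> link : links.entrySet()) {
        System.out.println(link.getKey().toString() + " -> " + link.getValue());
    }
}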

public synchronized void cleanupAccessTimeTable(long timeout) {
final Iterator<Map.Entry<String, Long>> i = accessTime.entrySet().iterator();
Map.Entry<String, Long> e;
@@ -439,7 +369,7 @@ public final class LoaderDispatcher {
if (this.cache.exists()) return;
try {
// load from the net
Response response = load(new DigestURI(this.url), false, true, CrawlProfile.CacheStrategy.NOCACHE, this.maxFileSize);
Response response = load(request(new DigestURI(this.url), false, true), CrawlProfile.CacheStrategy.NOCACHE, this.maxFileSize);
byte[] b = response.getContent();
FileUtils.copy(b, this.cache);
} catch (MalformedURLException e) {} catch (IOException e) {}