simplified snippet computation process and separated the algorithm into two classes

also enhanced selection criteria for best snippet line computation

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7182 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 15 years ago
parent 4450c240b7
commit 10a9cb1971

@ -29,6 +29,7 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.util.Collection;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
@ -243,7 +244,7 @@ public class ViewFile {
} else if (viewMode.equals("sentences")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_SENTENCES);
final Iterator<StringBuilder> sentences = document.getSentences(pre);
final Collection<StringBuilder> sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
@ -251,8 +252,8 @@ public class ViewFile {
if (sentences != null) {
// Search word highlighting
while (sentences.hasNext()) {
sentence = sentences.next().toString();
for (StringBuilder s: sentences) {
sentence = s.toString();
if (sentence.trim().length() > 0) {
prop.put("viewMode_sentences_" + i + "_nr", i + 1);
prop.put("viewMode_sentences_" + i + "_text", markup(wordArray, sentence));
@ -266,7 +267,7 @@ public class ViewFile {
} else if (viewMode.equals("words")) {
prop.put("viewMode", VIEW_MODE_AS_PARSED_WORDS);
final Iterator<StringBuilder> sentences = document.getSentences(pre);
final Collection<StringBuilder> sentences = document.getSentences(pre);
boolean dark = true;
int i = 0;
@ -274,8 +275,8 @@ public class ViewFile {
if (sentences != null) {
// Search word highlighting
while (sentences.hasNext()) {
sentence = sentences.next().toString();
for (StringBuilder s: sentences) {
sentence = s.toString();
Enumeration<StringBuilder> tokens = Condenser.wordTokenizer(sentence, "UTF-8");
while (tokens.hasMoreElements()) {
token = tokens.nextElement().toString();

@ -80,6 +80,8 @@ public class yacysearch {
final boolean searchAllowed = sb.getConfigBool("publicSearchpage", true) || sb.verifyAuthentication(header, false);
final boolean authenticated = sb.adminAuthenticated(header) >= 2;
final boolean localhostAccess = sb.accessFromLocalhost(header);
int display = (post == null) ? 0 : post.getInt("display", 0);
if (!authenticated) display = 2;
// display == 0: shop top menu
@ -234,34 +236,34 @@ public class yacysearch {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: BLACKLISTED CLIENT FROM " + client + " gets no permission to search");
} else if (Domains.matchesList(client, sb.networkWhitelist)) {
Log.logInfo("LOCAL_SEARCH", "ACCECC CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
} else if (!authenticated && (global || snippetFetchStrategy.isAllowedToFetchOnline())) {
Log.logInfo("LOCAL_SEARCH", "ACCESS CONTROL: WHITELISTED CLIENT FROM " + client + " gets no search restrictions");
} else if (!authenticated && !localhostAccess) {
// in case that we do a global search or we want to fetch snippets, we check for DoS cases
synchronized (trackerHandles) {
int accInOneSecond = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000)).size();
int accInThreeSeconds = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 3000)).size();
int accInOneMinute = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 60000)).size();
int accInTenMinutes = trackerHandles.tailSet(Long.valueOf(System.currentTimeMillis() - 600000)).size();
if (accInTenMinutes > 600) {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: CLIENT FROM " + client + ": " + accInTenMinutes + " searches in ten minutes, fully blocked (no results generated)");
} else if (accInOneMinute > 200) {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
// protections against too strong YaCy network load, reduces remote search
if (global) {
if (accInTenMinutes >= 30 || accInOneMinute >= 6 || accInThreeSeconds >= 1) {
global = false;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInOneSecond + "/1s, " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed global search");
}
}
// protection against too many remote server snippet loads (protects traffic on server)
if (snippetFetchStrategy.isAllowedToFetchOnline()) {
if (accInTenMinutes >= 20 || accInOneMinute >= 4 || accInThreeSeconds >= 1) {
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInOneSecond + "/1s, " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed remote snippet loading");
}
}
// general load protection
if (accInTenMinutes >= 2000 || accInOneMinute >= 600 || accInOneSecond >= 20) {
block = true;
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: CLIENT FROM " + client + ": " + accInOneMinute + " searches in one minute, fully blocked (no results generated)");
} else if (accInThreeSeconds > 1) {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: CLIENT FROM " + client + ": " + accInThreeSeconds + " searches in three seconds, blocked global search and snippets");
} else if (accInOneSecond > 2) {
global = false;
snippetFetchStrategy = CrawlProfile.CacheStrategy.CACHEONLY;
Log.logWarning("LOCAL_SEARCH", "ACCECC CONTROL: CLIENT FROM " + client + ": " + accInOneSecond + " searches in one second, blocked global search and snippets");
Log.logWarning("LOCAL_SEARCH", "ACCESS CONTROL: CLIENT FROM " + client + ": " + accInOneSecond + "/1s, " + accInThreeSeconds + "/3s, " + accInOneMinute + "/60s, " + accInTenMinutes + "/600s, " + " requests, disallowed search");
}
}
}

@ -256,10 +256,9 @@ public final class HTTPDemon implements serverHandler, Cloneable {
public static int staticAdminAuthenticated(final String authorization, final serverSwitch sw) {
// the authorization string must be given with the truncated 6 bytes at the beginning
if (authorization == null) return 1;
//if (authorization.length() < 6) return 1; // no authentication information given
final String adminAccountBase64MD5 = sw.getConfig(ADMIN_ACCOUNT_B64MD5, "");
if (adminAccountBase64MD5.length() == 0) return 2; // no password stored
if (authorization == null || authorization.length() == 0) return 1;
if (adminAccountBase64MD5.equals(Digest.encodeMD5Hex(authorization))) return 4; // hard-authenticated, all ok
return 1;
}

@ -29,17 +29,20 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.MimeTable;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.parser.html.ImageEntry;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
@ -157,8 +160,8 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
entry = i.next();
url = new DigestURI(entry.getKey());
desc = entry.getValue();
int ranking = TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
int ranking = removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() +
removeAppearanceHashes(desc, queryhashes).size();
if (ranking < 2 * queryhashes.size()) {
result.add(new MediaSnippet(mediatype, url, MimeTable.url2mime(url), desc, document.getTextLength(), null, ranking, source));
}
@ -186,41 +189,40 @@ public class MediaSnippet implements Comparable<MediaSnippet>, Comparator<MediaS
if (ientry.width() > 0 && ientry.width() < 64) continue;
desc = ientry.alt();
int appcount = queryhashes.size() * 2 -
TextSnippet.removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
TextSnippet.removeAppearanceHashes(desc, queryhashes).size();
removeAppearanceHashes(url.toNormalform(false, false), queryhashes).size() -
removeAppearanceHashes(desc, queryhashes).size();
final long ranking = Long.MAX_VALUE - (ientry.height() + 1) * (ientry.width() + 1) * (appcount + 1);
result.add(new MediaSnippet(ContentDomain.IMAGE, url, MimeTable.url2mime(url), desc, ientry.fileSize(), ientry.width(), ientry.height(), ranking, source));
}
return result;
}
/*
private static String computeMediaSnippet(Map<yacyURL, String> media, Set<String> queryhashes) {
Iterator<Map.Entry<yacyURL, String>> i = media.entrySet().iterator();
Map.Entry<yacyURL, String> entry;
yacyURL url;
String desc;
Set<String> s;
String result = "";
while (i.hasNext()) {
entry = i.next();
url = entry.getKey();
desc = entry.getValue();
s = removeAppearanceHashes(url.toNormalform(false, false), queryhashes);
if (isEmpty()) {
result += "<br /><a href=\"" + url + "\">" + ((desc.length() == 0) ? url : desc) + "</a>";
continue;
}
s = removeAppearanceHashes(desc, s);
if (isEmpty()) {
result += "<br /><a href=\"" + url + "\">" + ((desc.length() == 0) ? url : desc) + "</a>";
continue;
/**
* Removes all word hashes that can be computed as tokens from a given sentence from a given hash set
* @param sentence
* @param queryhashes
* @return the given hash set minus the hashes from the tokenization of the given sentence
*/
private static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
// remove all hashes that appear in the sentence
if (sentence == null) return queryhashes;
final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence);
final Iterator<byte[]> j = queryhashes.iterator();
byte[] hash;
Integer pos;
final HandleSet remaininghashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), queryhashes.size());
while (j.hasNext()) {
hash = j.next();
pos = hs.get(hash);
if (pos == null) {
try {
remaininghashes.put(hash);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
}
}
if (result.length() == 0) return null;
return result.substring(6);
return remaininghashes;
}
*/
}

@ -220,7 +220,7 @@ public class ResultFetcher {
if (query.contentdom == ContentDomain.TEXT) {
// attach text snippet
startTime = System.currentTimeMillis();
final TextSnippet snippet = TextSnippet.retrieveTextSnippet(
final TextSnippet snippet = new TextSnippet(
this.loader,
metadata,
snippetFetchWordHashes,

@ -2057,12 +2057,19 @@ public final class Switchboard extends serverSwitch {
}
}
public int adminAuthenticated(final RequestHeader requestHeader) {
public boolean accessFromLocalhost(final RequestHeader requestHeader) {
// authorization for localhost, only if flag is set to grant localhost access as admin
final String clientIP = requestHeader.get(HeaderFramework.CONNECTION_PROP_CLIENTIP, "");
if (!Domains.isLocal(clientIP)) return false;
final String refererHost = requestHeader.refererHost();
boolean accessFromLocalhost = Domains.isLocal(clientIP) && (refererHost == null || refererHost.length() == 0 || Domains.isLocal(refererHost));
return refererHost == null || refererHost.length() == 0 || Domains.isLocal(refererHost);
}
public int adminAuthenticated(final RequestHeader requestHeader) {
// authorization for localhost, only if flag is set to grant localhost access as admin
boolean accessFromLocalhost = accessFromLocalhost(requestHeader);
if (getConfigBool("adminAccountForLocalhost", false) && accessFromLocalhost) return 3; // soft-authenticated for localhost
// get the authorization string from the header

@ -25,6 +25,7 @@
package de.anomic.search;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
@ -36,20 +37,18 @@ import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.document.Condenser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.SnippetExtractor;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.ByteArray;
import net.yacy.repository.LoaderDispatcher;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.Response;
import de.anomic.http.client.Cache;
import de.anomic.yacy.yacySearch;
public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnippet> {
@ -68,66 +67,6 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
public static final int ERROR_PARSER_NO_LINES = 15;
public static final int ERROR_NO_MATCH = 16;
private static final ARC<String, String> snippetsCache = new ConcurrentARC<String, String>(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors()));
private static final ARC<String, DigestURI> faviconCache = new ConcurrentARC<String, DigestURI>(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors()));
private final DigestURI url;
private String line;
private final String error;
private final int errorCode;
private HandleSet remaingHashes;
private final DigestURI favicon;
public static boolean existsInCache(final DigestURI url, final HandleSet queryhashes) {
final String hashes = yacySearch.set2string(queryhashes);
return retrieveFromCache(hashes, new String(url.hash())) != null;
}
public static void storeToCache(final String wordhashes, final String urlhash, final String snippet) {
// generate key
String key = urlhash + wordhashes;
// do nothing if snippet is known
if (snippetsCache.containsKey(key)) return;
// learn new snippet
snippetsCache.put(key, snippet);
}
public static String retrieveFromCache(final String wordhashes, final String urlhash) {
// generate key
final String key = urlhash + wordhashes;
return snippetsCache.get(key);
}
/**
* Removes all word hashes that can be computed as tokens from a given sentence from a given hash set
* @param sentence
* @param queryhashes
* @return the given hash set minus the hashes from the tokenization of the given sentence
*/
public static HandleSet removeAppearanceHashes(final String sentence, final HandleSet queryhashes) {
// remove all hashes that appear in the sentence
if (sentence == null) return queryhashes;
final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence);
final Iterator<byte[]> j = queryhashes.iterator();
byte[] hash;
Integer pos;
final HandleSet remaininghashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), queryhashes.size());
while (j.hasNext()) {
hash = j.next();
pos = hs.get(hash);
if (pos == null) {
try {
remaininghashes.put(hash);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
}
}
return remaininghashes;
}
/**
* <code>\\A[^\\p{L}\\p{N}].+</code>
*/
@ -149,53 +88,205 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
*/
private final static Pattern p01 = Pattern.compile("(.*?)(\\<b\\>.+?\\</b\\>)(.*)"); // marked words are in <b>-tags
public TextSnippet(final DigestURI url, final String line, final int errorCode, final HandleSet remaingHashes, final String errortext) {
this(url, line, errorCode, remaingHashes, errortext, null);
/**
 * In-memory snippet cache keyed by the concatenation urlhash + wordhashes.
 * Backed by a ConcurrentARC sized by maxCache; thread-safety is that of the
 * underlying ARC implementation.
 */
public static class Cache {
// ARC = adaptive replacement cache; maps (urlhash + wordhashes) -> snippet line
private final ARC<String, String> cache;
public Cache() {
cache = new ConcurrentARC<String, String>(maxCache, Math.max(10, Runtime.getRuntime().availableProcessors()));
}
/**
 * Stores a snippet line for a word-hash/url-hash combination.
 * Does nothing if an entry for this key already exists (first write wins).
 * @param wordhashes concatenated query word hashes
 * @param urlhash hash of the document url
 * @param snippet the snippet line to remember
 */
public void put(final String wordhashes, final String urlhash, final String snippet) {
// generate key
String key = urlhash + wordhashes;
// do nothing if snippet is known
if (cache.containsKey(key)) return;
// learn new snippet
cache.put(key, snippet);
}
/**
 * Looks up a previously stored snippet.
 * @return the cached snippet line, or null if none is stored for this key
 */
public String get(final String wordhashes, final String urlhash) {
// generate key
final String key = urlhash + wordhashes;
return cache.get(key);
}
/**
 * @return true if a snippet is cached for this word-hash/url-hash combination
 */
public boolean contains(final String wordhashes, final String urlhash) {
return cache.containsKey(urlhash + wordhashes);
}
}
public static final Cache snippetsCache = new Cache();
private byte[] urlhash;
private String line;
private String error;
private int errorCode;
public TextSnippet(final byte[] urlhash, final String line, final int errorCode, final String errortext) {
init(urlhash, line, errorCode, errortext);
}
public TextSnippet(final DigestURI url, final String line, final int errorCode, final HandleSet remaingHashes, final String errortext, final DigestURI favicon) {
this.url = url;
/**
 * Computes a text snippet for a search result document.
 * Resolution order, as visible in this body: (1) the shared snippetsCache,
 * (2) document metadata (title, creator, subject, normalized url) when the
 * resource is not already in the htcache and the cache strategy allows it,
 * (3) loading the resource via the given loader, parsing it, and extracting
 * a snippet with SnippetExtractor. The result (snippet line or error code)
 * is stored on this instance through init(...).
 *
 * @param loader dispatcher used to fetch the resource when no cached snippet exists
 * @param comp metadata components (url, dc_title, dc_creator, dc_subject) of the candidate document
 * @param queryhashes hashes of the query words the snippet must contain
 * @param cacheStrategy controls whether network loading is permitted (mustBeOffline)
 * @param pre passed through to document.getSentences — presumably a pre-formatting flag; confirm in Document
 * @param snippetMaxLength maximum length of the produced snippet line
 * @param maxDocLen NOTE(review): not referenced anywhere in this body — confirm whether it is still needed
 * @param reindexing passed to loader.request for the load request
 */
public TextSnippet(final LoaderDispatcher loader, final URIMetadataRow.Components comp, final HandleSet queryhashes, final CrawlProfile.CacheStrategy cacheStrategy, final boolean pre, final int snippetMaxLength, final int maxDocLen, final boolean reindexing) {
// heise = "0OQUNU3JSs05"
final DigestURI url = comp.url();
if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url);
init(url.hash(), null, ERROR_NO_HASH_GIVEN, "no query hashes given");
return;
}
// try to get snippet from snippetCache
int source = SOURCE_CACHE;
final String wordhashes = yacySearch.set2string(queryhashes);
final String urls = new String(url.hash());
String line = snippetsCache.get(wordhashes, urls);
if (line != null) {
// found the snippet
init(url.hash(), line, source, null);
return;
}
/* ===========================================================================
* LOAD RESOURCE DATA
* =========================================================================== */
// if the snippet is not in the cache, we can try to get it from the htcache
Response response;
try {
// first try to get the snippet from metadata
String loc;
// metadata shortcut only applies when the object is NOT already cached locally
// and the strategy does not force offline operation
boolean objectWasInCache = de.anomic.http.client.Cache.has(url);
boolean useMetadata = !objectWasInCache && !cacheStrategy.mustBeOffline();
if (useMetadata && containsAllHashes(loc = comp.dc_title(), queryhashes)) {
// try to create the snippet from information given in the url itself
init(url.hash(), loc, SOURCE_METADATA, null);
return;
} else if (useMetadata && containsAllHashes(loc = comp.dc_creator(), queryhashes)) {
// try to create the snippet from information given in the creator metadata
init(url.hash(), loc, SOURCE_METADATA, null);
return;
} else if (useMetadata && containsAllHashes(loc = comp.dc_subject(), queryhashes)) {
// try to create the snippet from information given in the subject metadata
init(url.hash(), loc, SOURCE_METADATA, null);
return;
} else if (useMetadata && containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
// try to create the snippet from information given in the url
init(url.hash(), loc, SOURCE_METADATA, null);
return;
} else {
// try to load the resource from the cache
response = loader.load(loader.request(url, true, reindexing), cacheStrategy, Long.MAX_VALUE);
if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy.mustBeOffline()) {
init(url.hash(), null, ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry");
return;
}
// if it is still not available, report an error
init(url.hash(), null, ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry");
return;
}
if (!objectWasInCache) {
// place entry on indexing queue
Switchboard.getSwitchboard().toIndexer(response);
source = SOURCE_WEB;
}
}
} catch (final Exception e) {
//Log.logException(e);
init(url.hash(), null, ERROR_SOURCE_LOADING, "error loading resource: " + e.getMessage());
return;
}
/* ===========================================================================
* PARSE RESOURCE
* =========================================================================== */
Document document = null;
try {
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) {
init(url.hash(), null, ERROR_PARSER_FAILED, e.getMessage()); // cannot be parsed
return;
}
if (document == null) {
init(url.hash(), null, ERROR_PARSER_FAILED, "parser error/failed"); // cannot be parsed
return;
}
/* ===========================================================================
* COMPUTE SNIPPET
* =========================================================================== */
// we have found a parseable non-empty file: use the lines
// compute snippet from text
final Collection<StringBuilder> sentences = document.getSentences(pre);
if (sentences == null) {
init(url.hash(), null, ERROR_PARSER_NO_LINES, "parser returned no sentences");
return;
}
final SnippetExtractor tsr;
String textline = null;
HandleSet remainingHashes = queryhashes;
try {
// SnippetExtractor signals "no matching sentence" via UnsupportedOperationException
tsr = new SnippetExtractor(sentences, queryhashes, snippetMaxLength);
textline = tsr.getSnippet();
remainingHashes = tsr.getRemainingWords();
} catch (UnsupportedOperationException e) {
init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found");
return;
}
// compute snippet from media
//String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
//String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
//String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
line = "";
//if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
//if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
//if (appline != null) line += (line.length() == 0) ? appline : "<br />" + appline;
//if (hrefline != null) line += (line.length() == 0) ? hrefline : "<br />" + hrefline;
if (textline != null) line += (line.length() == 0) ? textline : "<br />" + textline;
// NOTE(review): line cannot be null here (initialized to "" above); the effective
// failure condition is !remainingHashes.isEmpty() — query words left unmatched
if (line == null || !remainingHashes.isEmpty()) {
init(url.hash(), null, ERROR_NO_MATCH, "no matching snippet found");
return;
}
if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
snippetsCache.put(wordhashes, urls, line);
document.close();
init(url.hash(), line, source, null);
}
private void init(final byte[] urlhash, final String line, final int errorCode, final String errortext) {
this.urlhash = urlhash;
this.line = line;
this.errorCode = errorCode;
this.error = errortext;
this.remaingHashes = remaingHashes;
this.favicon = favicon;
}
public DigestURI getUrl() {
return this.url;
}
public DigestURI getFavicon() {
return this.favicon;
}
public boolean exists() {
return line != null;
}
public int compareTo(TextSnippet o) {
return Base64Order.enhancedCoder.compare(this.url.hash(), o.url.hash());
}
public int compare(TextSnippet o1, TextSnippet o2) {
return o1.compareTo(o2);
}
public int hashCode() {
return ByteArray.hashCode(this.url.hash());
}
@Override
public String toString() {
return (line == null) ? "" : line;
}
public String getLineRaw() {
return (line == null) ? "" : line;
}
public String getError() {
return (error == null) ? "" : error.trim();
}
public int getErrorCode() {
return errorCode;
}
public HandleSet getRemainingHashes() {
return this.remaingHashes;
}
public String getLineMarked(final HandleSet queryHashes) {
if (line == null) return "";
if (queryHashes == null || queryHashes.isEmpty()) return line.trim();
@ -225,6 +316,23 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return l.toString().trim();
}
public int compareTo(TextSnippet o) {
return Base64Order.enhancedCoder.compare(this.urlhash, o.urlhash);
}
public int compare(TextSnippet o1, TextSnippet o2) {
return o1.compareTo(o2);
}
public int hashCode() {
return ByteArray.hashCode(this.urlhash);
}
@Override
public String toString() {
return (line == null) ? "" : line;
}
/**
* mark words with &lt;b&gt;-tags
* @param word the word to mark
@ -307,119 +415,6 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return al;
}
public static TextSnippet retrieveTextSnippet(final LoaderDispatcher loader, final URIMetadataRow.Components comp, final HandleSet queryhashes, final CrawlProfile.CacheStrategy cacheStrategy, final boolean pre, final int snippetMaxLength, final int maxDocLen, final boolean reindexing) {
// heise = "0OQUNU3JSs05"
final DigestURI url = comp.url();
if (queryhashes.isEmpty()) {
//System.out.println("found no queryhashes for URL retrieve " + url);
return new TextSnippet(url, null, ERROR_NO_HASH_GIVEN, queryhashes, "no query hashes given");
}
// try to get snippet from snippetCache
int source = SOURCE_CACHE;
final String wordhashes = yacySearch.set2string(queryhashes);
final String urls = new String(url.hash());
String line = retrieveFromCache(wordhashes, urls);
if (line != null) {
// found the snippet
return new TextSnippet(url, line, source, null, null, faviconCache.get(urls));
}
/* ===========================================================================
* LOADING RESOURCE DATA
* =========================================================================== */
// if the snippet is not in the cache, we can try to get it from the htcache
Response response;
try {
// first try to get the snippet from metadata
String loc;
if (containsAllHashes(loc = comp.dc_title(), queryhashes)) {
// try to create the snippet from information given in the url itself
return new TextSnippet(url, loc, SOURCE_METADATA, null, null, faviconCache.get(urls));
} else if (containsAllHashes(loc = comp.dc_creator(), queryhashes)) {
// try to create the snippet from information given in the creator metadata
return new TextSnippet(url, loc, SOURCE_METADATA, null, null, faviconCache.get(urls));
} else if (containsAllHashes(loc = comp.dc_subject(), queryhashes)) {
// try to create the snippet from information given in the subject metadata
return new TextSnippet(url, loc, SOURCE_METADATA, null, null, faviconCache.get(urls));
} else if (containsAllHashes(loc = comp.url().toNormalform(true, true).replace('-', ' '), queryhashes)) {
// try to create the snippet from information given in the subject metadata
return new TextSnippet(url, loc, SOURCE_METADATA, null, null, faviconCache.get(urls));
} else {
// trying to load the resource from the cache
boolean objectWasInCache = Cache.has(url);
response = loader.load(loader.request(url, true, reindexing), cacheStrategy, Long.MAX_VALUE);
if (response == null) {
// in case that we did not get any result we can still return a success when we are not allowed to go online
if (cacheStrategy.mustBeOffline()) {
return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "omitted network load (not allowed), no cache entry");
}
// if it is still not available, report an error
return new TextSnippet(url, null, ERROR_RESOURCE_LOADING, queryhashes, "error loading resource from net, no cache entry");
}
if (!objectWasInCache) {
// place entry on indexing queue
Switchboard.getSwitchboard().toIndexer(response);
source = SOURCE_WEB;
}
}
} catch (final Exception e) {
//Log.logException(e);
return new TextSnippet(url, null, ERROR_SOURCE_LOADING, queryhashes, "error loading resource: " + e.getMessage());
}
/* ===========================================================================
* PARSING RESOURCE
* =========================================================================== */
Document document = null;
try {
document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
} catch (final Parser.Failure e) {
return new TextSnippet(url, null, ERROR_PARSER_FAILED, queryhashes, e.getMessage()); // cannot be parsed
}
if (document == null) return new TextSnippet(url, null, ERROR_PARSER_FAILED, queryhashes, "parser error/failed"); // cannot be parsed
/* ===========================================================================
* COMPUTE SNIPPET
* =========================================================================== */
final DigestURI resFavicon = (document.getFavicon() == null) ? null : new DigestURI(document.getFavicon());
if (resFavicon != null) faviconCache.put(urls, resFavicon);
// we have found a parseable non-empty file: use the lines
// compute snippet from text
final Iterator<StringBuilder> sentences = document.getSentences(pre);
if (sentences == null) return new TextSnippet(url, null, ERROR_PARSER_NO_LINES, queryhashes, "parser returned no sentences",resFavicon);
final Object[] tsr = computeTextSnippet(sentences, queryhashes, snippetMaxLength);
final String textline = (tsr == null) ? null : (String) tsr[0];
final HandleSet remainingHashes = (tsr == null) ? queryhashes : (HandleSet) tsr[1];
// compute snippet from media
//String audioline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
//String videoline = computeMediaSnippet(document.getVideolinks(), queryhashes);
//String appline = computeMediaSnippet(document.getApplinks(), queryhashes);
//String hrefline = computeMediaSnippet(document.getAnchors(), queryhashes);
//String imageline = computeMediaSnippet(document.getAudiolinks(), queryhashes);
line = "";
//if (audioline != null) line += (line.length() == 0) ? audioline : "<br />" + audioline;
//if (videoline != null) line += (line.length() == 0) ? videoline : "<br />" + videoline;
//if (appline != null) line += (line.length() == 0) ? appline : "<br />" + appline;
//if (hrefline != null) line += (line.length() == 0) ? hrefline : "<br />" + hrefline;
if (textline != null) line += (line.length() == 0) ? textline : "<br />" + textline;
if (line == null || !remainingHashes.isEmpty()) return new TextSnippet(url, null, ERROR_NO_MATCH, remainingHashes, "no matching snippet found",resFavicon);
if (line.length() > snippetMaxLength) line = line.substring(0, snippetMaxLength);
// finally store this snippet in our own cache
storeToCache(wordhashes, urls, line);
document.close();
return new TextSnippet(url, line, source, null, null, resFavicon);
}
private static boolean containsAllHashes(final String sentence, final HandleSet queryhashes) {
final TreeMap<byte[], Integer> m = Condenser.hashSentence(sentence);
for (byte[] b: queryhashes) {
@ -428,136 +423,4 @@ public class TextSnippet implements Comparable<TextSnippet>, Comparator<TextSnip
return true;
}
/**
 * Compute a text snippet from a set of candidate sentences such that the snippet
 * contains as many of the given query word hashes as possible.
 * Sentences are scored (more matching words first, shorter sentences preferred),
 * the best one is trimmed to maxLength, and if query words are still missing,
 * further sentences are appended recursively.
 * @param sentences iterator over the candidate sentences
 * @param queryhashes hashes of the query words the snippet shall contain
 * @param maxLength maximum length of the resulting snippet string
 * @return {String - the snippet, HandleSet - remaining hashes} or null if no snippet was found
 */
private static Object[] /*{String - the snippet, HandleSet - remaining hashes}*/
computeTextSnippet(final Iterator<StringBuilder> sentences, final HandleSet queryhashes, int maxLength) {
try {
if (sentences == null) return null;
if ((queryhashes == null) || (queryhashes.isEmpty())) return null;
Iterator<byte[]> j;
TreeMap<byte[], Integer> hs;
StringBuilder sentence;
// sentences ordered by score; the entry with the largest key is the best candidate
final TreeMap<Integer, StringBuilder> os = new TreeMap<Integer, StringBuilder>();
// decreasing counter mixed into each key so equal scores still produce unique map keys
int uniqCounter = 9999;
int score;
while (sentences.hasNext()) {
sentence = sentences.next();
hs = Condenser.hashSentence(sentence.toString());
j = queryhashes.iterator();
score = 0;
// score = number of query hashes that occur in this sentence
while (j.hasNext()) {if (hs.containsKey(j.next())) score++;}
if (score > 0) {
// key prefers many matching words (1st order) and shorter sentences (2nd order)
os.put(Integer.valueOf(1000000 * score - sentence.length() * 10000 + uniqCounter--), sentence);
}
}
String result;
HandleSet remaininghashes;
while (!os.isEmpty()) {
sentence = os.remove(os.lastKey()); // sentence with the biggest score
// trim the candidate to maxLength and determine which query hashes it covers
Object[] tsr = computeTextSnippet(sentence.toString(), queryhashes, maxLength);
if (tsr == null) continue;
result = (String) tsr[0];
if ((result != null) && (result.length() > 0)) {
remaininghashes = (HandleSet) tsr[1];
if (remaininghashes.isEmpty()) {
// we have found the snippet
return new Object[]{result, remaininghashes};
} else if (remaininghashes.size() < queryhashes.size()) {
// the result has not all words in it.
// find another sentence that represents the missing other words
// and find recursively more sentences
maxLength = maxLength - result.length();
if (maxLength < 20) maxLength = 20;
tsr = computeTextSnippet(os.values().iterator(), remaininghashes, maxLength);
if (tsr == null) return null;
final String nextSnippet = (String) tsr[0];
if (nextSnippet == null) return tsr;
// concatenate the partial snippets with a " / " separator
return new Object[]{result + (" / " + nextSnippet), tsr[1]};
} else {
// error: this sentence did not reduce the set of missing words; try the next candidate
//assert remaininghashes.size() < queryhashes.size() : "remaininghashes.size() = " + remaininghashes.size() + ", queryhashes.size() = " + queryhashes.size() + ", sentence = '" + sentence + "', result = '" + result + "'";
continue;
}
}
}
return null;
} catch (final IndexOutOfBoundsException e) {
// string-cutting errors are logged and reported as "nothing found, all hashes remain"
Log.logSevere("computeSnippet", "error with string generation", e);
return new Object[]{null, queryhashes};
}
}
/**
 * Trim a single sentence to a snippet of at most maxLength characters while
 * keeping the region in which the query words occur; query hashes that do not
 * occur in the sentence are returned as the remaining-hash set.
 * @param sentence the sentence to shorten
 * @param queryhashes hashes of the query words
 * @param maxLength maximum length of the returned snippet
 * @return {String - the snippet, HandleSet - hashes NOT found in the sentence} or null on error
 */
private static Object[] /*{String - the snippet, HandleSet - remaining hashes}*/
computeTextSnippet(String sentence, final HandleSet queryhashes, final int maxLength) {
try {
if (sentence == null) return null;
if ((queryhashes == null) || (queryhashes.isEmpty())) return null;
byte[] hash;
// find all hashes that appear in the sentence
final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence);
final Iterator<byte[]> j = queryhashes.iterator();
Integer pos;
// minpos/maxpos bracket the first and last occurrence of any query word
int p, minpos = sentence.length(), maxpos = -1;
final HandleSet remainingHashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), 0);
while (j.hasNext()) {
hash = j.next();
pos = hs.get(hash);
if (pos == null) {
// query word not in this sentence: record it as still missing
try {
remainingHashes.put(hash);
} catch (RowSpaceExceededException e) {
Log.logException(e);
}
} else {
p = pos.intValue();
if (p > maxpos) maxpos = p;
if (p < minpos) minpos = p;
}
}
// check result size
maxpos = maxpos + 10;
if (maxpos > sentence.length()) maxpos = sentence.length();
if (minpos < 0) minpos = 0;
// we have a result, but is it short enough?
if (maxpos - minpos + 10 > maxLength) {
// the string is too long, even if we cut at both ends
// so cut here in the middle of the string
final int lenb = sentence.length();
sentence = sentence.substring(0, (minpos + 20 > sentence.length()) ? sentence.length() : minpos + 20).trim() +
" [..] " +
sentence.substring((maxpos + 26 > sentence.length()) ? sentence.length() : maxpos + 26).trim();
maxpos = maxpos + lenb - sentence.length() + 6;
}
if (maxpos > maxLength) {
// the string is too long, even if we cut it at the end
// so cut it here at both ends at once
assert maxpos >= minpos;
final int newlen = Math.max(10, maxpos - minpos + 10);
final int around = (maxLength - newlen) / 2;
// NOTE(review): minpos - around may be negative for small minpos; the resulting
// IndexOutOfBoundsException is caught below and reported as failure
assert minpos - around < sentence.length() : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
//assert ((maxpos + around) <= sentence.length()) && ((maxpos + around) <= sentence.length()) : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
sentence = "[..] " + sentence.substring(minpos - around, ((maxpos + around) > sentence.length()) ? sentence.length() : (maxpos + around)).trim() + " [..]";
minpos = around;
maxpos = sentence.length() - around - 5;
}
if (sentence.length() > maxLength) {
// trim sentence, 1st step (cut at right side)
sentence = sentence.substring(0, maxpos).trim() + " [..]";
}
if (sentence.length() > maxLength) {
// trim sentence, 2nd step (cut at left side)
sentence = "[..] " + sentence.substring(minpos).trim();
}
if (sentence.length() > maxLength) {
// trim sentence, 3rd step (cut in the middle)
// NOTE(review): assumes the sentence is still at least 26 characters long here;
// shorter input throws IndexOutOfBoundsException, handled below — confirm intended
sentence = sentence.substring(6, 20).trim() + " [..] " + sentence.substring(sentence.length() - 26, sentence.length() - 6).trim();
}
return new Object[] {sentence, remainingHashes};
} catch (final IndexOutOfBoundsException e) {
Log.logSevere("computeSnippet", "error with string generation", e);
return null;
}
}
}

@ -480,7 +480,7 @@ public final class yacyClient {
// because they are search-specific.
// instead, they are placed in a snipped-search cache.
// System.out.println("--- RECEIVED SNIPPET '" + urlEntry.snippet() + "'");
TextSnippet.storeToCache(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
TextSnippet.snippetsCache.put(wordhashes, new String(urlEntry.hash()), urlEntry.snippet());
}
// add the url entry to the word indexes

@ -226,7 +226,7 @@ public class WeakPriorityBlockingQueue<E> {
}
/**
 * return the specific amount of entries as they would be retrievable with element()
 * if count is < 0 then all elements are taken
 * the returned list is not cloned from the internal list and shall not be modified in any way (read-only)
 * @param count the number of entries to return; a negative value returns all elements
 * @return the internal list of drained entries (read-only, do not modify)
 */
public synchronized ArrayList<E> list(final int count) {
    if (count < 0) {
        // a negative count means: drain and return everything
        return list();
    }
    // fixed typo in message: "avaiable" -> "available"
    if (count > sizeAvailable()) throw new RuntimeException("list(" + count + ") exceeded available number of elements (" + sizeAvailable() + ")");
    // drain elements from the queue until enough entries are recorded
    while (count > this.drained.size()) this.poll();
    return this.drained;
}
/**
 * return all entries as they would be retrievable with element()
 * note: the returned list is the internal drained list, not a copy
 * @return a list of all elements in the stack
 */
public synchronized ArrayList<E> list() {
// shift all elements: drain the whole queue so every element is recorded in this.drained
while (!this.queue.isEmpty()) this.poll();
return this.drained;
}
/**
* iterate over all elements available. All elements that are still in the queue are drained to recorded positions
* @return an iterator over all drained positions.

@ -33,6 +33,7 @@ import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
@ -275,11 +276,15 @@ dc_rights
return -1;
}
public Iterator<StringBuilder> getSentences(final boolean pre) {
public List<StringBuilder> getSentences(final boolean pre) {
if (this.text == null) return null;
final Condenser.sentencesFromInputStreamEnum e = Condenser.sentencesFromInputStream(getText());
e.pre(pre);
return e;
ArrayList<StringBuilder> sentences = new ArrayList<StringBuilder>();
while (e.hasNext()) {
sentences.add(e.next());
}
return sentences;
}
public List<String> getKeywords() {

@ -0,0 +1,196 @@
/**
* SnippetExtractor
* Copyright 2010 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 22.10.2010 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.document;
import java.util.Collection;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.TreeSet;
import net.yacy.kelondro.index.HandleSet;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.logging.Log;
/**
 * Extract a short text snippet from a collection of sentences such that the
 * snippet contains as many of the given query word hashes as possible.
 * On success the snippet and the set of query hashes that could not be matched
 * are available via {@link #getSnippet()} and {@link #getRemainingWords()};
 * if no snippet can be computed the constructors throw UnsupportedOperationException.
 */
public class SnippetExtractor {

    // the computed snippet text; assigned by the constructors
    String snippetString;
    // query hashes that could not be found in any selected sentence
    HandleSet remainingHashes;

    /**
     * Compute a snippet from the given sentences.
     * Sentences are scored by (in this order) the number of matching query words,
     * the distance between the matched word positions, a line-length preference
     * and the line number (the very first line is penalized); only the five
     * best-scored candidates are kept. The best candidate is trimmed to maxLength
     * and, while query words are still missing, more sentences are added recursively.
     * @param sentences the candidate sentences
     * @param queryhashes hashes of the query words the snippet shall contain
     * @param maxLength maximum length of the computed snippet
     * @throws UnsupportedOperationException if the input is invalid or no snippet could be computed
     */
    public SnippetExtractor(final Collection<StringBuilder> sentences, final HandleSet queryhashes, int maxLength) throws UnsupportedOperationException {
        if (sentences == null) throw new UnsupportedOperationException("sentence == null");
        if (queryhashes == null || queryhashes.isEmpty()) throw new UnsupportedOperationException("queryhashes == null");
        TreeMap<byte[], Integer> hs;
        final TreeMap<Long, StringBuilder> order = new TreeMap<Long, StringBuilder>();
        // decreasing counter mixed into each key so equal scores still produce unique map keys
        long uniqCounter = 999L;
        Integer pos;
        TreeSet<Integer> positions;
        int linenumber = 0;
        for (final StringBuilder sentence: sentences) {
            hs = Condenser.hashSentence(sentence.toString());
            // collect the positions of all query words that occur in this sentence
            positions = new TreeSet<Integer>();
            for (final byte[] word: queryhashes) {
                pos = hs.get(word);
                if (pos != null) {
                    positions.add(pos);
                }
            }
            final int worddistance = positions.size() > 1 ? positions.last() - positions.first() : 0;
            // sort by
            // - 1st order: number of matching words
            // - 2nd order: word distance
            // - 3rd order: line length (not too short and not too long)
            // - 4th order: line number (earlier lines preferred; line 0 is penalized)
            if (positions.size() > 0) {
                order.put(Long.valueOf(-100000000L * (linenumber == 0 ? 1 : 0) + 10000000L * positions.size() + 1000000L * worddistance + 100000L * linelengthKey(sentence.length(), maxLength) - 10000L * linenumber + uniqCounter--), sentence);
                // keep only the five best-scored sentences
                if (order.size() > 5) order.remove(order.firstEntry().getKey());
            }
            linenumber++;
        }

        StringBuilder sentence;
        SnippetExtractor tsr;
        while (!order.isEmpty()) {
            sentence = order.remove(order.lastKey()); // sentence with the biggest score
            try {
                tsr = new SnippetExtractor(sentence.toString(), queryhashes, maxLength);
            } catch (final UnsupportedOperationException e) {
                // this sentence could not be trimmed to a snippet; try the next candidate
                continue;
            }
            snippetString = tsr.snippetString;
            if (snippetString != null && snippetString.length() > 0) {
                remainingHashes = tsr.remainingHashes;
                if (remainingHashes.isEmpty()) {
                    // we have found the snippet
                    return; // finished!
                } else if (remainingHashes.size() < queryhashes.size()) {
                    // the result has not all words in it.
                    // find another sentence that represents the missing other words
                    // and find recursively more sentences
                    maxLength = maxLength - snippetString.length();
                    if (maxLength < 20) maxLength = 20;
                    // (removed a no-op try/catch that only re-threw the same exception;
                    // an UnsupportedOperationException still propagates to the caller)
                    tsr = new SnippetExtractor(order.values(), remainingHashes, maxLength);
                    final String nextSnippet = tsr.snippetString;
                    if (nextSnippet == null) return;
                    snippetString = snippetString + (" / " + nextSnippet);
                    remainingHashes = tsr.remainingHashes;
                    return;
                } else {
                    // error: the trimmed sentence did not reduce the set of missing words; try the next one
                    //assert remaininghashes.size() < queryhashes.size() : "remaininghashes.size() = " + remaininghashes.size() + ", queryhashes.size() = " + queryhashes.size() + ", sentence = '" + sentence + "', result = '" + result + "'";
                    continue;
                }
            }
        }
        throw new UnsupportedOperationException("no snippet computed");
    }

    /**
     * Map a sentence length to a preference key for the snippet scoring:
     * lines of at least half of maxlength score best, very short lines and
     * over-long lines score worst.
     * @param givenlength the length of the candidate sentence
     * @param maxlength the maximum snippet length
     * @return a score in {0, 1, 3, 5, 7}; larger is better
     */
    private static int linelengthKey(int givenlength, int maxlength) {
        if (givenlength > maxlength) return 1;
        if (givenlength >= maxlength / 2 && givenlength < maxlength) return 7;
        if (givenlength >= maxlength / 4 && givenlength < maxlength / 2) return 5;
        if (givenlength >= maxlength / 8 && givenlength < maxlength / 4) return 3;
        return 0;
    }

    /**
     * Trim a single sentence to at most maxLength characters while keeping the
     * region where the query words occur; query hashes that do not occur in the
     * sentence are recorded in remainingHashes.
     * @param sentence the sentence to trim
     * @param queryhashes hashes of the query words
     * @param maxLength maximum length of the snippet
     * @throws UnsupportedOperationException on invalid input or if the string cutting failed
     */
    private SnippetExtractor(String sentence, final HandleSet queryhashes, final int maxLength) throws UnsupportedOperationException {
        try {
            if (sentence == null) throw new UnsupportedOperationException("no sentence given");
            if (queryhashes == null || queryhashes.isEmpty()) throw new UnsupportedOperationException("queryhashes == null");
            byte[] hash;

            // find all hashes that appear in the sentence
            final TreeMap<byte[], Integer> hs = Condenser.hashSentence(sentence);
            final Iterator<byte[]> j = queryhashes.iterator();
            Integer pos;
            // minpos/maxpos bracket the first and last occurrence of any query word
            int p, minpos = sentence.length(), maxpos = -1;
            final HandleSet remainingHashes = new HandleSet(queryhashes.row().primaryKeyLength, queryhashes.comparator(), 0);
            while (j.hasNext()) {
                hash = j.next();
                pos = hs.get(hash);
                if (pos == null) {
                    // query word not in this sentence: record it as still missing
                    try {
                        remainingHashes.put(hash);
                    } catch (final RowSpaceExceededException e) {
                        Log.logException(e);
                    }
                } else {
                    p = pos.intValue();
                    if (p > maxpos) maxpos = p;
                    if (p < minpos) minpos = p;
                }
            }
            // check result size
            maxpos = maxpos + 10;
            if (maxpos > sentence.length()) maxpos = sentence.length();
            if (minpos < 0) minpos = 0;
            // we have a result, but is it short enough?
            if (maxpos - minpos + 10 > maxLength) {
                // the string is too long, even if we cut at both ends
                // so cut here in the middle of the string
                final int lenb = sentence.length();
                sentence = sentence.substring(0, (minpos + 20 > sentence.length()) ? sentence.length() : minpos + 20).trim() +
                    " [..] " +
                    sentence.substring((maxpos + 26 > sentence.length()) ? sentence.length() : maxpos + 26).trim();
                maxpos = maxpos + lenb - sentence.length() + 6;
            }
            if (maxpos > maxLength) {
                // the string is too long, even if we cut it at the end
                // so cut it here at both ends at once
                assert maxpos >= minpos;
                final int newlen = Math.max(10, maxpos - minpos + 10);
                final int around = (maxLength - newlen) / 2;
                assert minpos - around < sentence.length() : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
                //assert ((maxpos + around) <= sentence.length()) && ((maxpos + around) <= sentence.length()) : "maxpos = " + maxpos + ", minpos = " + minpos + ", around = " + around + ", sentence.length() = " + sentence.length();
                sentence = "[..] " + sentence.substring(minpos - around, ((maxpos + around) > sentence.length()) ? sentence.length() : (maxpos + around)).trim() + " [..]";
                minpos = around;
                maxpos = sentence.length() - around - 5;
            }
            if (sentence.length() > maxLength) {
                // trim sentence, 1st step (cut at right side)
                sentence = sentence.substring(0, maxpos).trim() + " [..]";
            }
            if (sentence.length() > maxLength) {
                // trim sentence, 2nd step (cut at left side)
                sentence = "[..] " + sentence.substring(minpos).trim();
            }
            if (sentence.length() > maxLength) {
                // trim sentence, 3rd step (cut in the middle)
                // NOTE(review): assumes the sentence is still at least 26 characters long here;
                // shorter input triggers the IndexOutOfBoundsException handler below
                sentence = sentence.substring(6, 20).trim() + " [..] " + sentence.substring(sentence.length() - 26, sentence.length() - 6).trim();
            }
            this.snippetString = sentence;
            this.remainingHashes = remainingHashes;
        } catch (final IndexOutOfBoundsException e) {
            // preserve the original exception as cause instead of discarding the stack trace
            throw new UnsupportedOperationException(e.getMessage(), e);
        }
    }

    public String getSnippet() {
        return this.snippetString;
    }

    public HandleSet getRemainingWords() {
        return this.remainingHashes;
    }
}
Loading…
Cancel
Save