redesign of ranking data structure

- the index administration now uses the same code base for url selection and collection as the search interface. The index administration is therefore a good test environment for ranking order control - removed the old post-sorting algorithms; they will be replaced with a new one - fixed many bugs that previously occurred during ranking; in particular, the constraint filtering method removed too many links - fixed media search flags, which had been attached to too many urls. The effect should be a better pre-sorting before media load within snippet fetch git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4223 6c8d7289-2bf4-0310-a012-ef5d649a1542
18 years ago · c48b73cda2
parent 6f1308da2f
commit c48b73cda2
11 changed files with 342 additions and 477 deletions
--- a/htroot/CrawlProfileEditor_p.html
+++ b/htroot/CrawlProfileEditor_p.html
@ -42,7 +42,7 @@
    <td><strong>Local Text Indexing</strong></td>
    <td><strong>Local Media Indexing</strong></td>
    <td><strong>Remote Indexing</strong></td>
-    <td></td>
+    <td><strong>Status / Action</strong></td>
  </tr>
  #{crawlProfiles}# 
  <tr class="TableCell#(dark)#Light::Dark#(/dark)#"> 
@ -61,12 +61,14 @@
    <td>#(indexMedia)#no::yes#(/indexMedia)#</td>
    <td>#(remoteIndexing)#no::yes#(/remoteIndexing)#</td>
    <td>#(terminateButton)#::
        <div style="text-decoration:blink">Running</div>
        <form action="CrawlProfileEditor_p.html" method="get" enctype="multipart/form-data">
        <input type="hidden" name="handle" value="#[handle]#" />
        <input type="submit" name="terminate" value="Terminate" />
        </form>
        #(/terminateButton)#
 	    #(deleteButton)#::
 	    Finished
        <form action="CrawlProfileEditor_p.html" method="get" enctype="multipart/form-data">
        <input type="hidden" name="handle" value="#[handle]#" />
        <input type="submit" name="delete" value="Delete" />
--- a/htroot/IndexControlRWIs_p.java
+++ b/htroot/IndexControlRWIs_p.java
@ -46,7 +46,6 @@ import de.anomic.plasma.plasmaSearchEvent;
 import de.anomic.plasma.plasmaSearchQuery;
 import de.anomic.plasma.plasmaSearchRankingProcess;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.plasma.plasmaWordIndex;
 import de.anomic.plasma.urlPattern.abstractURLPattern;
 import de.anomic.plasma.urlPattern.plasmaURLPattern;
 import de.anomic.server.serverDate;
@ -92,8 +91,8 @@ public class IndexControlRWIs_p {
            if (post.containsKey("keystringsearch")) {
                keyhash = plasmaCondenser.word2hash(keystring);
                prop.put("keyhash", keyhash);
-                final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder);
+                final plasmaSearchRankingProcess ranking = genSearchresult(prop, sb, keyhash, null, sortorder, false);
-                if (finding.size() == 0) {
+                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 1);
                    prop.put("searchresult_word", keystring);
                }
@ -103,8 +102,8 @@ public class IndexControlRWIs_p {
                if (keystring.length() == 0 || !plasmaCondenser.word2hash(keystring).equals(keyhash)) {
                    prop.put("keystring", "&lt;not possible to compute word from hash&gt;");
                }
-                final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, null, false, sortorder);
+                final plasmaSearchRankingProcess ranking = genSearchresult(prop, sb, keyhash, null, sortorder, false);
-                if (finding.size() == 0) {
+                if (ranking.filteredCount() == 0) {
                    prop.put("searchresult", 2);
                    prop.put("searchresult_wordhash", keyhash);
                }
@ -162,8 +161,8 @@ public class IndexControlRWIs_p {
                }
                kelondroBitfield flags = compileFlags(post);
                int count = (post.get("lines", "all").equals("all")) ? -1 : post.getInt("lines", -1);
-                final plasmaWordIndex.Finding finding = genSearchresult(prop, sb, keyhash, flags, true, sortorder);
+                final plasmaSearchRankingProcess ranking = genSearchresult(prop, sb, keyhash, flags, sortorder, true);
-                genURLList(prop, keyhash, keystring, finding, flags, count, sortorder);
+                genURLList(prop, keyhash, keystring, ranking, flags, count, sortorder);
            }
            // transfer to other peer
@ -319,11 +318,11 @@ public class IndexControlRWIs_p {
    private static kelondroBitfield compileFlags(serverObjects post) {
        kelondroBitfield b = new kelondroBitfield(4);
-        if (post.get("allurl", "").equals("on")) {
+        if (post.get("allurl", "").equals("on")) return null;
-            for (int i = 0; i < 32; i++) {b.set(i, true);}
+        if (post.get("flags") != null) {
-            return b;
+            if (post.get("flags","").length() == 0) return null;
            return new kelondroBitfield(4, (String) post.get("flags"));
        }
        if (post.get("flags") != null) return new kelondroBitfield(4, (String) post.get("flags"));
        if (post.get("reference", "").equals("on")) b.set(indexRWIEntry.flag_app_reference, true);
        if (post.get("description", "").equals("on")) b.set(indexRWIEntry.flag_app_descr, true);
        if (post.get("author", "").equals("on")) b.set(indexRWIEntry.flag_app_author, true);
@ -359,51 +358,52 @@ public class IndexControlRWIs_p {
        }
    }
-    private static plasmaWordIndex.Finding genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, boolean urlfetch, int sortorder) {
+    private static plasmaSearchRankingProcess genSearchresult(serverObjects prop, plasmaSwitchboard sb, String keyhash, kelondroBitfield filter, int sortorder, boolean fetchURLs) {
-        final plasmaWordIndex.Finding finding = sb.wordIndex.retrieveURLs(new plasmaSearchQuery(keyhash, -1, filter), urlfetch, sortorder, sb.getRanking());
+        plasmaSearchQuery query = new plasmaSearchQuery(keyhash, -1, filter);
-        if (finding.size() == 0) {
+        plasmaSearchRankingProcess ranked = new plasmaSearchRankingProcess(sb.wordIndex, query, null, sb.getRanking(), sortorder, Integer.MAX_VALUE);
        ranked.execQuery(fetchURLs);
        if (ranked.filteredCount() == 0) {
            prop.put("searchresult", 2);
            prop.put("searchresult_wordhash", keyhash);
        } else {
            prop.put("searchresult", 3);
-            prop.put("searchresult_allurl", finding.size());
+            prop.put("searchresult_allurl", ranked.filteredCount());
-            prop.put("searchresult_reference", finding.flagcount()[indexRWIEntry.flag_app_reference]);
+            prop.put("searchresult_reference", ranked.flagCount()[indexRWIEntry.flag_app_reference]);
-            prop.put("searchresult_description", finding.flagcount()[indexRWIEntry.flag_app_descr]);
+            prop.put("searchresult_description", ranked.flagCount()[indexRWIEntry.flag_app_descr]);
-            prop.put("searchresult_author", finding.flagcount()[indexRWIEntry.flag_app_author]);
+            prop.put("searchresult_author", ranked.flagCount()[indexRWIEntry.flag_app_author]);
-            prop.put("searchresult_tag", finding.flagcount()[indexRWIEntry.flag_app_tags]);
+            prop.put("searchresult_tag", ranked.flagCount()[indexRWIEntry.flag_app_tags]);
-            prop.put("searchresult_url", finding.flagcount()[indexRWIEntry.flag_app_url]);
+            prop.put("searchresult_url", ranked.flagCount()[indexRWIEntry.flag_app_url]);
-            prop.put("searchresult_emphasized", finding.flagcount()[indexRWIEntry.flag_app_emphasized]);
+            prop.put("searchresult_emphasized", ranked.flagCount()[indexRWIEntry.flag_app_emphasized]);
-            prop.put("searchresult_image", finding.flagcount()[plasmaCondenser.flag_cat_hasimage]);
+            prop.put("searchresult_image", ranked.flagCount()[plasmaCondenser.flag_cat_hasimage]);
-            prop.put("searchresult_audio", finding.flagcount()[plasmaCondenser.flag_cat_hasaudio]);
+            prop.put("searchresult_audio", ranked.flagCount()[plasmaCondenser.flag_cat_hasaudio]);
-            prop.put("searchresult_video", finding.flagcount()[plasmaCondenser.flag_cat_hasvideo]);
+            prop.put("searchresult_video", ranked.flagCount()[plasmaCondenser.flag_cat_hasvideo]);
-            prop.put("searchresult_app", finding.flagcount()[plasmaCondenser.flag_cat_hasapp]);
+            prop.put("searchresult_app", ranked.flagCount()[plasmaCondenser.flag_cat_hasapp]);
-            prop.put("searchresult_indexof", finding.flagcount()[plasmaCondenser.flag_cat_indexof]);
+            prop.put("searchresult_indexof", ranked.flagCount()[plasmaCondenser.flag_cat_indexof]);
        }
-        return finding;
+        return ranked;
    }
-    private static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaWordIndex.Finding finding, kelondroBitfield flags, int maxlines, int ordering) {
+    private static void genURLList(serverObjects prop, String keyhash, String keystring, plasmaSearchRankingProcess ranked, kelondroBitfield flags, int maxlines, int ordering) {
        // search for a word hash and generate a list of url links
        prop.put("genUrlList_keyHash", keyhash);
-        if (finding.size() == 0) {
+        if (ranked.filteredCount() == 0) {
            prop.put("genUrlList", 1);
            prop.put("genUrlList_count", 0);
            prop.put("searchresult", 2);
        } else {
            prop.put("genUrlList", 2);
            prop.put("searchresult", 3);
-            prop.put("genUrlList_flags", flags.exportB64());
+            prop.put("genUrlList_flags", (flags == null) ? "" : flags.exportB64());
            prop.put("genUrlList_lines", maxlines);
            prop.put("genUrlList_ordering", ordering);
            int i = 0;
            yacyURL url;
            Iterator iter = finding.urls();
            indexURLEntry entry;
            String us;
            long rn = -1;
-            while (iter.hasNext()) {
+            while ((ranked.size() > 0) && ((entry = ranked.bestURL(false)) != null)) {
                entry = (indexURLEntry) iter.next();
                if ((entry == null) || (entry.comp() == null)) continue;
                url = entry.comp().url();
                if (url == null) continue;
@ -452,7 +452,7 @@ public class IndexControlRWIs_p {
                i++;
                if ((maxlines >= 0) && (i >= maxlines)) break;
            }
-            iter = finding.miss().iterator();
+            Iterator iter = ranked.miss(); // iterates url hash strings
            while (iter.hasNext()) {
                us = (String) iter.next();
                prop.put("genUrlList_urlList_"+i+"_urlExists", "0");
--- a/htroot/IndexControlURLs_p.java
+++ b/htroot/IndexControlURLs_p.java
@ -36,6 +36,7 @@ import de.anomic.kelondro.kelondroRotateIterator;
 import de.anomic.plasma.plasmaSwitchboard;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
 import de.anomic.yacy.yacySeedDB;
 import de.anomic.yacy.yacyURL;
 public class IndexControlURLs_p {
@ -171,7 +172,7 @@ public class IndexControlURLs_p {
            return prop;
        }
        indexURLEntry.Components comp = entry.comp();
-        indexURLEntry le = (entry.referrerHash() == null) ? null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null, 0);
+        indexURLEntry le = ((entry.referrerHash() == null) || (entry.referrerHash().length() != yacySeedDB.commonHashLength)) ? null : switchboard.wordIndex.loadedURL.load(entry.referrerHash(), null, 0);
        if (comp.url() == null) {
            prop.put("genUrlProfile", "1");
            prop.put("genUrlProfile_urlhash", urlhash);
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@ -83,7 +83,7 @@ public final class search {
        String  profile = post.get("profile", ""); // remote profile hand-over
        if (profile.length() > 0) profile = crypt.simpleDecode(profile, null);
        //final boolean includesnippet = post.get("includesnippet", "false").equals("true");
-        final kelondroBitfield constraint = new kelondroBitfield(4, post.get("constraint", "______"));
+        final kelondroBitfield constraint = ((post.containsKey("constraint")) && (post.get("constraint", "").length() > 0)) ? new kelondroBitfield(4, post.get("constraint", "______")) : null;
 //      final boolean global = ((String) post.get("resource", "global")).equals("global"); // if true, then result may consist of answers from other peers
 //      Date remoteTime = yacyCore.parseUniversalDate((String) post.get(yacySeed.MYTIME));        // read remote time
@ -133,7 +133,7 @@ public final class search {
        long urlRetrievalAllTime = 0, snippetComputationAllTime = 0;
        if ((query.length() == 0) && (abstractSet != null)) {
            // this is _not_ a normal search, only a request for index abstracts
-            theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedCoder), maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, duetime, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, plasmaSearchQuery.catchall_constraint, false);
+            theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedCoder), maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, duetime, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false);
            theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
            yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -127,7 +127,7 @@ public class yacysearch {
            prop.put("input_urlmaskfilter", ".*");
            prop.put("input_prefermaskfilter", "");
            prop.put("input_indexof", "off");
-            prop.put("input_constraint", plasmaSearchQuery.catchall_constraint.exportB64());
+            prop.put("input_constraint", "");
            prop.put("input_cat", "href");
            prop.put("input_depth", "0");
            prop.put("input_contentdom", "text");
@ -167,7 +167,7 @@ public class yacysearch {
        String prefermask = post.get("prefermaskfilter", "");
        if ((prefermask.length() > 0) && (prefermask.indexOf(".*") < 0)) prefermask = ".*" + prefermask + ".*";
-        kelondroBitfield constraint = post.containsKey("constraint") ? new kelondroBitfield(4, post.get("constraint", "______")) : plasmaSearchQuery.catchall_constraint;
+        kelondroBitfield constraint = ((post.containsKey("constraint")) && (post.get("constraint", "").length() > 0)) ? new kelondroBitfield(4, post.get("constraint", "______")) : null;
        if (indexof) {
            constraint = new kelondroBitfield(4);
            constraint.set(plasmaCondenser.flag_cat_indexof, true);
@ -401,7 +401,7 @@ public class yacysearch {
        prop.putHTML("input_urlmaskfilter", urlmask);
        prop.putHTML("input_prefermaskfilter", prefermask);
        prop.put("input_indexof", (indexof) ? "on" : "off");
-        prop.put("input_constraint", constraint.exportB64());
+        prop.put("input_constraint", (constraint == null) ? "" : constraint.exportB64());
        prop.put("input_contentdom", post.get("contentdom", "text"));
        prop.put("input_contentdomCheckText", (contentdomCode == plasmaSearchQuery.CONTENTDOM_TEXT) ? "1" : "0");
        prop.put("input_contentdomCheckAudio", (contentdomCode == plasmaSearchQuery.CONTENTDOM_AUDIO) ? "1" : "0");
@ -418,6 +418,17 @@ public class yacysearch {
    }
    private static String navurla(int page, int display, plasmaSearchQuery theQuery) {
-        return "<a href=\"yacysearch.html?display=" + display + "&amp;search=" + theQuery.queryString() + "&amp;count="+ theQuery.displayResults() + "&amp;offset=" + (page * theQuery.displayResults()) + "&amp;resource=" + theQuery.searchdom() + "&amp;time=" + (theQuery.maximumTime / 1000) + "&amp;urlmaskfilter=" + theQuery.urlMask + "&amp;prefermaskfilter=" + theQuery.prefer + "&amp;cat=href&amp;constraint=" + theQuery.constraint.exportB64() + "&amp;contentdom=" + theQuery.contentdom() + "&amp;former=" + theQuery.queryString() + "\">";
+        return
        "<a href=\"yacysearch.html?display=" + display +
        "&amp;search=" + theQuery.queryString() +
        "&amp;count="+ theQuery.displayResults() +
        "&amp;offset=" + (page * theQuery.displayResults()) +
        "&amp;resource=" + theQuery.searchdom() +
        "&amp;time=" + (theQuery.maximumTime / 1000) +
        "&amp;urlmaskfilter=" + theQuery.urlMask +
        "&amp;prefermaskfilter=" + theQuery.prefer +
        "&amp;cat=href&amp;constraint=" + ((theQuery.constraint == null) ? "" : theQuery.constraint.exportB64()) +
        "&amp;contentdom=" + theQuery.contentdom() +
        "&amp;former=" + theQuery.queryString() + "\">";
    }
 }
--- a/htroot/yacysearchitem.java
+++ b/htroot/yacysearchitem.java
@ -220,8 +220,8 @@ public class yacysearchitem {
            prop.put("content_rankingprops", result.word().toPropertyForm() + ", domLengthEstimated=" + yacyURL.domLengthEstimation(result.hash()) +
                    ((yacyURL.probablyRootURL(result.hash())) ? ", probablyRootURL" : "") + 
                    (((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : ""));
- 
+            plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
-            prop.put("content_snippet", result.textSnippet().getLineMarked(theQuery.queryHashes));
+            prop.put("content_snippet", (snippet == null) ? "(snippet not found)" : snippet.getLineMarked(theQuery.queryHashes));
            return prop;
        }
--- a/source/de/anomic/plasma/plasmaSearchEvent.java
+++ b/source/de/anomic/plasma/plasmaSearchEvent.java
@ -125,7 +125,7 @@ public final class plasmaSearchEvent {
        if ((query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ||
            (query.domType == plasmaSearchQuery.SEARCHDOM_CLUSTERALL)) {
            // do a global search
-            this.rankedCache = new plasmaSearchRankingProcess(query, process, ranking, max_results_preparation);
+            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, process, ranking, 2, max_results_preparation);
            int fetchpeers = (int) (query.maximumTime / 500L); // number of target peers; means 10 peers in 10 seconds
            if (fetchpeers > 50) fetchpeers = 50;
@ -160,14 +160,15 @@ public final class plasmaSearchEvent {
            serverLog.logFine("SEARCH_EVENT", "SEARCH TIME AFTER GLOBAL-TRIGGER TO " + primarySearchThreads.length + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
        } else {
            // do a local search
-            process.startTimer();
+            this.rankedCache = new plasmaSearchRankingProcess(wordIndex, query, process, ranking, 2, max_results_preparation);
-            Map[] searchContainerMaps = wordIndex.localSearchContainers(query, null);
+            this.rankedCache.execQuery(true);
-            process.yield(COLLECTION, searchContainerMaps[0].size());
+            this.localcount = this.rankedCache.filteredCount();
            //plasmaWordIndex.Finding finding = wordIndex.retrieveURLs(query, false, 2, ranking, process);
            if (generateAbstracts) {
                // compute index abstracts
                process.startTimer();
-                Iterator ci = searchContainerMaps[0].entrySet().iterator();
+                Iterator ci = this.rankedCache.searchContainerMaps()[0].entrySet().iterator();
                Map.Entry entry;
                int maxcount = -1;
                double mindhtdistance = 1.1, d;
@ -190,22 +191,9 @@ public final class plasmaSearchEvent {
                    IACount.put(wordhash, new Integer(container.size()));
                    IAResults.put(wordhash, indexContainer.compressIndex(container, null, 1000).toString());
                }
-                process.yield("abstract generation", searchContainerMaps[0].size());
+                process.yield("abstract generation", this.rankedCache.searchContainerMaps()[0].size());
            }
            process.startTimer();
            indexContainer rcLocal =
                (searchContainerMaps == null) ?
                  plasmaWordIndex.emptyContainer(null, 0) :
                      indexContainer.joinExcludeContainers(
                          searchContainerMaps[0].values(),
                          searchContainerMaps[1].values(),
                          query.maxDistance);
            process.yield(JOIN, rcLocal.size());
            this.localcount = rcLocal.size();
            this.rankedCache = new plasmaSearchRankingProcess(query, process, ranking, max_results_preparation);
            this.rankedCache.insert(rcLocal, true);
        }
        if (query.onlineSnippetFetch) {
@ -221,10 +209,8 @@ public final class plasmaSearchEvent {
            indexURLEntry uentry;
            ResultEntry resultEntry;
            synchronized (rankedCache) {
-                Iterator urlIterator = rankedCache.entries(wordIndex, true);
+                while ((rankedCache.size() > 0) && ((uentry = rankedCache.bestURL(true)) != null) && (resultList.size() < (query.neededResults()))) {
-                while ((urlIterator.hasNext()) && (resultList.size() < (query.neededResults()))) {
+                    System.out.println("***DEBUG*** SEARCH RESULT URL=" + uentry.comp().url().toNormalform(false, false));
                    // fetch next entry
                    uentry = (indexURLEntry) urlIterator.next();
                    resultEntry = obtainResultEntry(uentry, (snippetComputationAllTime < 300) ? 1 : 0);
                    if (resultEntry == null) continue; // the entry had some problems, cannot be used
@ -260,51 +246,12 @@ public final class plasmaSearchEvent {
        public void run() {
            // do a local search
            process.startTimer();
            Map[] searchContainerMaps = wordIndex.localSearchContainers(query, null);
            process.yield(COLLECTION, searchContainerMaps[0].size());
            // use the search containers to fill up rcAbstracts locally
            /*
            if ((rcAbstracts != null) && (searchContainerMap != null)) {
                Iterator i, ci = searchContainerMap.entrySet().iterator();
                Map.Entry entry;
                String wordhash;
                indexContainer container;
                TreeMap singleAbstract;
                String mypeerhash = yacyCore.seedDB.mySeed.hash;
                while (ci.hasNext()) {
                    entry = (Map.Entry) ci.next();
                    wordhash = (String) entry.getKey();
                    container = (indexContainer) entry.getValue();
                    // collect all urlhashes from the container
                    synchronized (rcAbstracts) {
                        singleAbstract = (TreeMap) rcAbstracts.get(wordhash); // a mapping from url-hashes to a string of peer-hashes
                        if (singleAbstract == null) singleAbstract = new TreeMap();
                        i = container.entries();
                        while (i.hasNext()) singleAbstract.put(((indexEntry) i.next()).urlHash(), mypeerhash);
                        rcAbstracts.put(wordhash, singleAbstract);
                    }
                }
            }
            */
            // join and exlcude the local result
            process.startTimer();
            indexContainer rcLocal =
                (searchContainerMaps == null) ?
                  plasmaWordIndex.emptyContainer(null, 0) :
                      indexContainer.joinExcludeContainers(
                          searchContainerMaps[0].values(),
                          searchContainerMaps[1].values(),
                          query.maxDistance);
            process.yield(JOIN, rcLocal.size());
            localcount = rcLocal.size();
            // sort the local containers and truncate it to a limited count,
            // so following sortings together with the global results will be fast
            synchronized (rankedCache) {
-                rankedCache.insert(rcLocal, true);
+                rankedCache.execQuery(true);
                localcount = rankedCache.filteredCount();
            }
        }
    }
@ -367,7 +314,7 @@ public final class plasmaSearchEvent {
        }
        // check constraints
-        if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) &&
+        if ((query.constraint != null) &&
            (query.constraint.get(plasmaCondenser.flag_cat_indexof)) &&
            (!(comp.title().startsWith("Index of")))) {
            final Iterator wi = query.queryHashes.iterator();
@ -401,7 +348,7 @@ public final class plasmaSearchEvent {
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_TEXT) {
            // attach text snippet
            startTime = System.currentTimeMillis();
-            plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp.url(), snippetFetchWordHashes, (snippetFetchMode == 2), query.constraint.get(plasmaCondenser.flag_cat_indexof), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
+            plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp.url(), snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
            long snippetComputationTime = System.currentTimeMillis() - startTime;
            serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " + comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
@ -550,7 +497,6 @@ public final class plasmaSearchEvent {
    private class resultWorker extends Thread {
        private indexRWIEntry entry;   // entry this thread is working on
        private long timeout; // the date until this thread should try to work
        private long sleeptime; // the sleeptime of this thread at the beginning of its life
        private int id;
@ -559,7 +505,6 @@ public final class plasmaSearchEvent {
            this.id = id;
            this.timeout = System.currentTimeMillis() + lifetime;
            this.sleeptime = lifetime / 10 * id;
            this.entry = null;
        }
        public void run() {
@ -568,35 +513,16 @@ public final class plasmaSearchEvent {
            if (anyRemoteSearchAlive()) try {Thread.sleep(this.sleeptime);} catch (InterruptedException e1) {}
            // start fetching urls and snippets
-            while (true) {
+            indexURLEntry page;
-                
+            while ((resultList.size() < query.neededResults() + query.displayResults()) &&
-                if (resultList.size() > query.neededResults() + query.displayResults()) break; // computed enough
+                   (System.currentTimeMillis() < this.timeout) &&
-
+                   ((page = rankedCache.bestURL(true)) != null)) {
-                if (System.currentTimeMillis() > this.timeout) break; // time is over
+                if (anyResultWith(page.hash())) continue;
                if (anyFailureWith(page.hash())) continue;
                // try secondary search
                prepareSecondarySearch(); // will be executed only once
                // fetch next entry to work on
                this.entry = null;
                entry = nextOrder();
                if (entry == null) {
                    if (anyRemoteSearchAlive()) {
                        // wait and try again
                        try {Thread.sleep(100);} catch (InterruptedException e) {}
                        continue;
                    } else {
                        // we will not see that there come more results in
                        break;
                    }
                }
                indexURLEntry page = wordIndex.loadedURL.load(entry.urlHash(), entry, 0);
                if (page == null) {
                    registerFailure(entry.urlHash(), "url does not exist in lurl-db");
                    continue;
                }
                ResultEntry resultEntry = obtainResultEntry(page, 2);
                if (resultEntry == null) continue; // the entry had some problems, cannot be used
                urlRetrievalAllTime += resultEntry.dbRetrievalTime;
@ -617,29 +543,6 @@ public final class plasmaSearchEvent {
            serverLog.logInfo("SEARCH", "resultWorker thread " + id + " terminated");
        }
        private indexRWIEntry nextOrder() {
            synchronized (rankedCache) {
                Iterator i = rankedCache.entries(null, false);
                indexRWIEntry entry;
                String urlhash;
                while (i.hasNext()) {
                    entry = (indexRWIEntry) i.next();
                    urlhash = entry.urlHash();
                    if ((anyFailureWith(urlhash)) || (anyWorkerWith(urlhash)) || (anyResultWith(urlhash))) continue;
                    return entry;
                }
            }
            return null; // no more entries available
        }
        private boolean anyWorkerWith(String urlhash) {
            for (int i = 0; i < workerThreadCount; i++) {
                if ((workerThreads[i] == null) || (workerThreads[i] == this)) continue;
                if ((workerThreads[i].entry != null) && (workerThreads[i].entry.urlHash().equals(urlhash))) return true;
            }
            return false;
        }
        private boolean anyResultWith(String urlhash) {
            for (int i = 0; i < resultList.size(); i++) {
                if (((ResultEntry) resultList.get(i)).urlentry.hash().equals(urlhash)) return true;
@ -681,7 +584,7 @@ public final class plasmaSearchEvent {
            // fetch the best entry from the resultList, not the entry from item position
            // whenever a specific entry was switched in its position and was returned here
            // a moving pointer is set to assign that item position as not changeable
-            int bestpick = postRankingFavourite(item);
+            int bestpick = item; //postRankingFavourite(item);
            if (bestpick != item) {
                // switch the elements
                ResultEntry buf = (ResultEntry) this.resultList.get(bestpick);
@ -695,68 +598,6 @@ public final class plasmaSearchEvent {
        }
    }
    /**
     * Finds the position of the best post-ranked element in the tail of resultList.
     *
     * The caller is expected to hold the lock on resultList. Every element from
     * position item up to the end of the list is scored with the post-ranking of
     * the ranking profile; the index of the strictly highest positive score wins.
     *
     * @param item first list position to consider; also passed to the ranking as position
     * @return index of the best element, or item itself if no score exceeds zero
     */
    private int postRankingFavourite(int item) {
        int favourite = item;
        long favouriteScore = 0;
        int pos = item;
        while (pos < this.resultList.size()) {
            final ResultEntry candidate = (ResultEntry) this.resultList.get(pos);
            final long score = this.ranking.postRanking(this.query, this.references(10), candidate, item);
            if (score > favouriteScore) {
                favouriteScore = score;
                favourite = pos;
            }
            pos++;
        }
        return favourite;
    }
    /*
    public void removeRedundant() {
        // remove all urls from the pageAcc structure that occur double by specific redundancy rules
        // a link is redundant, if a sub-path of the url is cited before. redundant urls are removed
        // we find redundant urls by iteration over all elements in pageAcc
        Iterator i = pageAcc.entrySet().iterator();
        HashMap paths = new HashMap(); // a url-subpath to pageAcc-key relation
        Map.Entry entry;
        // first scan all entries and find all urls that are referenced
        while (i.hasNext()) {
            entry = (Map.Entry) i.next();
            paths.put(((indexURLEntry) entry.getValue()).comp().url().toNormalform(true, true), entry.getKey());
            //if (path != null) path = shortenPath(path);
            //if (path != null) paths.put(path, entry.getKey());
        }
        // now scan the pageAcc again and remove all redundant urls
        i = pageAcc.entrySet().iterator();
        String shorten;
        while (i.hasNext()) {
            entry = (Map.Entry) i.next();
            shorten = shortenPath(((indexURLEntry) entry.getValue()).comp().url().toNormalform(true, true));
            // scan all subpaths of the url
            while (shorten != null) {
                if (pageAcc.size() <= query.wantedResults) break;
                if (paths.containsKey(shorten)) {
                    //System.out.println("deleting path from search result: " + path + " is redundant to " + shorten);
                    try {
                        i.remove();
                    } catch (IllegalStateException e) {
                    }
                }
                shorten = shortenPath(shorten);
            }
        }
    }
    private static String shortenPath(String path) {
        int pos = path.lastIndexOf('/');
        if (pos < 0) return null;
        return path.substring(0, pos);
    }
    */
    public ArrayList completeResults(long waitingtime) {
        long timeout = System.currentTimeMillis() + waitingtime;
        while ((this.resultList.size() < query.neededResults()) && (anyWorkerAlive()) && (System.currentTimeMillis() < timeout)) {
--- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java
@ -29,7 +29,6 @@ package de.anomic.plasma;
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
@ -40,34 +39,45 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
 import de.anomic.index.indexContainer;
 import de.anomic.index.indexRWIEntry;
 import de.anomic.index.indexRWIEntryOrder;
 import de.anomic.index.indexURLEntry;
 import de.anomic.kelondro.kelondroBinSearch;
 import de.anomic.kelondro.kelondroMScoreCluster;
 import de.anomic.server.serverCodings;
 import de.anomic.server.serverFileUtils;
 import de.anomic.server.serverProfiling;
 import de.anomic.yacy.yacyURL;
 public final class plasmaSearchRankingProcess {
    public  static kelondroBinSearch[] ybrTables = null; // block-rank tables
    private static boolean useYBR = true;
-    private TreeMap pageAcc; // key = ranking (Long); value = indexRWIEntry
+    private TreeMap sortedRWIEntries; // key = ranking (Long); value = indexRWIEntry; if sortorder < 2 then key is instance of String
    private HashMap doubleDomCache; // key = domhash (6 bytes); value = TreeMap like sortedRWIEntries
    private HashMap handover; // key = urlhash, value = urlstring; used for double-check of urls that had been handed over to search process
    private plasmaSearchQuery query;
    private plasmaSearchRankingProfile ranking;
    private int sortorder;
    private int filteredCount;
    private indexRWIEntryOrder order;
    private serverProfiling process;
    private int maxentries;
    private int globalcount;
    private indexRWIEntryOrder order;
    private serverProfiling process;
    private HashMap urlhashes; // map for double-check; String/Long relation, addresses ranking number (backreference for deletion)
    private kelondroMScoreCluster ref;  // reference score computation for the commonSense heuristic
-    private int[] c; // flag counter
+    private int[] flagcount; // flag counter
    private TreeSet misses; // contains url-hashes that could not been found in the LURL-DB
    private plasmaWordIndex wordIndex;
    private Map[] localSearchContainerMaps;
-    public plasmaSearchRankingProcess(plasmaSearchQuery query, serverProfiling process, plasmaSearchRankingProfile ranking, int maxentries) {
+    public plasmaSearchRankingProcess(plasmaWordIndex wordIndex, plasmaSearchQuery query, serverProfiling process, plasmaSearchRankingProfile ranking, int sortorder, int maxentries) {
        // we collect the urlhashes and construct a list with urlEntry objects
        // attention: if minEntries is too high, this method will not terminate within the maxTime
-        this.pageAcc = new TreeMap();
+        // sortorder: 0 = hash, 1 = url, 2 = ranking
        this.localSearchContainerMaps = null;
        this.sortedRWIEntries = new TreeMap();
        this.doubleDomCache = new HashMap();
        this.handover = new HashMap();
        this.filteredCount = 0;
        this.process = process;
        this.order = null;
        this.query = query;
@ -76,8 +86,80 @@ public final class plasmaSearchRankingProcess {
        this.globalcount = 0;
        this.urlhashes = new HashMap();
        this.ref = new kelondroMScoreCluster();
-        c = new int[32];
+        this.misses = new TreeSet();
-        for (int i = 0; i < 32; i++) {c[i] = 0;}
+        this.wordIndex = wordIndex;
        this.sortorder = sortorder;
        this.flagcount = new int[32];
        for (int i = 0; i < 32; i++) {this.flagcount[i] = 0;}
    }
    /**
     * Executes the local part of the query and fills the sorted entry map.
     *
     * The word index is asked for the inclusion/exclusion containers of the query,
     * these are joined into one container, and the joined entries are then either
     * handed to insert() (sortorder 2, ranking) or filtered with testFlags() and
     * stored keyed by url hash (sortorder 0) or by normalized url text (any other
     * sortorder, only if fetchURLs is set). Hashes whose url cannot be loaded are
     * recorded in the misses set.
     *
     * @param fetchURLs if true and the sort order is by url, each url is loaded from
     *                  the loaded-URL database; if false in that mode the entries are
     *                  only counted and not stored
     */
    public void execQuery(boolean fetchURLs) {

        if (process != null) process.startTimer();
        this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null);
        // the container lookup is treated as optional further down, so it must not be dereferenced unguarded here
        if (process != null) process.yield(plasmaSearchEvent.COLLECTION, (this.localSearchContainerMaps == null) ? 0 : this.localSearchContainerMaps[0].size());

        // join and exclude the local result
        if (process != null) process.startTimer();
        indexContainer index =
            (this.localSearchContainerMaps == null) ?
              plasmaWordIndex.emptyContainer(null, 0) :
                  indexContainer.joinExcludeContainers(
                      this.localSearchContainerMaps[0].values(),
                      this.localSearchContainerMaps[1].values(),
                      query.maxDistance);
        // determine the size null-safe before anything else touches the join result
        final int joincount = (index == null) ? 0 : index.size();
        if (process != null) process.yield(plasmaSearchEvent.JOIN, joincount);
        if (joincount == 0) {
            return;
        }

        if (sortorder == 2) {
            insert(index, true);
        } else {
            final Iterator en = index.entries();
            // generate a new map where the urls are sorted (not by hash but by the url text)
            indexRWIEntry ientry;
            indexURLEntry uentry;
            String u;
            loop: while (en.hasNext()) {
                ientry = (indexRWIEntry) en.next();

                // check constraints
                if (!testFlags(ientry)) continue loop;

                // increase flag counts
                for (int i = 0; i < 32; i++) {
                    if (ientry.flags().get(i)) {flagcount[i]++;}
                }

                // load url
                if (sortorder == 0) {
                    // sorted by hash: the hash is key and back-reference at the same time
                    this.sortedRWIEntries.put(ientry.urlHash(), ientry);
                    this.urlhashes.put(ientry.urlHash(), ientry.urlHash());
                    filteredCount++;
                } else {
                    if (fetchURLs) {
                        uentry = wordIndex.loadedURL.load(ientry.urlHash(), ientry, 0);
                        if (uentry == null) {
                            this.misses.add(ientry.urlHash());
                        } else {
                            u = uentry.comp().url().toNormalform(false, true);
                            this.sortedRWIEntries.put(u, ientry);
                            this.urlhashes.put(ientry.urlHash(), u);
                            filteredCount++;
                        }
                    } else {
                        // NOTE(review): in this mode the entry is counted but not stored anywhere - confirm that this is intended
                        filteredCount++;
                    }
                }

                // interrupt if we have enough
                if ((query.neededResults() > 0) && (this.misses.size() + this.sortedRWIEntries.size() > query.neededResults())) break loop;
            } // end loop
        }
    }
    public void insert(indexContainer container, boolean local) {
@ -102,7 +184,6 @@ public final class plasmaSearchRankingProcess {
        // normalize entries and get ranking
        if (process != null) process.startTimer();
        Iterator i = container.entries();
        this.pageAcc = new TreeMap();
        indexRWIEntry iEntry, l;
        long biggestEntry = 0;
        //long s0 = System.currentTimeMillis();
@ -113,89 +194,164 @@ public final class plasmaSearchRankingProcess {
            // increase flag counts
            for (int j = 0; j < 32; j++) {
-                if (iEntry.flags().get(j)) {c[j]++;}
+                if (iEntry.flags().get(j)) {flagcount[j]++;}
            }
            // kick out entries that are too bad according to current findings
            r = new Long(order.cardinal(iEntry));
-            if ((maxentries >= 0) && (pageAcc.size() >= maxentries) && (r.longValue() > biggestEntry)) continue;
+            if ((maxentries >= 0) && (sortedRWIEntries.size() >= maxentries) && (r.longValue() > biggestEntry)) continue;
-                        
+            
            // check constraints
-            if ((!(query.constraint.equals(plasmaSearchQuery.catchall_constraint))) && (!(iEntry.flags().allOf(query.constraint)))) continue; // filter out entries that do not match the search constraint
+            if (!testFlags(iEntry)) continue;
            if (query.contentdom != plasmaSearchQuery.CONTENTDOM_TEXT) {
                if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasaudio)))) continue;
                if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasvideo)))) continue;
                if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasimage)))) continue;
                if ((query.contentdom == plasmaSearchQuery.CONTENTDOM_APP  ) && (!(iEntry.flags().get(plasmaCondenser.flag_cat_hasapp  )))) continue;
            }
-            if ((maxentries < 0) || (pageAcc.size() < maxentries)) {
+            if ((maxentries < 0) || (sortedRWIEntries.size() < maxentries)) {
                if (urlhashes.containsKey(iEntry.urlHash())) continue;
-                while (pageAcc.containsKey(r)) r = new Long(r.longValue() + 1);
+                while (sortedRWIEntries.containsKey(r)) r = new Long(r.longValue() + 1);
-                pageAcc.put(r, iEntry);
+                sortedRWIEntries.put(r, iEntry);
            } else {
                if (r.longValue() > biggestEntry) {
                    continue;
                } else {
                    if (urlhashes.containsKey(iEntry.urlHash())) continue;
-                    l = (indexRWIEntry) pageAcc.remove((Long) pageAcc.lastKey());
+                    l = (indexRWIEntry) sortedRWIEntries.remove((Long) sortedRWIEntries.lastKey());
                    urlhashes.remove(l.urlHash());
-                    while (pageAcc.containsKey(r)) r = new Long(r.longValue() + 1);
+                    while (sortedRWIEntries.containsKey(r)) r = new Long(r.longValue() + 1);
-                    pageAcc.put(r, iEntry);
+                    sortedRWIEntries.put(r, iEntry);
-                    biggestEntry = order.cardinal((indexRWIEntry) pageAcc.get(pageAcc.lastKey()));
+                    biggestEntry = order.cardinal((indexRWIEntry) sortedRWIEntries.get(sortedRWIEntries.lastKey()));
                }
            }
            urlhashes.put(iEntry.urlHash(), r);
            // increase counter for statistics
            if (!local) this.globalcount++;
        }
-        this.filteredCount = pageAcc.size();
+        this.filteredCount = sortedRWIEntries.size();
        //long sc = Math.max(1, System.currentTimeMillis() - s0);
        //System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc);
-        if (container.size() > query.neededResults()) remove(true, true);
+        //if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
        if (process != null) process.yield(plasmaSearchEvent.PRESORT, container.size());
    }
    public class rIterator implements Iterator {
    	boolean urls;
    	Iterator r;
    	plasmaWordIndex wi;
    	public rIterator(plasmaWordIndex wi, boolean fetchURLs) {
    		// if fetchURLs == true, this iterates indexURLEntry objects, otherwise it iterates indexRWIEntry objects
    		this.urls = fetchURLs;
    		this.r = pageAcc.entrySet().iterator();
    		this.wi = wi;
    	}
 		public boolean hasNext() {
 			return r.hasNext();
 		}
 		public Object next() {
 			Map.Entry entry = (Map.Entry) r.next();
 			indexRWIEntry ientry = (indexRWIEntry) entry.getValue();
 			if (urls) {
 				return wi.loadedURL.load(ientry.urlHash(), ientry, ((Long) entry.getKey()).longValue());
 			} else {
 				return ientry;
 			}
 		}
-		public void remove() {
+    private boolean testFlags(indexRWIEntry ientry) {
-			throw new UnsupportedOperationException();
+        if (query.constraint == null) return true;
-		}
+        // test if ientry matches with filter
        // if all = true: let only entries pass that has all matching bits
        // if all = false: let all entries pass that has at least one matching bit
        if (query.allofconstraint) {
            for (int i = 0; i < 32; i++) {
                if ((query.constraint.get(i)) && (!ientry.flags().get(i))) return false;
            }
            return true;
        }
        for (int i = 0; i < 32; i++) {
            if ((query.constraint.get(i)) && (ientry.flags().get(i))) return true;
        }
        return false;
    }
-    public int size() {
+    public synchronized Map[] searchContainerMaps() {
-        assert pageAcc.size() == urlhashes.size();
+        // direct access to the result maps is needed for abstract generation
-        return pageAcc.size();
+        // this is only available if execQuery() was called before
        return localSearchContainerMaps;
    }
    // todo:
    // - remove redundant urls (sub-path occurred before)
    // - move up shorter urls
    // - root-domain guessing to prefer the root domain over other urls if search word appears in domain name
    /**
     * Takes the best remaining RWI entry out of this ranking process.
     *
     * Entries are first drawn from sortedRWIEntries in key order. With
     * skipDoubleDom set, only the first entry of each domain (the part of the url
     * hash from position 6 on) is returned directly; further entries of the same
     * domain are parked in the doubleDomCache. Once sortedRWIEntries is empty the
     * parked entries are served: the smallest first key over all per-domain maps
     * is located and only that single entry is removed, so that all other parked
     * entries stay available for later calls.
     *
     * @param skipDoubleDom if true, postpone entries of domains that were already seen
     * @return a two-element array {key, indexRWIEntry}, where key is the Long or
     *         String key the entry was stored under, or null if nothing is left
     */
    private synchronized Object[] /*{Object, indexRWIEntry}*/ bestRWI(boolean skipDoubleDom) {
        // returns from the current RWI list the best entry and removes this entry from the list
        Object bestEntry;
        TreeMap m;
        indexRWIEntry rwi;
        while (sortedRWIEntries.size() > 0) {
            bestEntry = sortedRWIEntries.firstKey();
            rwi = (indexRWIEntry) sortedRWIEntries.remove(bestEntry);
            if (!skipDoubleDom) return new Object[]{bestEntry, rwi};
            // check doubledom
            String domhash = rwi.urlHash().substring(6);
            m = (TreeMap) this.doubleDomCache.get(domhash);
            if (m == null) {
                // first appearance of dom
                m = new TreeMap();
                this.doubleDomCache.put(domhash, m);
                return new Object[]{bestEntry, rwi};
            }
            // second appearances of dom
            m.put(bestEntry, rwi);
        }

        // no more entries in sorted RWI entries. Now take elements from the doubleDomCache:
        // only peek at the head of every per-domain map while searching for the best one;
        // removing the heads during the scan would drop all candidates that do not win
        Iterator i = this.doubleDomCache.values().iterator();
        bestEntry = null;
        TreeMap bestMap = null;
        Object o;
        boolean better;
        while (i.hasNext()) {
            m = (TreeMap) i.next();
            if (m.size() == 0) continue;
            o = m.firstKey();
            if (bestEntry == null) {
                better = true;
            } else if (o instanceof Long) {
                better = ((Long) o).longValue() < ((Long) bestEntry).longValue();
            } else if (o instanceof String) {
                better = ((String) o).compareTo((String) bestEntry) < 0;
            } else {
                better = false;
            }
            if (better) {
                bestEntry = o;
                bestMap = m;
            }
        }
        if (bestMap == null) return null;

        // finally remove the best entry (and only this one) from the doubledom cache
        rwi = (indexRWIEntry) bestMap.remove(bestEntry);
        return new Object[]{bestEntry, rwi};
    }
    /**
     * Takes the best remaining entry whose url can be loaded and returns that url entry.
     *
     * Entries are drawn one by one via bestRWI(). For each, the url is loaded from
     * the loaded-URL database; the first successful load is remembered in the
     * handover map and returned. Hashes whose url cannot be loaded are added to
     * the misses set and the next entry is tried.
     *
     * @param skipDoubleDom passed through to bestRWI()
     * @return the url entry of the best loadable result, or null if no entry is left
     */
    public synchronized indexURLEntry bestURL(boolean skipDoubleDom) {
        // size() already covers sortedRWIEntries as well as the doubledom cache
        while (size() > 0) {
            Object[] obrwi = bestRWI(skipDoubleDom);
            // bestRWI() signals exhaustion with null; never index into that
            if (obrwi == null) return null;
            Object bestEntry = obrwi[0];
            indexRWIEntry ientry = (indexRWIEntry) obrwi[1];
            // only Long keys carry a ranking value; String keys (hash/url order) rank as 0
            long ranking = (bestEntry instanceof Long) ? ((Long) bestEntry).longValue() : 0;
            indexURLEntry u = wordIndex.loadedURL.load(ientry.urlHash(), ientry, ranking);
            if (u != null) {
                this.handover.put(u.hash(), u.comp().url().toNormalform(true, false)); // remember that we handed over this url
                return u;
            }
            misses.add(ientry.urlHash());
        }
        return null;
    }
    /**
     * Counts all entries that are still held by this ranking process.
     *
     * @return the number of entries in sortedRWIEntries plus the number of
     *         entries parked in all maps of the doubleDomCache
     */
    public synchronized int size() {
        //assert sortedRWIEntries.size() == urlhashes.size() : "sortedRWIEntries.size() = " + sortedRWIEntries.size() + ", urlhashes.size() = " + urlhashes.size();
        int total = sortedRWIEntries.size();
        for (Iterator parked = this.doubleDomCache.values().iterator(); parked.hasNext();) {
            total += ((TreeMap) parked.next()).size();
        }
        return total;
    }
    public int[] flagCount() {
-    	return c;
+    	return flagcount;
    }
    public int filteredCount() {
@ -207,17 +363,16 @@ public final class plasmaSearchRankingProcess {
    }
    public indexRWIEntry remove(String urlHash) {
-        Long r = (Long) urlhashes.get(urlHash);
+        Object r = (Long) urlhashes.get(urlHash);
        if (r == null) return null;
-        assert pageAcc.containsKey(r);
+        assert sortedRWIEntries.containsKey(r);
-        indexRWIEntry iEntry = (indexRWIEntry) pageAcc.remove(r);
+        indexRWIEntry iEntry = (indexRWIEntry) sortedRWIEntries.remove(r);
        urlhashes.remove(urlHash);
        return iEntry;
    }
-
+    
-    public Iterator entries(plasmaWordIndex wi, boolean fetchURLs) {
+    public Iterator miss() {
-    	// if fetchURLs == true, this iterates indexURLEntry objects, otherwise it iterates indexRWIEntry objects
+        return this.misses.iterator();
        return new rIterator(wi, fetchURLs);
    }
    public Set getReferences(int count) {
@ -257,35 +412,6 @@ public final class plasmaSearchRankingProcess {
        return this.order;
    }
    /**
     * Thins out pageAcc as long as it holds more entries than the query needs.
     *
     * While iterating in key order, the domain part of every url hash (from
     * position 6 on) is remembered. An entry that is not a word-root url is
     * removed if its domain was already seen as a word-root domain (when
     * rootDomExt is set) or was already seen at all (when doubleDom is set).
     * The iteration ends as soon as pageAcc has shrunk to query.neededResults().
     *
     * @param rootDomExt remove entries whose domain already appeared with a word-root url
     * @param doubleDom  remove entries whose domain already appeared before
     */
    private void remove(boolean rootDomExt, boolean doubleDom) {
        if (pageAcc.size() <= query.neededResults()) return;

        final HashSet seenRootDoms = new HashSet();
        final HashSet seenDoms = new HashSet();
        final Iterator it = pageAcc.entrySet().iterator();
        final TreeSet querywords = plasmaSearchQuery.cleanQuery(query.queryString())[0];

        while (it.hasNext() && (pageAcc.size() > query.neededResults())) {
            final indexRWIEntry rwi = (indexRWIEntry) ((Map.Entry) it.next()).getValue();
            final String domPart = rwi.urlHash().substring(6);
            if (yacyURL.isWordRootURL(rwi.urlHash(), querywords)) {
                seenRootDoms.add(domPart);
            } else if ((rootDomExt && seenRootDoms.contains(domPart))
                    || (doubleDom && seenDoms.contains(domPart))) {
                it.remove();
            }
            // every visited domain counts as seen, whether or not the entry was kept
            seenDoms.add(domPart);
        }
    }
    public static void loadYBR(File rankingPath, int count) {
        // load ranking tables
        if (rankingPath.exists()) {
@ -337,4 +463,45 @@ public final class plasmaSearchRankingProcess {
        return 15;
    }
    /**
     * Computes a post-ranking score for a single result entry.
     *
     * The score starts from a position-dependent base and is increased for
     * media link counts matching the content domain of the query, for url or
     * title matching the 'prefer' pattern of the query, for url and title
     * components that occur in the given top words, and for query word hashes
     * that occur among the hashed url or title components. Each bonus is
     * scaled by a left shift with the respective ranking coefficient.
     *
     * @param topwords set of reference words used for the 'common-sense' heuristic
     * @param rentry   the result entry to be scored
     * @param position the position value that determines the base score
     * @return the computed score; larger values are compared as better by postRankingFavourite-style callers
     */
    public long postRanking(
                    Set topwords,
                    plasmaSearchEvent.ResultEntry rentry,
                    int position) {

        long score = (255 - position) << 8;

        // media search: pages with many links of the wanted kind are preferred
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) {
            score += rentry.limage() << ranking.coeff_cathasimage;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) {
            score += rentry.laudio() << ranking.coeff_cathasaudio;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) {
            score += rentry.lvideo() << ranking.coeff_cathasvideo;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_APP  ) {
            score += rentry.lapp()   << ranking.coeff_cathasapp;
        }

        // hits matching the 'prefer' pattern get a bonus, for url and title separately
        if (rentry.url().toNormalform(true, true).matches(query.prefer)) {
            score += 256 << ranking.coeff_prefer;
        }
        if (rentry.title().matches(query.prefer)) {
            score += 256 << ranking.coeff_prefer;
        }

        // 'common-sense' heuristic: components that appear in the reference words
        final String urlstring = rentry.url().toNormalform(true, true);
        final String[] urlcomps = htmlFilterContentScraper.urlComps(urlstring);
        final String[] descrcomps = rentry.title().toLowerCase().split(htmlFilterContentScraper.splitrex);
        for (int u = 0; u < urlcomps.length; u++) {
            if (!topwords.contains(urlcomps[u])) continue;
            score += Math.max(1, 256 - urlstring.length()) << ranking.coeff_urlcompintoplist;
        }
        for (int d = 0; d < descrcomps.length; d++) {
            if (!topwords.contains(descrcomps[d])) continue;
            score += Math.max(1, 256 - rentry.title().length()) << ranking.coeff_descrcompintoplist;
        }

        // query-in-result matching on the hashed components
        final Set urlcomph = plasmaCondenser.words2hashSet(urlcomps);
        final Set descrcomph = plasmaCondenser.words2hashSet(descrcomps);
        for (Iterator hashes = query.queryHashes.iterator(); hashes.hasNext();) {
            final String queryhash = (String) hashes.next();
            if (urlcomph.contains(queryhash)) {
                score += 256 << ranking.coeff_appurl;
            }
            if (descrcomph.contains(queryhash)) {
                score += 256 << ranking.coeff_appdescr;
            }
        }

        return score;
    }
 }
--- a/source/de/anomic/plasma/plasmaSearchRankingProfile.java
+++ b/source/de/anomic/plasma/plasmaSearchRankingProfile.java
@ -44,9 +44,6 @@ package de.anomic.plasma;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import de.anomic.htmlFilter.htmlFilterContentScraper;
 public class plasmaSearchRankingProfile {
@ -113,7 +110,7 @@ public class plasmaSearchRankingProfile {
        coeff_appauthor          = 13;
        coeff_apptags            = 8;
        coeff_appref             = 9;
-        coeff_appemph            = 11;
+        coeff_appemph            = 13;
        coeff_urlcompintoplist   = 3;
        coeff_descrcompintoplist = 2;
        coeff_prefer             = 15;
@ -248,47 +245,4 @@ public class plasmaSearchRankingProfile {
        return new String(ext);
    }
    /**
     * Computes a post-ranking score for a single result entry using the
     * coefficients of this ranking profile.
     *
     * The score starts from a position-dependent base and is increased for
     * media link counts matching the content domain of the query, for url or
     * title matching the 'prefer' pattern of the query, for url and title
     * components that occur in the given top words, and for query word hashes
     * that occur among the hashed url or title components. Each bonus is
     * scaled by a left shift with the respective coefficient.
     *
     * @param query    the query that delivers content domain, prefer pattern and query hashes
     * @param topwords set of reference words used for the 'common-sense' heuristic
     * @param rentry   the result entry to be scored
     * @param position the position value that determines the base score
     * @return the computed score
     */
    public long postRanking(
                    plasmaSearchQuery query,
                    Set topwords,
                    plasmaSearchEvent.ResultEntry rentry,
                    int position) {

        long score = (255 - position) << 8;

        // media search: pages with many links of the wanted kind are preferred
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_IMAGE) {
            score += rentry.limage() << coeff_cathasimage;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_AUDIO) {
            score += rentry.laudio() << coeff_cathasaudio;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_VIDEO) {
            score += rentry.lvideo() << coeff_cathasvideo;
        }
        if (query.contentdom == plasmaSearchQuery.CONTENTDOM_APP  ) {
            score += rentry.lapp()   << coeff_cathasapp;
        }

        // hits matching the 'prefer' pattern get a bonus, for url and title separately
        if (rentry.url().toNormalform(true, true).matches(query.prefer)) {
            score += 256 << coeff_prefer;
        }
        if (rentry.title().matches(query.prefer)) {
            score += 256 << coeff_prefer;
        }

        // 'common-sense' heuristic: components that appear in the reference words
        final String urlstring = rentry.url().toNormalform(true, true);
        final String[] urlcomps = htmlFilterContentScraper.urlComps(urlstring);
        final String[] descrcomps = rentry.title().toLowerCase().split(htmlFilterContentScraper.splitrex);
        for (int u = 0; u < urlcomps.length; u++) {
            if (!topwords.contains(urlcomps[u])) continue;
            score += Math.max(1, 256 - urlstring.length()) << coeff_urlcompintoplist;
        }
        for (int d = 0; d < descrcomps.length; d++) {
            if (!topwords.contains(descrcomps[d])) continue;
            score += Math.max(1, 256 - rentry.title().length()) << coeff_descrcompintoplist;
        }

        // query-in-result matching on the hashed components
        final Set urlcomph = plasmaCondenser.words2hashSet(urlcomps);
        final Set descrcomph = plasmaCondenser.words2hashSet(descrcomps);
        for (Iterator hashes = query.queryHashes.iterator(); hashes.hasNext();) {
            final String queryhash = (String) hashes.next();
            if (urlcomph.contains(queryhash)) {
                score += 256 << coeff_appurl;
            }
            if (descrcomph.contains(queryhash)) {
                score += 256 << coeff_appdescr;
            }
        }

        return score;
    }
 }
--- a/source/de/anomic/plasma/plasmaWordIndex.java
+++ b/source/de/anomic/plasma/plasmaWordIndex.java
@ -34,7 +34,6 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import de.anomic.htmlFilter.htmlFilterContentScraper;
@ -47,7 +46,6 @@ import de.anomic.index.indexRWIEntry;
 import de.anomic.index.indexRWIRowEntry;
 import de.anomic.index.indexURLEntry;
 import de.anomic.kelondro.kelondroBase64Order;
 import de.anomic.kelondro.kelondroBitfield;
 import de.anomic.kelondro.kelondroCloneableIterator;
 import de.anomic.kelondro.kelondroMergeIterator;
 import de.anomic.kelondro.kelondroOrder;
@ -65,7 +63,7 @@ public final class plasmaWordIndex implements indexRI {
    public  static final long wCacheMaxAge   = 1000 * 60 * 30; // milliseconds; 30 minutes
    public  static final int  wCacheMaxChunk =  400;           // maximum number of references for each urlhash
    public  static final int  lowcachedivisor = 320;
-    public  static final int  maxCollectionPartition = 8; // should be 7
+    public  static final int  maxCollectionPartition = 7; // should be 7
    private final kelondroOrder      indexOrder = kelondroBase64Order.enhancedCoder;
    private final indexRAMRI         dhtOutCache, dhtInCache;
@ -407,115 +405,6 @@ public final class plasmaWordIndex implements indexRI {
        return new Map[]{inclusionContainers, exclusionContainers};
    }
    /**
     * Searches for a single word hash and generates a list of url links.
     *
     * For sortorder 2 the container of the word is handed to a ranking process
     * and its ranked entries are returned. Otherwise the entries are filtered
     * with the query constraint, flag occurrences are counted, and - if loadurl
     * is set - the urls are loaded and sorted by url hash (sortorder 0) or by
     * url text (sortorder 1); hashes without a loadable url are collected as
     * misses. Without loadurl the raw RWI entries are returned in container order.
     *
     * @param query     the query; must contain exactly one query hash
     * @param loadurl   if true, url entries are loaded and returned; otherwise RWI entries
     * @param sortorder 0 = hash, 1 = url, 2 = ranking
     * @param ranking   ranking profile used for sortorder 2
     * @return the finding with url or RWI iterator, misses, count and flag counters
     */
    public Finding retrieveURLs(plasmaSearchQuery query, boolean loadurl, int sortorder, plasmaSearchRankingProfile ranking) {
        // search for a word hash and generate a list of url links
        // sortorder: 0 = hash, 1 = url, 2 = ranking
        assert query.queryHashes.size() == 1;
        final TreeSet mi = new TreeSet();
        String keyhash = (String) query.queryHashes.first();
        kelondroBitfield filter = query.constraint;
        indexContainer index = getContainer(keyhash, null);
        indexRWIEntry ientry;
        indexURLEntry uentry;
        final int[] c = new int[32]; // flag counters; a new int array starts zero-filled

        if ((index == null) || (index.size() == 0)) {
            return new Finding(mi.iterator(), mi.iterator(), mi, 0, c);
        }

        if (sortorder == 2) {
            plasmaSearchRankingProcess process = new plasmaSearchRankingProcess(query, null, ranking, query.neededResults());
            process.insert(index, true);
            return new Finding(process.entries(this, true), null, mi, process.filteredCount(), process.flagCount());
        } else {
            final TreeMap tm = new TreeMap();
            final ArrayList indexes = new ArrayList();

            final Iterator en = index.entries();
            // generate a new map where the urls are sorted by the key that the sortorder asks for
            loop: while (en.hasNext()) {
                ientry = (indexRWIEntry) en.next();

                // test if ientry matches with filter
                if (filter != null) {
                    // if all = true: let only entries pass that has all matching bits
                    // if all = false: let all entries pass that has at least one matching bit
                    if (query.allofconstraint) {
                        for (int i = 0; i < 32; i++) {
                            if ((filter.get(i)) && (!ientry.flags().get(i))) continue loop;
                        }
                    } else {
                        boolean nok = true;
                        flagtest: for (int i = 0; i < 32; i++) {
                            if ((filter.get(i)) && (ientry.flags().get(i))) {nok = false; break flagtest;}
                        }
                        if (nok) continue loop;
                    }
                }

                // increase flag counts
                for (int i = 0; i < 32; i++) {
                    if (ientry.flags().get(i)) {c[i]++;}
                }

                // load url
                if (loadurl) {
                    uentry = loadedURL.load(ientry.urlHash(), ientry, 0);
                    if (uentry == null) {
                        mi.add(ientry.urlHash());
                    } else {
                        // the keys follow the documented contract: 0 = hash, 1 = url
                        if (sortorder == 0) {
                            tm.put(ientry.urlHash(), uentry);
                        }
                        if (sortorder == 1) {
                            tm.put(uentry.comp().url().toNormalform(false, true), uentry);
                        }
                    }
                } else {
                    indexes.add(ientry);
                }
                if ((query.neededResults() > 0) && (mi.size() + tm.size() > query.neededResults())) break loop;
            } // end loop

            if (loadurl) {
                return new Finding(tm.values().iterator(), null, mi, tm.size(), c);
            } else {
                return new Finding(null, indexes.iterator(), mi, indexes.size(), c);
            }
        }
    }
    /**
     * Plain result holder of a url retrieval: the found items (either as url
     * entries or as RWI entries), the hashes that could not be resolved, the
     * number of findings and the per-flag counters.
     */
    public static class Finding {

        /** iterator of indexURLEntry objects; may be null */
        private final Iterator urls;
        /** iterator of indexRWIEntry objects; may be null */
        private final Iterator rwientries;
        /** set of hashes for which no item was found */
        private final Set misses;
        /** number of findings, as reported by size() */
        private final int findcount;
        /** counters per flag position */
        private final int[] flagcount;

        /**
         * @param urls       iterator of indexURLEntry objects
         * @param rwientries iterator of indexRWIEntry objects
         * @param misses     set of hashes where no item was found
         * @param findcount  number of findings
         * @param flagcount  counters per flag position
         */
        public Finding(Iterator urls, Iterator rwientries, Set misses, int findcount, int[] flagcount) {
            this.urls = urls;
            this.rwientries = rwientries;
            this.misses = misses;
            this.findcount = findcount;
            this.flagcount = flagcount;
        }

        /** @return the number of findings given at construction */
        public int size() {
            return findcount;
        }

        /** @return the iterator of url entries given at construction */
        public Iterator urls() {
            return urls;
        }

        /** @return the iterator of RWI entries given at construction */
        public Iterator rwientries() {
            return rwientries;
        }

        /** @return the set of hashes that could not be resolved */
        public Set miss() {
            return misses;
        }

        /** @return the flag counter array given at construction (not copied) */
        public int[] flagcount() {
            return flagcount;
        }
    }
    /**
     * Reports the size of this word index.
     *
     * @return the largest of the sizes reported by the collections, the
     *         dht-in cache and the dht-out cache
     */
    public int size() {
        final int stored = collections.size();
        final int inbound = dhtInCache.size();
        final int outbound = dhtOutCache.size();
        return Math.max(stored, Math.max(inbound, outbound));
    }
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@ -380,7 +380,7 @@ public final class yacyClient {
        post.put("ttl", "0");
        post.put("maxdist", maxDistance);
        post.put("profile", crypt.simpleEncode(rankingProfile.toExternalString()));
-        post.put("constraint", constraint.exportB64());
+        post.put("constraint", (constraint == null) ? "" : constraint.exportB64());
        if (abstractCache != null) post.put("abstracts", "auto");
        final long timestamp = System.currentTimeMillis();