introduction of search profiles; very experimental

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@976 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent efd11c13b6
commit 4dcbc26ef1

@ -151,7 +151,7 @@ public class IndexControl_p {
// generate an urlx array
plasmaWordIndexEntity index = null;
try {
index = switchboard.wordIndex.getEntity(keyhash, true);
index = switchboard.wordIndex.getEntity(keyhash, true, -1);
Iterator en = index.elements(true);
int i = 0;
urlx = new String[index.size()];
@ -258,7 +258,7 @@ public class IndexControl_p {
plasmaWordIndexEntity[] indexes = new plasmaWordIndexEntity[1];
String result;
long starttime = System.currentTimeMillis();
indexes[0] = switchboard.wordIndex.getEntity(keyhash, true);
indexes[0] = switchboard.wordIndex.getEntity(keyhash, true, -1);
// built urlCache
Iterator urlIter = indexes[0].elements(true);
HashMap knownURLs = new HashMap();
@ -436,7 +436,7 @@ public class IndexControl_p {
// search for a word hash and generate a list of url links
plasmaWordIndexEntity index = null;
try {
index = switchboard.wordIndex.getEntity(keyhash, true);
index = switchboard.wordIndex.getEntity(keyhash, true, -1);
final StringBuffer result = new StringBuffer(1024);
if (index.size() == 0) {

@ -69,12 +69,14 @@ public class NetworkPicture {
int height = 480;
int passiveLimit = 300;
int potentialLimit = 300;
int maxCount = 1000;
if (post != null) {
width = post.getInt("width", 640);
height = post.getInt("height", 420);
passiveLimit = post.getInt("pal", 300);
potentialLimit = post.getInt("pol", 300);
maxCount = post.getInt("max", 1000);
}
int innerradius = Math.min(width, height) / 5;
@ -98,7 +100,6 @@ public class NetworkPicture {
//System.out.println("Seed Maximum distance is " + yacySeed.maxDHTDistance);
//System.out.println("Seed Minimum distance is " + yacySeed.minDHTNumber);
final int maxCount = 300;
yacySeed seed;
int angle;
long lastseen;

@ -88,7 +88,7 @@ public final class query {
// <env> shall contain a word hash, the number of assigned lurls to this hash is returned
de.anomic.plasma.plasmaWordIndexEntity entity = null;
try {
entity = sb.wordIndex.getEntity(env, true);
entity = sb.wordIndex.getEntity(env, true, -1);
prop.put("response", entity.size());
entity.close();
} catch (IOException e) {

@ -158,7 +158,7 @@ public class plasmaDbImporter extends Thread {
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();

@ -63,6 +63,7 @@ public final class plasmaSearchEvent {
private plasmaCrawlLURL urlStore;
private plasmaSnippetCache snippetCache;
private plasmaWordIndexEntity rcLocal, rcGlobal; // caches for results
private plasmaSearchProfile profileLocal, profileGlobal;
private yacySearch[] searchThreads;
public plasmaSearchEvent(plasmaSearchQuery query, serverLog log, plasmaWordIndex wordIndex, plasmaCrawlLURL urlStore, plasmaSnippetCache snippetCache) {
@ -73,6 +74,13 @@ public final class plasmaSearchEvent {
this.snippetCache = snippetCache;
this.rcLocal = new plasmaWordIndexEntity(null);
this.rcGlobal = new plasmaWordIndexEntity(null);
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
this.profileLocal = new plasmaSearchProfile(4 * query.maximumTime / 10, query.wantedResults);
this.profileGlobal = new plasmaSearchProfile(6 * query.maximumTime / 10, query.wantedResults);
} else {
this.profileLocal = new plasmaSearchProfile(query.maximumTime, query.wantedResults);
this.profileGlobal = null;
}
this.searchThreads = null;
}
@ -80,9 +88,8 @@ public final class plasmaSearchEvent {
// combine all threads
if (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) {
int fetchcount = ((int) (query.maximumTime / 1000L)) * 5; // number of wanted results until break in search
int fetchpeers = ((int) (query.maximumTime / 1000L)) * 2; // number of target peers; means 30 peers in 10 seconds
long fetchtime = query.maximumTime * 6 / 10; // time to waste
int fetchpeers = (int) (query.maximumTime / 1000L); // number of target peers; means 10 peers in 10 seconds
if (fetchpeers > 10) fetchpeers = 10;
// remember time
long start = System.currentTimeMillis();
@ -91,16 +98,12 @@ public final class plasmaSearchEvent {
serverInstantThread.oneTimeJob(this, "localSearch", log, 0);
// do a global search
int globalContributions = globalSearch(fetchcount, fetchpeers, fetchtime);
int globalContributions = globalSearch(fetchpeers);
log.logFine("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - start) / 1000) + " seconds");
try {
// combine the result and order
long remainingTime = query.maximumTime - (System.currentTimeMillis() - start);
if (remainingTime < 500) remainingTime = 500;
if (remainingTime > 3000) remainingTime = 3000;
plasmaSearchResult result = order(remainingTime, query.wantedResults);
plasmaSearchResult result = order();
result.globalContributions = globalContributions;
result.localContributions = rcLocal.size();
@ -112,6 +115,7 @@ public final class plasmaSearchEvent {
rcLocal = null;
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
return result;
} catch (IOException e) {
return null;
@ -120,14 +124,16 @@ public final class plasmaSearchEvent {
// do a local search
long start = System.currentTimeMillis();
try {
localSearch(query.maximumTime);
plasmaSearchResult result = order(query.maximumTime - (System.currentTimeMillis() - start), query.wantedResults);
localSearch();
plasmaSearchResult result = order();
result.localContributions = rcLocal.size();
// clean up
if ((rcLocal != null) && (!(rcLocal.isTMPEntity()))) rcLocal.close();
rcLocal = null;
// return search result
log.logFine("SEARCHRESULT: " + profileLocal.reportToString());
return result;
} catch (IOException e) {
return null;
@ -135,19 +141,14 @@ public final class plasmaSearchEvent {
}
}
public void localSearch() throws IOException {
// method called by a one-time
localSearch(query.maximumTime * 6 / 10);
}
public int localSearch(long time) throws IOException {
public int localSearch() throws IOException {
// search for the set of hashes and return an array of urlEntry elements
long stamp = System.currentTimeMillis();
// retrieve entities that belong to the hashes
Set entities = wordIndex.getEntities(query.queryHashes, true, true);
profileLocal.startTimer();
Set entities = wordIndex.getEntities(query.queryHashes, true, true, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_COLLECTION));
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_COLLECTION);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_COLLECTION, (entities == null) ? 0 : entities.size());
// since this is a conjunction we return an empty entity if any word is not known
if (entities == null) {
@ -156,31 +157,28 @@ public final class plasmaSearchEvent {
}
// join the result
long remainingTime = time - (System.currentTimeMillis() - stamp);
if (remainingTime < 1000) remainingTime = 1000;
rcLocal = plasmaWordIndexEntity.joinEntities(entities, remainingTime);
log.logFine("SEARCH TIME FOR FINDING " + rcLocal.size() + " ELEMENTS: " + ((System.currentTimeMillis() - stamp) / 1000) + " seconds");
profileLocal.startTimer();
rcLocal = plasmaWordIndexEntity.joinEntities(entities, profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_JOIN));
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_JOIN);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_JOIN, rcLocal.size());
return rcLocal.size();
}
public int globalSearch(int fetchcount, int fetchpeers, long timelimit) {
public int globalSearch(int fetchpeers) {
// do global fetching
// the result of the fetch is then in the rcGlobal
if (fetchpeers < 10) fetchpeers = 10;
if (fetchcount > query.wantedResults * 10) fetchcount = query.wantedResults * 10;
// set a duetime for clients
long duetime = timelimit - 4000; // subtract network traffic overhead, guessed 4 seconds
if (duetime < 1000) { duetime = 1000; }
log.logFine("STARTING " + fetchpeers + " THREADS TO CATCH EACH " + profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) + " URLs WITHIN " + (profileGlobal.duetime() / 1000) + " SECONDS");
long timeout = System.currentTimeMillis() + timelimit;
searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchcount, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, duetime);
long timeout = System.currentTimeMillis() + profileGlobal.duetime() + 4000;
searchThreads = yacySearch.searchHashes(query.queryHashes, urlStore, rcGlobal, fetchpeers, plasmaSwitchboard.urlBlacklist, snippetCache, profileGlobal);
// wait until wanted delay passed or wanted result appeared
while (System.currentTimeMillis() < timeout) {
// check if all threads have been finished or results so far are enough
if (rcGlobal.size() >= fetchcount * 3) break; // we have enough
if (rcGlobal.size() >= profileGlobal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT) * 3) break; // we have enough
if (yacySearch.remainingWaiting(searchThreads) == 0) break; // we cannot expect more
// wait a little time ..
try {Thread.currentThread().sleep(100);} catch (InterruptedException e) {}
@ -189,7 +187,7 @@ public final class plasmaSearchEvent {
return rcGlobal.size();
}
public plasmaSearchResult order(long maxTime, int minEntries) throws IOException {
public plasmaSearchResult order() throws IOException {
// we collect the urlhashes and construct a list with urlEntry objects
// attention: if minEntries is too high, this method will not terminate within the maxTime
@ -197,19 +195,29 @@ public final class plasmaSearchEvent {
searchResult.merge(rcLocal, -1);
searchResult.merge(rcGlobal, -1);
long preorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_PRESORT);
long postorderTime = profileLocal.getTargetTime(plasmaSearchProfile.PROCESS_POSTSORT);
profileLocal.startTimer();
plasmaSearchPreOrder preorder = new plasmaSearchPreOrder(query);
preorder.addEntity(searchResult, preorderTime);
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_PRESORT);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_PRESORT, rcLocal.size());
profileLocal.startTimer();
plasmaSearchResult acc = new plasmaSearchResult(query);
if (searchResult == null) return acc; // strange case where searchResult is not proper: acc is then empty
if (searchResult.size() == 0) return acc; // case that we have nothing to do
Iterator e = searchResult.elements(true);
// start url-fetch
plasmaWordIndexEntry entry;
long startCreateTime = System.currentTimeMillis();
long postorderLimitTime = (postorderTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + postorderTime;
plasmaCrawlLURL.Entry page;
int minEntries = profileLocal.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT);
try {
while (e.hasNext()) {
if ((acc.sizeFetched() >= minEntries) &&
(System.currentTimeMillis() - startCreateTime >= maxTime)) break;
entry = (plasmaWordIndexEntry) e.next();
while (preorder.hasNext()) {
if ((acc.sizeFetched() >= minEntries) && (System.currentTimeMillis() >= postorderLimitTime)) break;
entry = (plasmaWordIndexEntry) preorder.next();
// find the url entry
page = urlStore.getEntry(entry.getUrlHash());
// add a result
@ -218,10 +226,15 @@ public final class plasmaSearchEvent {
} catch (kelondroException ee) {
serverLog.logSevere("PLASMA", "Database Failure during plasmaSearch.order: " + ee.getMessage(), ee);
}
long startSortTime = System.currentTimeMillis();
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_URLFETCH);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_URLFETCH, acc.sizeFetched());
// start postsorting
profileLocal.startTimer();
acc.sortResults();
serverLog.logFine("PLASMA", "plasmaSearchEvent.order: minEntries = " + minEntries + ", effectiveEntries = " + acc.sizeOrdered() + ", demanded Time = " + maxTime + ", effectiveTime = " + (System.currentTimeMillis() - startCreateTime) + ", createTime = " + (startSortTime - startCreateTime) + ", sortTime = " + (System.currentTimeMillis() - startSortTime));
return acc;
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_POSTSORT, acc.sizeOrdered());
return acc;
}
public void flushResults() {
@ -230,31 +243,38 @@ public final class plasmaSearchEvent {
// it is wise to call this within a separate thread because this method waits untill all
if (searchThreads == null) return;
// wait untill all threads are finished
// wait until all threads are finished
int remaining;
int count = 0;
String wordHash;
long starttime = System.currentTimeMillis();
while ((remaining = yacySearch.remainingWaiting(searchThreads)) > 0) {
try {Thread.currentThread().sleep(5000);} catch (InterruptedException e) {}
// flush the rcGlobal as much as is there so far
synchronized (rcGlobal) {
Iterator hashi = query.queryHashes.iterator();
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
Iterator i = rcGlobal.elements(true);
plasmaWordIndexEntry entry;
while (i.hasNext()) {
entry = (plasmaWordIndexEntry) i.next();
wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
}
// the rcGlobal was flushed, empty it
count += rcGlobal.size();
rcGlobal.deleteComplete();
}
// wait a little bit before trying again
try {Thread.currentThread().sleep(3000);} catch (InterruptedException e) {}
if (System.currentTimeMillis() - starttime > 90000) {
yacySearch.interruptAlive(searchThreads);
serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED");
serverLog.logFine("PLASMA", "SEARCH FLUSH: " + remaining + " PEERS STILL BUSY; ABANDONED; SEARCH WAS " + query.queryWords);
break;
}
}
// now flush the rcGlobal into wordIndex
Iterator hashi = query.queryHashes.iterator();
String wordHash;
while (hashi.hasNext()) {
wordHash = (String) hashi.next();
Iterator i = rcGlobal.elements(true);
plasmaWordIndexEntry entry;
while (i.hasNext()) {
entry = (plasmaWordIndexEntry) i.next();
wordIndex.addEntries(plasmaWordIndexEntryContainer.instantContainer(wordHash, System.currentTimeMillis(), entry), false);
}
}
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + rcGlobal.size() + " GLOBAL SEARCH RESULTS");
serverLog.logFine("PLASMA", "FINISHED FLUSHING " + count + " GLOBAL SEARCH RESULTS FOR SEARCH " + query.queryWords);
// finally delete the temporary index
rcGlobal = null;

@ -0,0 +1,103 @@
// plasmaSearchPreOrder.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 23.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.TreeMap;
import java.util.Set;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.Iterator;
import java.net.URL;
import de.anomic.kelondro.kelondroMScoreCluster;
import de.anomic.server.serverCodings;
/**
 * Accumulates reverse-word-index entries in ranking order before the
 * final (post-) ordering of search results takes place.
 *
 * Entries are kept in a TreeMap whose key is a fixed-width hex encoding
 * of the ranking value concatenated with the url hash; iteration via
 * next() therefore delivers the highest-ranked entry first.
 */
public final class plasmaSearchPreOrder {

    private TreeMap pageAcc;          // key = ranking hex + url hash; value = plasmaWordIndexEntry
    private plasmaSearchQuery query;  // supplies the primary/secondary order attributes

    public plasmaSearchPreOrder(plasmaSearchQuery query) {
        this.pageAcc = new TreeMap();
        this.query = query;
    }

    /**
     * Shallow clone: duplicates only the sort structure; the contained
     * index entries are shared with this instance.
     */
    public plasmaSearchPreOrder cloneSmart() {
        plasmaSearchPreOrder copy = new plasmaSearchPreOrder(this.query);
        copy.pageAcc = (TreeMap) this.pageAcc.clone();
        return copy;
    }

    public boolean hasNext() {
        return !pageAcc.isEmpty();
    }

    /**
     * Removes and returns the entry with the highest ranking key.
     */
    public plasmaWordIndexEntry next() {
        return (plasmaWordIndexEntry) pageAcc.remove(pageAcc.lastKey());
    }

    /**
     * Feeds all entries of the given entity into the pre-order.
     *
     * @param entity  source of index entries
     * @param maxTime time budget in milliseconds; negative means unlimited
     */
    public void addEntity(plasmaWordIndexEntity entity, long maxTime) {
        long deadline = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
        Iterator it = entity.elements(true);
        while (it.hasNext()) {
            if (System.currentTimeMillis() > deadline) break;
            addEntry((plasmaWordIndexEntry) it.next());
        }
    }

    /**
     * Inserts a single entry; the primary order attribute is weighted by
     * 4096 so the secondary attribute only breaks ties.
     */
    public void addEntry(plasmaWordIndexEntry indexEntry) {
        long ranking = 0;
        if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality();
        else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * indexEntry.getVirtualAge();
        if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality();
        else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge();
        pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), indexEntry);
    }

}

@ -0,0 +1,276 @@
// plasmaSearchProfile.java
// -----------------------
// part of YACY
// (C) by Michael Peter Christen; mc@anomic.de
// first published on http://www.anomic.de
// Frankfurt, Germany, 2005
// Created: 17.10.2005
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// Using this software in any meaning (reading, learning, copying, compiling,
// running) means that you agree that the Author(s) is (are) not responsible
// for cost, loss of data or any harm that may be caused directly or indirectly
// by usage of this software or this documentation. The usage of this software
// is on your own risk. The installation and usage (starting/running) of this
// software may allow other people or application to access your computer and
// any attached devices and is highly dependent on the configuration of the
// software which must be done by the user of the software; the author(s) is
// (are) also not responsible for proper configuration and usage of the
// software, even if provoked by documentation provided together with
// the software.
//
// Any changes to this file according to the GPL as documented in the file
// gpl.txt aside this file in the shipment you received can be done to the
// lines that follows this copyright notice here, but changes must not be
// done inside the copyright notice above. A re-distribution must contain
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
package de.anomic.plasma;
import java.util.HashMap;
import java.lang.StringBuffer;
import java.lang.Cloneable;
/**
 * Timing and quota profile for search processes.
 *
 * A profile assigns to each search stage (collection, join, presort,
 * url-fetch, postsort, snippet-fetch) a target time and a target count.
 * While the search runs, the actually used times and produced counts are
 * recorded as 'yield' values. The profile can be serialized into a compact
 * string (no '=' or spaces, so it can travel in a http header or post
 * argument) and parsed back; it shall be used to initiate a search and
 * also to evaluate the real obtained timings after a search is performed.
 */
public class plasmaSearchProfile implements Cloneable {

    // stage identifiers; for each stage the profile holds:
    //   time  = milliseconds granted to / used by the stage
    //   count = maximum / actual number of entries handled by the stage
    public static final char PROCESS_COLLECTION   = 'c'; // get RWIs out of RAM cache, assortments and WORDS files
    public static final char PROCESS_JOIN         = 'j'; // join between all collected RWIs
    public static final char PROCESS_PRESORT      = 'r'; // sort of the joined URL-records
    public static final char PROCESS_URLFETCH     = 'u'; // fetch the real URLs from the LURL database
    public static final char PROCESS_POSTSORT     = 'o'; // final sort of URLs
    public static final char PROCESS_SNIPPETFETCH = 's'; // fetch snippets for selected URLs

    // execution order of the stages; getTargetTime() sums along this sequence
    public static char[] sequence = new char[]{
        PROCESS_COLLECTION,
        PROCESS_JOIN,
        PROCESS_PRESORT,
        PROCESS_URLFETCH,
        PROCESS_POSTSORT,
        PROCESS_SNIPPETFETCH
    };

    private HashMap targetTime;  // Character(stage) -> Long(milliseconds granted)
    private HashMap targetCount; // Character(stage) -> Integer(maximum entries)
    private HashMap yieldTime;   // Character(stage) -> Long(milliseconds actually used)
    private HashMap yieldCount;  // Character(stage) -> Integer(entries actually produced)
    private long timer;          // set by startTimer(), read by setYieldTime()

    private plasmaSearchProfile() {
        targetTime = new HashMap();
        targetCount = new HashMap();
        yieldTime = new HashMap();
        yieldCount = new HashMap();
        timer = 0;
    }

    /**
     * Creates a profile by distributing a total time budget and a wanted
     * result count over the six stages with fixed weights (time in twelfths;
     * counts scaled per stage). The stage times sum up to the given time.
     *
     * @param time  total time budget in milliseconds
     * @param count number of wanted results
     */
    public plasmaSearchProfile(long time, int count) {
        this(
            3 * time / 12, 10 * count,
            1 * time / 12, 10 * count,
            1 * time / 12, 10 * count,
            2 * time / 12,  5 * count,
            4 * time / 12,      count,
            1 * time / 12,          1
        );
    }

    /**
     * Creates a profile with explicit per-stage targets.
     */
    public plasmaSearchProfile(
            long time_collection,   int count_collection,
            long time_join,         int count_join,
            long time_presort,      int count_presort,
            long time_urlfetch,     int count_urlfetch,
            long time_postsort,     int count_postsort,
            long time_snippetfetch, int count_snippetfetch) {
        this();
        targetTime.put(Character.valueOf(PROCESS_COLLECTION),   Long.valueOf(time_collection));
        targetTime.put(Character.valueOf(PROCESS_JOIN),         Long.valueOf(time_join));
        targetTime.put(Character.valueOf(PROCESS_PRESORT),      Long.valueOf(time_presort));
        targetTime.put(Character.valueOf(PROCESS_URLFETCH),     Long.valueOf(time_urlfetch));
        targetTime.put(Character.valueOf(PROCESS_POSTSORT),     Long.valueOf(time_postsort));
        targetTime.put(Character.valueOf(PROCESS_SNIPPETFETCH), Long.valueOf(time_snippetfetch));
        targetCount.put(Character.valueOf(PROCESS_COLLECTION),   Integer.valueOf(count_collection));
        targetCount.put(Character.valueOf(PROCESS_JOIN),         Integer.valueOf(count_join));
        targetCount.put(Character.valueOf(PROCESS_PRESORT),      Integer.valueOf(count_presort));
        targetCount.put(Character.valueOf(PROCESS_URLFETCH),     Integer.valueOf(count_urlfetch));
        targetCount.put(Character.valueOf(PROCESS_POSTSORT),     Integer.valueOf(count_postsort));
        targetCount.put(Character.valueOf(PROCESS_SNIPPETFETCH), Integer.valueOf(count_snippetfetch));
    }

    /**
     * Creates a profile by parsing a string produced by targetToString().
     */
    public plasmaSearchProfile(String s) {
        this();
        intoMap(s, targetTime, targetCount);
    }

    public Object clone() {
        plasmaSearchProfile p = new plasmaSearchProfile();
        p.targetTime = (HashMap) this.targetTime.clone();
        p.targetCount = (HashMap) this.targetCount.clone();
        p.yieldTime = (HashMap) this.yieldTime.clone();
        p.yieldCount = (HashMap) this.yieldCount.clone();
        p.timer = this.timer; // fix: timer was not copied before
        return p;
    }

    /**
     * @return the total duetime as sum of all stage target times
     */
    public long duetime() {
        long d = 0;
        for (int i = 0; i < sequence.length; i++) {
            Long t = (Long) targetTime.get(Character.valueOf(sequence[i]));
            if (t != null) d += t.longValue();
        }
        return d;
    }

    /**
     * Merges serialized yield values (as produced by yieldToString())
     * into this profile's yield maps.
     */
    public void putYield(String s) {
        intoMap(s, yieldTime, yieldCount);
    }

    public String yieldToString() {
        return toString(yieldTime, yieldCount);
    }

    public String targetToString() {
        return toString(targetTime, targetCount);
    }

    /**
     * Computes the time still available for the given stage: the sum of all
     * target times up to and including the stage, minus the yield times
     * already spent by the earlier stages. Never negative; 0 for an unknown
     * stage identifier.
     */
    public long getTargetTime(char type) {
        long sum = 0;
        Long t;
        Character element;
        for (int i = 0; i < sequence.length; i++) {
            element = Character.valueOf(sequence[i]);
            t = (Long) targetTime.get(element);
            if (t != null) sum += t.longValue();
            if (type == sequence[i]) return (sum < 0) ? 0 : sum;
            t = (Long) yieldTime.get(element);
            if (t != null) sum -= t.longValue();
        }
        return 0;
    }

    /**
     * @return the target count for the stage, or -1 if not set
     */
    public int getTargetCount(char type) {
        Integer i = (Integer) targetCount.get(Character.valueOf(type));
        return (i == null) ? -1 : i.intValue();
    }

    /**
     * @return the yield time for the stage in milliseconds, or -1 if not set
     */
    public long getYieldTime(char type) {
        Long l = (Long) yieldTime.get(Character.valueOf(type));
        return (l == null) ? -1 : l.longValue();
    }

    /**
     * @return the yield count for the stage, or -1 if not set
     */
    public int getYieldCount(char type) {
        Integer i = (Integer) yieldCount.get(Character.valueOf(type));
        return (i == null) ? -1 : i.intValue();
    }

    /** Starts the stopwatch used by setYieldTime(). */
    public void startTimer() {
        this.timer = System.currentTimeMillis();
    }

    /** Records the elapsed time since startTimer() as yield time of the stage. */
    public void setYieldTime(char type) {
        long t = System.currentTimeMillis() - this.timer;
        yieldTime.put(Character.valueOf(type), Long.valueOf(t));
    }

    /** Records the number of entries produced by the stage. */
    public void setYieldCount(char type, int count) {
        yieldCount.put(Character.valueOf(type), Integer.valueOf(count));
    }

    /** Human-readable report of targets and yields, for logging. */
    public String reportToString() {
        return "target=" + toString(targetTime, targetCount) + "; yield=" + toString(yieldTime, yieldCount);
    }

    /**
     * Serializes a time/count map pair as "t&lt;stage&gt;&lt;millis&gt;|c&lt;stage&gt;&lt;count&gt;|..."
     * for every stage in sequence order. Contains no '=' or spaces, so the
     * result can be sent in a http header or post argument. Missing values
     * are emitted as 0.
     */
    public static String toString(HashMap time, HashMap count) {
        StringBuffer sb = new StringBuffer(sequence.length * 10);
        Character element;
        Integer xi;
        Long xl;
        for (int i = 0; i < sequence.length; i++) {
            element = Character.valueOf(sequence[i]);
            sb.append('t');
            sb.append(element);
            xl = (Long) time.get(element);
            sb.append((xl == null) ? "0" : xl.toString());
            sb.append('|');
            sb.append('c');
            sb.append(element);
            xi = (Integer) count.get(element);
            sb.append((xi == null) ? "0" : xi.toString());
            sb.append('|');
        }
        return sb.toString();
    }

    /**
     * Reverse of toString(HashMap, HashMap): parses the serialized form and
     * fills the given time and count maps, keyed by Character(stage) so the
     * getters can find the values. Malformed elements are skipped; parsing
     * stops at the first structurally broken element.
     *
     * Fixes of the original version: the scan position is now advanced past
     * each '|' (the old code never moved it, causing an endless loop), and
     * the maps are keyed with Character objects instead of Strings (String
     * keys could never be retrieved by the Character-keyed getters).
     */
    public static void intoMap(String s, HashMap time, HashMap count) {
        if (s == null) return;
        int p = 0;
        int p1;
        while ((p < s.length()) && ((p1 = s.indexOf('|', p)) > 0)) {
            if (p1 < p + 2) break; // element too short to contain type + stage + value
            char ct = s.charAt(p);                                  // 't' = time, 'c' = count
            Character element = Character.valueOf(s.charAt(p + 1)); // stage identifier
            String v = s.substring(p + 2, p1);
            try {
                if (ct == 't') {
                    time.put(element, Long.valueOf(Long.parseLong(v)));
                } else {
                    count.put(element, Integer.valueOf(Integer.parseInt(v)));
                }
            } catch (NumberFormatException e) {
                // skip malformed value, continue with the next element
            }
            p = p1 + 1; // advance past the '|' separator
        }
    }

}

@ -135,7 +135,6 @@ public final class plasmaSearchResult {
String[] urlcomps;
String[] descrcomps;
long ranking;
long inc = 4096 * 4096;
String queryhash;
for (int i = 0; i < results.size(); i++) {
// take out values from result array
@ -147,14 +146,10 @@ public final class plasmaSearchResult {
// apply pre-calculated order attributes
ranking = 0;
if (query.order[0].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking = 4096 * indexEntry.getQuality();
else if (query.order[0].equals(plasmaSearchQuery.ORDER_DATE)) ranking = 4096 * indexEntry.getVirtualAge();
if (query.order[1].equals(plasmaSearchQuery.ORDER_QUALITY)) ranking += indexEntry.getQuality();
else if (query.order[1].equals(plasmaSearchQuery.ORDER_DATE)) ranking += indexEntry.getVirtualAge();
// apply 'common-sense' heuristic using references
for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking += inc;
for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking += inc;
for (int j = 0; j < urlcomps.length; j++) if (commonSense.contains(urlcomps[j])) ranking++;
for (int j = 0; j < descrcomps.length; j++) if (commonSense.contains(descrcomps[j])) ranking++;
// apply query-in-result matching
Set urlcomph = plasmaSearchQuery.words2hashes(urlcomps);
@ -162,8 +157,8 @@ public final class plasmaSearchResult {
Iterator shi = query.queryHashes.iterator();
while (shi.hasNext()) {
queryhash = (String) shi.next();
if (urlcomph.contains(queryhash)) ranking += 10 * inc;
if (descrcomph.contains(queryhash)) ranking += 100 * inc;
if (urlcomph.contains(queryhash)) ranking += 10;
if (descrcomph.contains(queryhash)) ranking += 100;
}
// insert value

@ -168,7 +168,7 @@ public class plasmaSnippetCache {
return new result(line, source, null);
}
public synchronized void storeToCache(String wordhashes, String urlhash, String snippet) {
public void storeToCache(String wordhashes, String urlhash, String snippet) {
// generate key
String key = urlhash + wordhashes;
@ -371,13 +371,14 @@ public class plasmaSnippetCache {
this.log);
}
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount) {
public void fetch(plasmaSearchResult acc, Set queryhashes, String urlmask, int fetchcount, long maxTime) {
// fetch snippets
int i = 0;
plasmaCrawlLURL.Entry urlentry;
String urlstring;
plasmaSnippetCache.result snippet;
while ((acc.hasMoreElements()) && (i < fetchcount)) {
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
while ((acc.hasMoreElements()) && (i < fetchcount) && (System.currentTimeMillis() < limitTime)) {
urlentry = acc.nextElement();
if (urlentry.url().getHost().endsWith(".yacyh")) continue;
urlstring = htmlFilterContentScraper.urlNormalform(urlentry.url());

@ -1443,55 +1443,21 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
if (date == null) return ""; else return DateFormatter.format(date);
}
/*
public class presearch extends Thread {
Set queryhashes;
char[] order;
String urlmask;
long time;
int searchcount, fetchcount;
public presearch(Set queryhashes, char[] order, long time, String urlmask, int searchcount, int fetchcount) {
this.queryhashes = queryhashes;
this.order = order;
this.urlmask = urlmask;
this.time = time;
this.searchcount = searchcount;
this.fetchcount = fetchcount;
}
public void run() {
plasmaWordIndexEntity idx = null;
try {
// search the database locally
log.logFine("presearch: started job");
idx = searchManager.searchHashes(queryhashes, time);
log.logFine("presearch: found " + idx.size() + " results");
plasmaSearchResult acc = searchManager.order(idx, queryhashes, stopwords, order, time, searchcount);
if (acc == null) return;
log.logFine("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch");
// take some elements and fetch the snippets
snippetCache.fetch(acc, queryhashes, urlmask, fetchcount);
} catch (IOException e) {
log.logSevere("presearch: failed", e);
} finally {
if (idx != null) try { idx.close(); } catch (Exception e){}
}
log.logFine("presearch: job terminated");
}
}
*/
//public serverObjects searchFromLocal(Set querywords, String order1, String order2, int count, boolean global, long time /*milliseconds*/, String urlmask) {
public serverObjects searchFromLocal(plasmaSearchQuery query) {
// tell all threads to do nothing for a specific time
//log.logInfo("A");
wordIndex.intermission(2 * query.maximumTime);
//log.logInfo("B");
intermissionAllThreads(2 * query.maximumTime);
//log.logInfo("C");
serverObjects prop = new serverObjects();
try {
//log.logInfo("D");
try {
// filter out words that appear in bluelist
//log.logInfo("E");
query.filterOut(blueList);
// log
@ -1510,8 +1476,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
plasmaSearchResult acc = theSearch.search();
// fetch snippets
if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT)
snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10);
//if (query.domType != plasmaSearchQuery.SEARCHDOM_GLOBALDHT) snippetCache.fetch(acc.cloneSmart(), query.queryHashes, query.urlMask, 10, 1000);
log.logFine("SEARCH TIME AFTER ORDERING OF SEARCH RESULT: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// result is a List of urlEntry elements: prepare answer
@ -1531,8 +1496,10 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
String host, hash, address, descr = "";
yacySeed seed;
plasmaSnippetCache.result snippet;
long targetTime = timestamp + query.maximumTime;
if (targetTime < System.currentTimeMillis()) targetTime = System.currentTimeMillis() + 5000;
//kelondroMScoreCluster ref = new kelondroMScoreCluster();
while ((acc.hasMoreElements()) && (i < query.wantedResults)) {
while ((acc.hasMoreElements()) && (i < query.wantedResults) && (System.currentTimeMillis() < targetTime)) {
urlentry = acc.nextElement();
url = urlentry.url();
urlhash = urlentry.hash();
@ -1639,15 +1606,15 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
wordIndex.intermission(2 * query.maximumTime);
intermissionAllThreads(2 * query.maximumTime);
query.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
serverObjects prop = new serverObjects();
try {
log.logInfo("INIT HASH SEARCH: " + query.queryHashes + " - " + query.wantedResults + " links");
long timestamp = System.currentTimeMillis();
plasmaSearchEvent theSearch = new plasmaSearchEvent(query, log, wordIndex, urlPool.loadedURL, snippetCache);
int idxc = theSearch.localSearch(query.maximumTime * 8 / 10);
long remainingTime = query.maximumTime - (System.currentTimeMillis() - timestamp);
if (remainingTime < 500) remainingTime = 500;
plasmaSearchResult acc = theSearch.order(remainingTime, 10);
int idxc = theSearch.localSearch();
plasmaSearchResult acc = theSearch.order();
// result is a List of urlEntry elements
if (acc == null) {

@ -148,23 +148,29 @@ public final class plasmaWordIndex {
return condenser.getWords().size();
}
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty) {
return ramCache.getIndex(wordHash, deleteIfEmpty);
public plasmaWordIndexEntity getEntity(String wordHash, boolean deleteIfEmpty, long maxTime) {
return ramCache.getIndex(wordHash, deleteIfEmpty, maxTime);
}
public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty) {
public Set getEntities(Set wordHashes, boolean deleteIfEmpty, boolean interruptIfEmpty, long maxTime) {
// retrieve entities that belong to the hashes
HashSet entities = new HashSet();
String singleHash;
plasmaWordIndexEntity singleEntity;
Iterator i = wordHashes.iterator();
long start = System.currentTimeMillis();
long remaining;
while (i.hasNext()) {
// check time
remaining = maxTime - (System.currentTimeMillis() - start);
if ((maxTime > 0) && (remaining <= 0)) break;
// get next hash:
singleHash = (String) i.next();
// retrieve index
singleEntity = getEntity(singleHash, true);
singleEntity = getEntity(singleHash, true, (maxTime < 0) ? -1 : remaining / (wordHashes.size() - entities.size()));
// check result
if (((singleEntity == null) || (singleEntity.size() == 0)) && (interruptIfEmpty)) return null;

@ -158,18 +158,20 @@ public final class plasmaWordIndexAssortmentCluster {
if (newContainer.size() > clusterCapacity) return newContainer; // it will not fit
if (newContainer.size() <= clusterCount) newContainer = storeSingular(wordHash, newContainer);
if (newContainer == null) return null;
newContainer.add(removeFromAll(wordHash));
newContainer.add(removeFromAll(wordHash, -1));
if (newContainer.size() > clusterCapacity) return newContainer;
storeStretched(wordHash, newContainer);
return null;
}
public plasmaWordIndexEntryContainer removeFromAll(String wordHash) {
/**
 * Removes the entries for the given word hash from every assortment in the
 * cluster and collects them into a single container.
 *
 * @param wordHash the word hash whose entries shall be removed
 * @param maxTime  soft time budget in milliseconds; a negative value means
 *                 "no time limit". The deadline is only checked after each
 *                 assortment, so at least one assortment is always processed
 *                 and the budget may be slightly overrun.
 * @return container with all entries gathered so far; possibly a partial
 *         result if the time budget ran out before all assortments were seen
 */
public plasmaWordIndexEntryContainer removeFromAll(String wordHash, long maxTime) {
// collect all records from all the assortments and return them
plasmaWordIndexEntryContainer buffer, record = new plasmaWordIndexEntryContainer(wordHash);
// translate the relative budget into an absolute deadline (MAX_VALUE = never)
long limitTime = (maxTime < 0) ? Long.MAX_VALUE : System.currentTimeMillis() + maxTime;
for (int i = 0; i < clusterCount; i++) {
buffer = assortments[i].remove(wordHash);
if (buffer != null) record.add(buffer);
// deadline check comes after the removal, so partial results are kept
if (System.currentTimeMillis() > limitTime) break;
}
return record;
}

@ -416,9 +416,10 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
return ((long) intTime) * ((long) 1000) + startTime;
}
private boolean flushFromAssortmentCluster(String key) {
private boolean flushFromAssortmentCluster(String key, long maxTime) {
// this should only be called if the assortment shall be deleted or returned in an index entity
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key);
maxTime = 8 * maxTime / 10; // reserve time for later adding to backend
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(key, maxTime);
if (container == null) {
return false;
} else {
@ -428,12 +429,19 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
}
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) {
flushThread.pause();
long start = System.currentTimeMillis();
flushFromMem(wordHash);
flushFromAssortmentCluster(wordHash);
if (maxTime < 0) {
flushFromAssortmentCluster(wordHash, -1);
} else {
long remaining = maxTime - (System.currentTimeMillis() - start);
if (remaining > 0) flushFromAssortmentCluster(wordHash, remaining);
}
flushThread.proceed();
return backend.getIndex(wordHash, deleteIfEmpty);
long r = maxTime - (System.currentTimeMillis() - start);
return backend.getIndex(wordHash, deleteIfEmpty, (r < 0) ? 0 : r);
}
public long getUpdateTime(String wordHash) {
@ -454,7 +462,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
hashScore.deleteScore(wordHash);
hashDate.deleteScore(wordHash);
}
assortmentCluster.removeFromAll(wordHash);
assortmentCluster.removeFromAll(wordHash, -1);
backend.deleteIndex(wordHash);
flushThread.proceed();
}
@ -462,7 +470,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
public synchronized int removeEntries(String wordHash, String[] urlHashes, boolean deleteComplete) {
flushThread.pause();
flushFromMem(wordHash);
flushFromAssortmentCluster(wordHash);
flushFromAssortmentCluster(wordHash, -1);
int removed = backend.removeEntries(wordHash, urlHashes, deleteComplete);
flushThread.proceed();
return removed;
@ -562,7 +570,7 @@ public final class plasmaWordIndexCache implements plasmaWordIndexInterface {
} else {
// take out all words from the assortment to see if it fits
// together with the extracted assortment
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash);
plasmaWordIndexEntryContainer container = assortmentCluster.removeFromAll(wordhash, -1);
if (size + container.size() > assortmentCluster.clusterCapacity) {
// this will also be too big to integrate, add to entity
entity.addEntries(container);

@ -181,7 +181,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
}
}
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty) {
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime) {
try {
return new plasmaWordIndexEntity(databaseRoot, wordHash, deleteIfEmpty);
} catch (IOException e) {
@ -210,7 +210,7 @@ public class plasmaWordIndexClassicDB implements plasmaWordIndexInterface {
plasmaWordIndexEntity pi = null;
int count = 0;
try {
pi = getIndex(wordHash, true);
pi = getIndex(wordHash, true, -1);
for (int i = 0; i < urlHashes.length; i++)
if (pi.removeEntry(urlHashes[i], deleteComplete)) count++;
int size = pi.size();

@ -314,7 +314,7 @@ public final class plasmaWordIndexDistribution {
((nexthash = (String) wordHashIterator.next()) != null) &&
(nexthash.trim().length() > 0)
) {
indexEntity = this.wordIndex.getEntity(nexthash, true);
indexEntity = this.wordIndex.getEntity(nexthash, true, -1);
if (indexEntity.size() == 0) {
indexEntity.deleteComplete();
} else if ((indexEntity.size() <= count)|| // if we havn't exceeded the limit
@ -355,7 +355,7 @@ public final class plasmaWordIndexDistribution {
}
} catch (kelondroException e) {
this.log.logSevere("plasmaWordIndexDistribution/1: deleted DB for word " + indexEntity.wordHash(), e);
try {indexEntity.deleteComplete();} catch (IOException ee) {}
indexEntity.deleteComplete();
}
} else {
// make an on-the-fly entity and insert values
@ -389,7 +389,7 @@ public final class plasmaWordIndexDistribution {
tmpEntities.add(tmpEntity);
} catch (kelondroException e) {
this.log.logSevere("plasmaWordIndexDistribution/2: deleted DB for word " + indexEntity.wordHash(), e);
try {indexEntity.deleteComplete();} catch (IOException ee) {}
indexEntity.deleteComplete();
}
indexEntity.close(); // important: is not closed elswhere and cannot be deleted afterwards
indexEntity = null;
@ -427,7 +427,7 @@ public final class plasmaWordIndexDistribution {
urlHashes[c++] = indexEntry.getUrlHash();
}
wordIndex.removeEntries(indexEntities[i].wordHash(), urlHashes, true);
indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true);
indexEntity = wordIndex.getEntity(indexEntities[i].wordHash(), true, -1);
sz = indexEntity.size();
indexEntity.close();
log.logFine("Deleted partial index (" + c + " URLs) for word " + indexEntities[i].wordHash() + "; " + sz + " entries left");

@ -50,6 +50,7 @@ import java.util.Set;
import de.anomic.kelondro.kelondroRecords;
import de.anomic.kelondro.kelondroTree;
import de.anomic.kelondro.kelondroException;
import de.anomic.server.logging.serverLog;
public final class plasmaWordIndexEntity {
@ -128,11 +129,7 @@ public final class plasmaWordIndexEntity {
if (theTmpMap == null) {
int size = theIndex.size();
if ((size == 0) && (delete)) {
try {
deleteComplete();
} catch (IOException e) {
delete = false;
}
deleteComplete();
return 0;
} else {
return size;
@ -164,6 +161,7 @@ public final class plasmaWordIndexEntity {
}
public boolean addEntry(plasmaWordIndexEntry entry) throws IOException {
if (entry == null) return false;
if (theTmpMap == null) {
return (theIndex.put(entry.getUrlHash().getBytes(), entry.toEncodedForm(false).getBytes()) == null);
} else {
@ -191,9 +189,9 @@ public final class plasmaWordIndexEntity {
return count;
}
public boolean deleteComplete() throws IOException {
public boolean deleteComplete() {
if (theTmpMap == null) {
theIndex.close();
try {theIndex.close();} catch (IOException e) {}
// remove file
boolean success = theLocation.delete();
// and also the paren directory if that is empty
@ -257,10 +255,7 @@ public final class plasmaWordIndexEntity {
} catch (IOException e) {
i = null;
throw new RuntimeException("dbenum: " + e.getMessage());
} catch (kelondroException e) {
i = null;
throw new RuntimeException("dbenum: " + e.getMessage());
}
}
}
public void remove() {
throw new UnsupportedOperationException();
@ -305,9 +300,13 @@ public final class plasmaWordIndexEntity {
// a time=-1 means: no timeout
Iterator i = otherEntity.elements(true);
long timeout = (time == -1) ? Long.MAX_VALUE : System.currentTimeMillis() + time;
try {
while ((i.hasNext()) && (System.currentTimeMillis() < timeout)) {
addEntry((plasmaWordIndexEntry) i.next());
}
} catch (kelondroException e) {
serverLog.logSevere("PLASMA", "plasmaWordIndexEntity.merge: " + e.getMessage());
}
}
public static plasmaWordIndexEntity joinEntities(Set entities, long time) throws IOException {

@ -50,7 +50,7 @@ public interface plasmaWordIndexInterface {
public Iterator wordHashes(String startWordHash, boolean up);
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty);
public plasmaWordIndexEntity getIndex(String wordHash, boolean deleteIfEmpty, long maxTime);
public long getUpdateTime(String wordHash);
public void deleteIndex(String wordHash);

@ -182,4 +182,12 @@ public final class serverLog {
if (fileIn != null) try {fileIn.close();}catch(Exception e){}
}
}
/**
 * Left-pads <code>s</code> with <code>fillChar</code> to a total length of
 * at least <code>n</code>. If <code>s</code> already has <code>n</code> or
 * more characters it is returned unchanged.
 *
 * Fixes over the previous version: the old loop decremented <code>n</code>
 * while its bound <code>i</code> never changed (it only terminated once
 * <code>n</code> wrapped past <code>Integer.MIN_VALUE</code>), it never
 * appended <code>s</code> itself, and <code>StringBuffer.insert(0, int)</code>
 * inserted the decimal string of the fill character's code point instead of
 * the character.
 *
 * @param s        the string to pad (must not be null)
 * @param n        the desired minimum length of the result
 * @param fillChar the padding character, given as an int and cast to char
 * @return s left-padded with fillChar up to length n
 */
public static final String format(String s, int n, int fillChar) {
    int l = s.length();
    if (l >= n) return s;
    StringBuffer sb = new StringBuffer(n);
    // emit the padding first, then the original string (left padding)
    for (int i = 0; i < n - l; i++) sb.append((char) fillChar);
    sb.append(s);
    return sb.toString();
}
}

@ -58,6 +58,7 @@ import de.anomic.plasma.plasmaWordIndexEntry;
import de.anomic.plasma.plasmaWordIndexEntryContainer;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaWordIndex;
import de.anomic.plasma.plasmaSearchProfile;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.tools.crypt;
@ -337,9 +338,9 @@ public final class yacyClient {
}
}
public static int search(String wordhashes, int count, boolean global, yacySeed targetPeer,
public static int search(String wordhashes, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache,
plasmaURLPattern blacklist, plasmaSnippetCache snippets, long duetime) {
plasmaURLPattern blacklist, plasmaSnippetCache snippets, plasmaSearchProfile profile) {
// send a search request to peer with remote Hash
// this mainly converts the words into word hashes
@ -376,15 +377,17 @@ public final class yacyClient {
"&query=" + wordhashes;
*/
final serverObjects obj = new serverObjects(9);
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", count);
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
long duetime = profile.duetime();
obj.put("myseed", yacyCore.seedDB.mySeed.genSeedStr(key));
obj.put("youare", targetPeer.hash);
obj.put("key", key);
obj.put("count", profile.getTargetCount(plasmaSearchProfile.PROCESS_POSTSORT));
obj.put("resource", ((global) ? "global" : "local"));
obj.put("query", wordhashes);
obj.put("ttl", "0");
obj.put("duetime", Long.toString(duetime));
obj.put("profile", profile.targetToString()); // new duetimes splitted by specific search tasks
obj.put(yacySeed.MYTIME, yacyCore.universalDateShortString(new Date()));
//yacyCore.log.logDebug("yacyClient.search url=" + url);
final long timestamp = System.currentTimeMillis();
@ -400,7 +403,11 @@ public final class yacyClient {
obj
)
);
// compute all computation times
final long totalrequesttime = System.currentTimeMillis() - timestamp;
String returnProfile = (String) result.get("profile");
if (returnProfile != null) profile.putYield(returnProfile);
/*
HashMap result = nxTools.table(httpc.wget(new URL(url),
@ -464,7 +471,7 @@ public final class yacyClient {
} catch (NumberFormatException e) {
searchtime = totalrequesttime;
}
yacyCore.log.logFine("yacyClient.search: processed " + results + " links from peer " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
yacyCore.log.logFine("SEARCH " + results + " URLS FROM " + targetPeer.hash + ":" + targetPeer.getName() + ", score=" + targetPeer.selectscore + ", DHTdist=" + yacyDHTAction.dhtDistance(targetPeer.hash, wordhashes) + ", duetime=" + duetime + ", searchtime=" + searchtime + ", netdelay=" + (totalrequesttime - searchtime) + ", references=" + result.get("references"));
return results;
} catch (Exception e) {
yacyCore.log.logSevere("yacyClient.search error: '" + targetPeer.get(yacySeed.NAME, "anonymous") + "' failed - " + e);

@ -53,12 +53,12 @@ import de.anomic.plasma.plasmaCrawlLURL;
import de.anomic.plasma.plasmaURLPattern;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaWordIndexEntity;
import de.anomic.plasma.plasmaSearchProfile;
import de.anomic.server.logging.serverLog;
public class yacySearch extends Thread {
final private Set wordhashes;
final private int count;
final private boolean global;
final private plasmaCrawlLURL urlManager;
final private plasmaWordIndexEntity entityCache;
@ -66,13 +66,12 @@ public class yacySearch extends Thread {
final private plasmaSnippetCache snippetCache;
final private yacySeed targetPeer;
private int links;
final private long duetime;
final private plasmaSearchProfile profile;
public yacySearch(Set wordhashes, int count, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) {
public yacySearch(Set wordhashes, boolean global, yacySeed targetPeer,
plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) {
super("yacySearch_" + targetPeer.getName());
this.wordhashes = wordhashes;
this.count = count;
this.global = global;
this.urlManager = urlManager;
this.entityCache = entityCache;
@ -80,11 +79,11 @@ public class yacySearch extends Thread {
this.snippetCache = snippetCache;
this.targetPeer = targetPeer;
this.links = -1;
this.duetime = duetime;
this.profile = (plasmaSearchProfile) profile.clone();
}
public void run() {
this.links = yacyClient.search(set2string(wordhashes), count, global, targetPeer, urlManager, entityCache, blacklist, snippetCache, duetime);
this.links = yacyClient.search(set2string(wordhashes), global, targetPeer, urlManager, entityCache, blacklist, snippetCache, profile);
if (links != 0) {
//yacyCore.log.logInfo("REMOTE SEARCH - remote peer " + targetPeer.hash + ":" + targetPeer.getName() + " contributed " + links + " links for word hash " + wordhashes);
yacyCore.seedDB.mySeed.incRI(links);
@ -103,6 +102,10 @@ public class yacySearch extends Thread {
return this.links;
}
/**
 * Returns the search profile held by this search thread.
 * NOTE(review): this appears to be a per-thread copy of the caller's profile
 * (the constructor seems to clone it) so the remote peer's timing yields can
 * be read back from it after the thread finishes — confirm against the
 * constructor before relying on this.
 *
 * @return the profile instance used for this remote search
 */
public plasmaSearchProfile profile() {
return this.profile;
}
private static yacySeed[] selectPeers(Set wordhashes, int seedcount) {
// find out a specific number of seeds, that would be relevant for the given word hash(es)
// the result is ordered by relevance: [0] is most relevant
@ -166,7 +169,7 @@ public class yacySearch extends Thread {
}
public static yacySearch[] searchHashes(Set wordhashes, plasmaCrawlLURL urlManager, plasmaWordIndexEntity entityCache,
int count, int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, long duetime) {
int targets, plasmaURLPattern blacklist, plasmaSnippetCache snippetCache, plasmaSearchProfile profile) {
// check own peer status
if (yacyCore.seedDB.mySeed == null || yacyCore.seedDB.mySeed.getAddress() == null) { return null; }
@ -178,8 +181,8 @@ public class yacySearch extends Thread {
if (targets == 0) return null;
yacySearch[] searchThreads = new yacySearch[targets];
for (int i = 0; i < targets; i++) {
searchThreads[i]= new yacySearch(wordhashes, count, true, targetPeers[i],
urlManager, entityCache, blacklist, snippetCache, duetime);
searchThreads[i]= new yacySearch(wordhashes, true, targetPeers[i],
urlManager, entityCache, blacklist, snippetCache, profile);
searchThreads[i].start();
try {Thread.currentThread().sleep(20);} catch (InterruptedException e) {}

@ -755,7 +755,7 @@ public final class yacy {
try {
wordCounter++;
wordHash = (String) importWordHashIterator.next();
importWordIdxEntity = importWordIndex.getEntity(wordHash, true);
importWordIdxEntity = importWordIndex.getEntity(wordHash, true, -1);
if (importWordIdxEntity.size() == 0) {
importWordIdxEntity.deleteComplete();
@ -878,7 +878,7 @@ public final class yacy {
try {
wordCounter++;
wordhash = (String) wordHashIterator.next();
wordIdxEntity = wordIndex.getEntity(wordhash, true);
wordIdxEntity = wordIndex.getEntity(wordhash, true, -1);
// the combined container will fit, read the container
Iterator wordIdxEntries = wordIdxEntity.elements(true);

Loading…
Cancel
Save