introduction of dht-burst modes: this can expand the number of target peers in some cases where a better heuristic is needed. The problematic cases are either when a multi-word search is made (still a hard case for our term-oriented DHT) or when a network operator wants all robinson peers to be asked. We therefore introduced two new network steering values that switch on more peers during the peer selection. Because the number of peers can now be very large, the maximum number of httpc connections was also increased.

Please see the new comments in yacy.network.freeworld.unit for details of the new DHT selection methods. The maximum number of peers is no longer fixed to a specific number but may increase with: - the partition exponent - the number of redundant peers - the robinson burst percentage - the multiword burst percentage. The maximum can then be the number of senior peers (all visible peers). git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7479 6c8d7289-2bf4-0310-a012-ef5d649a1542
14 years ago · 5892fff51f
parent 4588b5a291
commit 5892fff51f
17 changed files with 237 additions and 222 deletions
--- a/defaults/yacy.network.freeworld.unit
+++ b/defaults/yacy.network.freeworld.unit
@ -6,17 +6,68 @@
 # this is a work in progress. disabled properties are not yet used #
 # -----------------------------------------------------------------#

-# general network definition
+# define the name of the network
+# this nickname is also used to identify network requests
 network.unit.name = freeworld
+
+# the visible name of the network
 network.unit.description = Public YaCy Community
+
+# definition of the content domain: possible values are:
+# global, local, any
 network.unit.domain = global
+
+# maximum search time for remote queries (deprecated)
 network.unit.search.time = 4
+
+# flag to switch on dht transmission
+# if the dht transmission is set to 'false' then for a global
+# query all targets are accessed
 network.unit.dht = true
+
+# the number of redundant target peers:
+# redundant peers get a copy of the original dht target information
 network.unit.dhtredundancy.junior = 1
 network.unit.dhtredundancy.senior = 3
+
+# the vertical partition of the dht: this applies a division
+# of the dht into 2^^<partitionExponent> fragments which get
+# all the same word-partition targets but a document-dht computed
+# fragment of all documents
 network.unit.dht.partitionExponent = 4
+
+# network request burst attributes: this enables non-dht target
+# positions for certain situations. This is not a 'traditional' burst-mode
+# since it does not refer to a handshake to a single client but it refers
+# to not-handshaking in a distributed way. It means to get data without using
+# a dht transmission logic.
+
+# robinson burst: percentage of the number of robinson peers that
+# shall be accessed for every search. This also includes robinson peers
+# that do not have a matching peer tag. If this is set to 100 then all robinson
+# peers are always asked.
+network.unit.dht.burst.robinson = 50
+
+# multi-word burst: percentage of the number of all peers that
+# shall be accessed for multi-word searches. Multi-word search is
+# a hard problem when the distributed search network is divided by
+# term (as done with yacy, partly..).
+# The scientific solution for this problem is to apply heuristics.
+# This heuristic makes it possible to switch on a full network scan to also get
+# non-distributed multi-word positions. For a full scan set this value to 100.
+# Attention: this may exceed the maximum number of available httpc network connections.
+network.unit.dht.burst.multiword = 30
+
+# switch to enable verification of search results
+# must be set to true in untrusted networks and can be
+# set to false in completely trusted networks
 network.unit.inspection.searchverify = true
+
+# speed of remote crawl de-queueing. this is the number of milliseconds
+# as a pause between two requests
 network.unit.remotecrawl.speed = 300
+
+# addresses of seed-list bootstrap locations
 network.unit.bootstrap.seedlist0 = http://www.yacy.net/seed.txt
 network.unit.bootstrap.seedlist1 = http://home.arcor.de/hermens/yacy/seed.txt
 network.unit.bootstrap.seedlist2 = http://low.audioattack.de/yacy/seed.txt
--- a/htroot/yacy/search.java
+++ b/htroot/yacy/search.java
@ -61,6 +61,7 @@ import de.anomic.search.Segment;
 import de.anomic.search.Segments;
 import de.anomic.search.Switchboard;
 import de.anomic.search.ResultEntry;
+import de.anomic.search.SwitchboardConstants;
 import de.anomic.server.serverCore;
 import de.anomic.server.serverObjects;
 import de.anomic.server.serverSwitch;
@ -294,7 +295,7 @@ public final class search {
            yacyChannel.channels(yacyChannel.REMOTESEARCH).addMessage(new RSSMessage("Remote Search Request from " + ((remoteSeed == null) ? "unknown" : remoteSeed.getName()), QueryParams.anonymizedQueryHashes(theQuery.queryHashes), ""));
            
            // make event
-            theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader);
+            theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, null, abstracts.length() > 0, sb.loader, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
            
            // set statistic details of search result and find best result index set
            joincount = theSearch.getRankingResult().getLocalIndexCount() - theSearch.getRankingResult().getMissCount();
--- a/htroot/yacysearch.java
+++ b/htroot/yacysearch.java
@ -520,7 +520,7 @@ public class yacysearch {
                theQuery.setOffset(0); // in case that this is a new search, always start without a offset 
                offset = 0;
            }
-            final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader);
+            final SearchEvent theSearch = SearchEventCache.getEvent(theQuery, sb.peers, sb.tables, (sb.isRobinsonMode()) ? sb.clusterhashes : null, false, sb.loader, (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_ROBINSON, 0), (int) sb.getConfigLong(SwitchboardConstants.DHT_BURST_MULTIWORD, 0));
            try {Thread.sleep(global ? 100 : 10);} catch (InterruptedException e1) {} // wait a little time to get first results in the search
            
            if (offset == 0) {
--- a/source/de/anomic/data/YMarkTables.java
+++ b/source/de/anomic/data/YMarkTables.java
@ -501,13 +501,9 @@ public class YMarkTables {
 	}
 	
 	public static TreeMap<String,Word> getWordCounts(final Document document) {
-        try {
-			if(document != null) {
-                return sortWordCounts(new Condenser(document, true, true, LibraryProvider.dymLib).words());
-			}
-		} catch (IOException e) {			
-			Log.logException(e);
-		}
+        if (document != null) {
+            return sortWordCounts(new Condenser(document, true, true, LibraryProvider.dymLib).words());
+        }
 		return new TreeMap<String, Word>();
 	}
 	
--- a/source/de/anomic/search/SearchEvent.java
+++ b/source/de/anomic/search/SearchEvent.java
@ -86,7 +86,9 @@ public final class SearchEvent {
                             final WorkTables workTables,
                             final SortedMap<byte[], String> preselectedPeerHashes,
                             final boolean generateAbstracts,
-                             final LoaderDispatcher loader) {
+                             final LoaderDispatcher loader,
+                             final int burstRobinsonPercent,
+                             final int burstMultiwordPercent) {
        this.eventTime = System.currentTimeMillis(); // for lifetime check
        this.peers = peers;
        this.workTables = workTables;
@ -106,9 +108,7 @@ public final class SearchEvent {
        boolean remote = (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) || (query.domType == QueryParams.SEARCHDOM_CLUSTERALL);
        if (remote && peers.sizeConnected() == 0) remote = false;
        final long start = System.currentTimeMillis();
-        if (remote) {
-        	final int fetchpeers = 32;
-            
+        if (remote) {
        	// initialize a ranking process that is the target for data
        	// that is generated concurrently from local and global search threads
            this.rankingProcess = new RankingProcess(this.query, this.order, max_results_preparation);
@ -118,7 +118,6 @@ public final class SearchEvent {
                       
            // start global searches
            final long timer = System.currentTimeMillis();
-            Log.logFine("SEARCH_EVENT", "STARTING " + fetchpeers + " THREADS TO CATCH EACH " + query.displayResults() + " URLs");
            this.primarySearchThreads = (query.queryHashes.isEmpty()) ? null : yacySearch.primaryRemoteSearches(
                    QueryParams.hashSet2hashString(query.queryHashes),
                    QueryParams.hashSet2hashString(query.excludeHashes),
@ -133,11 +132,13 @@ public final class SearchEvent {
                    peers,
                    rankingProcess,
                    secondarySearchSuperviser,
-                    fetchpeers,
                    Switchboard.urlBlacklist,
                    query.ranking,
                    query.constraint,
-                    (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
+                    (query.domType == QueryParams.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes,
+                    burstRobinsonPercent,
+                    burstMultiwordPercent);
+            Log.logFine("SEARCH_EVENT", "STARTING " + this.primarySearchThreads.length + " THREADS TO CATCH EACH " + query.displayResults() + " URLs");
            if (this.primarySearchThreads != null) {
                this.rankingProcess.moreFeeders(this.primarySearchThreads.length);
                EventTracker.update(EventTracker.EClass.SEARCH, new ProfilingGraph.searchEvent(query.id(true), Type.REMOTESEARCH_START, "", this.primarySearchThreads.length, System.currentTimeMillis() - timer), false);
--- a/source/de/anomic/search/SearchEventCache.java
+++ b/source/de/anomic/search/SearchEventCache.java
@ -104,7 +104,9 @@ public class SearchEventCache {
            final WorkTables workTables,
            final SortedMap<byte[], String> preselectedPeerHashes,
            final boolean generateAbstracts,
-            final LoaderDispatcher loader) {
+            final LoaderDispatcher loader,
+            final int burstRobinsonPercent,
+            final int burstMultiwordPercent) {
        
        String id = query.id(false);
        SearchEvent event = SearchEventCache.lastEvents.get(id);
@ -126,7 +128,7 @@ public class SearchEventCache {
        }
        if (event == null) {
            // start a new event
-            event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader);
+            event = new SearchEvent(query, peers, workTables, preselectedPeerHashes, generateAbstracts, loader, burstRobinsonPercent, burstMultiwordPercent);
        }
    
        return event;
--- a/source/de/anomic/search/Segment.java
+++ b/source/de/anomic/search/Segment.java
@ -28,7 +28,6 @@ package de.anomic.search;

 import java.io.File;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.Map;
@ -423,11 +422,7 @@ public class Segment {
            }
            // get the word set
            Set<String> words = null;
-            try {
-                words = new Condenser(document, true, true, null).words().keySet();
-            } catch (final UnsupportedEncodingException e) {
-                Log.logException(e);
-            }
+            words = new Condenser(document, true, true, null).words().keySet();
            
            // delete all word references
            int count = 0;
--- a/source/de/anomic/search/Switchboard.java
+++ b/source/de/anomic/search/Switchboard.java
@ -49,7 +49,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Reader;
-import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.security.NoSuchAlgorithmException;
 import java.security.PublicKey;
@ -1834,19 +1833,13 @@ public final class Switchboard extends serverSwitch {
        Condenser[] condenser = new Condenser[in.documents.length];
        if (this.log.isFine()) log.logFine("Condensing for '" + in.queueEntry.url().toNormalform(false, true) + "'");
        for (int i = 0; i < in.documents.length; i++) {
-            // strip out words and generate statistics
-            try {
-                condenser[i] = new Condenser(in.documents[i], in.queueEntry.profile().indexText(), in.queueEntry.profile().indexMedia(), LibraryProvider.dymLib);
-    
-                // update image result list statistics
-                // its good to do this concurrently here, because it needs a DNS lookup
-                // to compute a URL hash which is necessary for a double-check
-                final CrawlProfile profile = in.queueEntry.profile();
-                ResultImages.registerImages(in.queueEntry.url(), in.documents[i], (profile == null) ? true : !profile.remoteIndexing());
-
-            } catch (final UnsupportedEncodingException e) {
-                return null;
-            }
+            condenser[i] = new Condenser(in.documents[i], in.queueEntry.profile().indexText(), in.queueEntry.profile().indexMedia(), LibraryProvider.dymLib);
+   
+            // update image result list statistics
+            // its good to do this concurrently here, because it needs a DNS lookup
+            // to compute a URL hash which is necessary for a double-check
+            final CrawlProfile profile = in.queueEntry.profile();
+            ResultImages.registerImages(in.queueEntry.url(), in.documents[i], (profile == null) ? true : !profile.remoteIndexing());
        }
        return new indexingQueueEntry(in.process, in.queueEntry, in.documents, condenser);
    }
--- a/source/de/anomic/search/SwitchboardConstants.java
+++ b/source/de/anomic/search/SwitchboardConstants.java
@ -271,6 +271,10 @@ public final class SwitchboardConstants {
    public static final String CLUSTER_MODE_PRIVATE_CLUSTER     = "privatecluster";
    public static final String CLUSTER_MODE_PUBLIC_PEER         = "publicpeer";
    public static final String CLUSTER_PEERS_IPPORT             = "cluster.peers.ipport";
+
+    public static final String DHT_BURST_ROBINSON               = "network.unit.dht.burst.robinson";
+    public static final String DHT_BURST_MULTIWORD              = "network.unit.dht.burst.multiword";
+    
    /**
     * <p><code>public static final String <strong>CRAWLER_THREADS_ACTIVE_MAX</strong> = "crawler.MaxActiveThreads"</code></p>
     * <p>Name of the setting how many active crawler-threads may maximal be running on the same time</p>
--- a/source/de/anomic/yacy/dht/PartitionScheme.java
+++ b/source/de/anomic/yacy/dht/PartitionScheme.java
@ -65,7 +65,7 @@ public interface PartitionScheme {
    public long dhtPosition(final byte[] wordHash, final int verticalPosition);
    
    public int verticalPosition(final byte[] urlHash);
-    
+
    public long[] dhtPositions(final byte[] wordHash);
 
    public long dhtDistance(final byte[] word, final String urlHash, final yacySeed peer);
--- a/source/de/anomic/yacy/dht/PeerSelection.java
+++ b/source/de/anomic/yacy/dht/PeerSelection.java
@ -26,9 +26,12 @@ package de.anomic.yacy.dht;

 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;

 import net.yacy.cora.date.AbstractFormatter;
 import net.yacy.cora.storage.DynamicScore;
@ -53,13 +56,109 @@ import de.anomic.yacy.yacyVersion;
 */

 public class PeerSelection {
-    
-    public static void selectDHTPositions(
+
+    public static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final SortedMap<byte[], String> peerhashes) {
+        final Iterator<Map.Entry<byte[], String>> i = peerhashes.entrySet().iterator();
+        final List<yacySeed> l = new ArrayList<yacySeed>();
+        Map.Entry<byte[], String> entry;
+        yacySeed s;
+        while (i.hasNext()) {
+            entry = i.next();
+            s = seedDB.get(new String(entry.getKey())); // should be getConnected; get only during testing time
+            if (s != null) {
+                s.setAlternativeAddress(entry.getValue());
+                l.add(s);
+            }
+        }
+        return l.toArray(new yacySeed[l.size()]);
+    }
+
+    public static yacySeed[] selectSearchTargets(
+            final yacySeedDB seedDB,
+            final HandleSet wordhashes,
+            int redundancy,
+            int burstRobinsonPercent,
+            int burstMultiwordPercent) {
+        // find out a specific number of seeds, that would be relevant for the given word hash(es)
+        // the result is ordered by relevance: [0] is most relevant
+        // the seedcount is the maximum number of wanted results
+        if (seedDB == null) { return null; }
+        
+        // put in seeds according to dht
+        final Map<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>(); // dht position seeds
+        yacySeed seed;
+        Iterator<yacySeed> dhtEnum;         
+        Iterator<byte[]> iter = wordhashes.iterator();
+        while (iter.hasNext()) {
+            selectDHTPositions(seedDB, iter.next(), redundancy, regularSeeds);
+        }
+        //int minimumseeds = Math.min(seedDB.scheme.verticalPartitions(), regularSeeds.size()); // that should be the minimum number of seeds that are returned
+        //int maximumseeds = seedDB.scheme.verticalPartitions() * redundancy; // this is the maximum number of seeds according to dht and heuristics. It can be more using burst mode.
+        
+        // put in some seeds according to size of peer.
+        // But not all, that would produce too much load on the largest peers
+        dhtEnum = seedDB.seedsSortedConnected(false, yacySeed.ICOUNT);
+        int c = Math.max(Math.min(5, seedDB.sizeConnected()), wordhashes.size() > 1 ? seedDB.sizeConnected() * burstMultiwordPercent / 100 : 0);
+        while (dhtEnum.hasNext() && c-- > 0) {
+            seed = dhtEnum.next();
+            if (seed == null) continue;
+            if (seed.getAge() < 1) { // the 'workshop feature'
+                Log.logInfo("DHT", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
+                regularSeeds.put(seed.hash, seed);
+                continue;
+            }
+            if (Math.random() * 100 + (wordhashes.size() > 1 ? burstMultiwordPercent : 25) >= 50) {
+                if (Log.isFine("DHT")) Log.logFine("DHT", "selectPeers/CountBurst: " + seed.hash + ":" + seed.getName() + ", RWIcount=" + seed.getWordCount());
+                regularSeeds.put(seed.hash, seed);
+                continue;
+            }
+        }
+
+        // create a set that contains only robinson peers because these get a special handling
+        dhtEnum = seedDB.seedsConnected(true, false, null, 0.50f);
+        Set<yacySeed> robinson = new HashSet<yacySeed>();
+        while (dhtEnum.hasNext()) {
+            seed = dhtEnum.next();
+            if (seed == null) continue;
+            if (!seed.getFlagAcceptRemoteIndex()) robinson.add(seed);
+        }
+
+        // add robinson peers according to robinson burst rate
+        dhtEnum = robinson.iterator();
+        c = robinson.size() * burstRobinsonPercent / 100;
+        while (dhtEnum.hasNext() && c-- > 0) {
+            seed = dhtEnum.next();
+            if (Math.random() * 100 + burstRobinsonPercent >= 100) {
+                if (Log.isFine("DHT")) Log.logFine("DHT", "selectPeers/RobinsonBurst: " + seed.hash + ":" + seed.getName());
+                regularSeeds.put(seed.hash, seed);
+                continue;
+            }
+        }
+
+        // put in seeds that are public robinson peers and where the peer tags match with query
+        // or seeds that are newbies to ensure that private demonstrations always work
+        dhtEnum = robinson.iterator();
+        while (dhtEnum.hasNext()) {
+            seed = dhtEnum.next();
+            if (seed.matchPeerTags(wordhashes)) {
+                // peer tags match
+                String specialized = seed.getPeerTags().toString();
+                if (!specialized.equals("[*]")) Log.logInfo("DHT", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + specialized);
+                regularSeeds.put(seed.hash, seed);
+            }
+        }
+        
+        // produce return set
+        yacySeed[] result = new yacySeed[regularSeeds.size()];
+        result = regularSeeds.values().toArray(result);
+        return result;
+    }
+
+    private static void selectDHTPositions(
            final yacySeedDB seedDB, 
            byte[] wordhash,
            int redundancy, 
-            Map<String, yacySeed> regularSeeds,
-            DynamicScore<String> ranking) {
+            Map<String, yacySeed> regularSeeds) {
        // this method is called from the search target computation
        final long[] dhtVerticalTargets = seedDB.scheme.dhtPositions(wordhash);
        yacySeed seed;
@ -72,50 +171,13 @@ public class PeerSelection {
                seed = dhtEnum.next();
                if (seed == null || seed.hash == null) continue;
                if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
-                if (Log.isFine("PLASMA")) Log.logFine("PLASMA", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/ score " + c);
-                ranking.inc(seed.hash, 2 * c);
+                if (Log.isFine("DHT")) Log.logFine("DHT", "selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/ score " + c);
                regularSeeds.put(seed.hash, seed);
                c--;
            }
        }
    }
-    
-    private static int guessedOwn = 0;
-    
-    public static boolean shallBeOwnWord(final yacySeedDB seedDB, final byte[] wordhash, final String urlhash, final int redundancy) {
-        // the guessIfOwnWord is a fast method that should only fail in case that a 'true' may be incorrect, but a 'false' shall always be correct
-        if (guessIfOwnWord(seedDB, wordhash, urlhash)) {
-            // this case must be verified, because it can be wrong.
-            guessedOwn++;
-            return verifyIfOwnWord(seedDB, wordhash, urlhash, redundancy);
-        } else {
-            return false;
-        }
-        
-    }
-    
-    private static boolean guessIfOwnWord(final yacySeedDB seedDB, final byte[] wordhash, final String urlhash) {
-        if (seedDB == null) return false;
-        int connected = seedDB.sizeConnected();
-        if (connected == 0) return true;
-        final long target = seedDB.scheme.dhtPosition(wordhash, urlhash);
-        final long mypos = seedDB.scheme.dhtPosition(seedDB.mySeed().hash.getBytes(), urlhash);
-        long distance = FlatWordPartitionScheme.dhtDistance(target, mypos);
-        if (distance <= 0) return false;
-        if (distance <= Long.MAX_VALUE / connected * 2) return true;
-        return false;
-    }
-    
-    private static boolean verifyIfOwnWord(final yacySeedDB seedDB, byte[] wordhash, String urlhash, int redundancy) {
-        String myHash = seedDB.mySeed().hash;
-        wordhash = FlatWordPartitionScheme.positionToHash(seedDB.scheme.dhtPosition(wordhash, urlhash));
-        final Iterator<yacySeed> dhtEnum = getAcceptRemoteIndexSeeds(seedDB, wordhash, redundancy, true);
-        while (dhtEnum.hasNext()) {
-            if (dhtEnum.next().hash.equals(myHash)) return true;
-        }
-        return false;
-    }
-    
+
    public static byte[] selectTransferStart() {
        return Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(Long.toString(System.currentTimeMillis()))).substring(2, 2 + Word.commonHashLength).getBytes();
    }
@ -131,7 +193,7 @@ public class PeerSelection {
            final byte[] starthash,
            int max,
            boolean alsoMyOwn) {
-        final Iterator<yacySeed> seedIter = PeerSelection.getAcceptRemoteIndexSeeds(seedDB, starthash, max, alsoMyOwn);
+        final Iterator<yacySeed> seedIter = getAcceptRemoteIndexSeeds(seedDB, starthash, max, alsoMyOwn);
        final ArrayList<yacySeed> targets = new ArrayList<yacySeed>();
        while (seedIter.hasNext() && max-- > 0) targets.add(seedIter.next());
        return targets;
@ -159,7 +221,7 @@ public class PeerSelection {
        private int remaining;
        private boolean alsoMyOwn;
        
-        public acceptRemoteIndexSeedEnum(yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) {
+        private acceptRemoteIndexSeedEnum(yacySeedDB seedDB, final byte[] starthash, int max, boolean alsoMyOwn) {
            this.seedDB = seedDB;
            this.se = getDHTSeeds(seedDB, starthash, yacyVersion.YACY_HANDLES_COLLECTION_INDEX);
            this.remaining = max;
@ -238,7 +300,7 @@ public class PeerSelection {
        private float minVersion;
        private yacySeedDB seedDB;
        
-        public seedDHTEnum(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
+        private seedDHTEnum(final yacySeedDB seedDB, final byte[] firstHash, final float minVersion) {
            this.seedDB = seedDB;
            this.steps = seedDB.sizeConnected();
            this.minVersion = minVersion;
@ -290,7 +352,7 @@ public class PeerSelection {
        private yacySeed nextSeed;
        private yacySeedDB seedDB;
        
-        public providesRemoteCrawlURLsEnum(final yacySeedDB seedDB) {
+        private providesRemoteCrawlURLsEnum(final yacySeedDB seedDB) {
            this.seedDB = seedDB;
            se = getDHTSeeds(seedDB, null, yacyVersion.YACY_POVIDES_REMOTECRAWL_LISTS);
            nextSeed = nextInternal();
--- a/source/de/anomic/yacy/yacyClient.java
+++ b/source/de/anomic/yacy/yacyClient.java
@ -422,7 +422,7 @@ public final class yacyClient {
                sitehash, authorhash, count, maxDistance, global, partitions, target.getHexHash() + ".yacyh", target.getClusterAddress(),
                secondarySearchSuperviser, rankingProfile, constraint);
        } catch (final IOException e) {
-            yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + "), score=" + target.selectscore);
+            yacyCore.log.logInfo("SEARCH failed, Peer: " + target.hash + ":" + target.getName() + " (" + e.getMessage() + ")");
            //yacyCore.peerActions.peerDeparture(target, "search request to peer created io exception: " + e.getMessage());
            return -1;
        }
@ -553,17 +553,12 @@ public final class yacyClient {
        }
        
        // generate statistics
-        if (yacyCore.log.isFine()) yacyCore.log.logFine("SEARCH "
-                + result.urlcount
-                + " URLS FROM "
-                + target.hash
-                + ":"
-                + target.getName()
-                + ", score="
-                + target.selectscore
-                + ", searchtime=" + result.searchtime + ", netdelay="
-                + (totalrequesttime - result.searchtime) + ", references="
-                + result.references);
+        if (yacyCore.log.isFine()) yacyCore.log.logFine(
+                "SEARCH " + result.urlcount +
+                " URLS FROM " + target.hash + ":" + target.getName() +
+                ", searchtime=" + result.searchtime +
+                ", netdelay=" + (totalrequesttime - result.searchtime) +
+                ", references=" + result.references);
        return result.urlcount;
    }
    
--- a/source/de/anomic/yacy/yacySearch.java
+++ b/source/de/anomic/yacy/yacySearch.java
@ -24,16 +24,10 @@

 package de.anomic.yacy;

-import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
 import java.util.SortedMap;
 import java.util.regex.Pattern;

-import net.yacy.cora.storage.DynamicScore;
-import net.yacy.cora.storage.ScoreCluster;
 import net.yacy.kelondro.index.HandleSet;
 import net.yacy.kelondro.logging.Log;
 import net.yacy.kelondro.order.Bitfield;
@ -151,104 +145,7 @@ public class yacySearch extends Thread {
    public yacySeed target() {
        return targetPeer;
    }
-
-    private static yacySeed[] selectClusterPeers(final yacySeedDB seedDB, final SortedMap<byte[], String> peerhashes) {
-    	final Iterator<Map.Entry<byte[], String>> i = peerhashes.entrySet().iterator();
-    	final List<yacySeed> l = new ArrayList<yacySeed>();
-    	Map.Entry<byte[], String> entry;
-    	yacySeed s;
-    	while (i.hasNext()) {
-            entry = i.next();
-            s = seedDB.get(new String(entry.getKey())); // should be getConnected; get only during testing time
-            if (s != null) {
-                s.setAlternativeAddress(entry.getValue());
-                l.add(s);
-            }
-    	}
-//    	final yacySeed[] result = new yacySeed[l.size()];
-//    	for (int j = 0; j < l.size(); j++) {
-//    		result[j] = l.get(j);
-//    	}
-//    	return result;
-    	return l.toArray(new yacySeed[0]);
-    }
    
-    private static yacySeed[] selectSearchTargets(final yacySeedDB seedDB, final HandleSet wordhashes, int seedcount, int redundancy) {
-        // find out a specific number of seeds, that would be relevant for the given word hash(es)
-        // the result is ordered by relevance: [0] is most relevant
-        // the seedcount is the maximum number of wanted results
-        if (seedDB == null) { return null; }
-        if ((seedcount >= seedDB.sizeConnected()) || (seedDB.noDHTActivity())) {
-            seedcount = seedDB.sizeConnected();
-        }
-        
-        // put in seeds according to dht
-        final DynamicScore<String> ranking = new ScoreCluster<String>();
-        final Map<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
-        final Map<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
-        yacySeed seed;
-        Iterator<yacySeed> dhtEnum;         
-        Iterator<byte[]> iter = wordhashes.iterator();
-        while (iter.hasNext()) {
-            PeerSelection.selectDHTPositions(seedDB, iter.next(), redundancy, regularSeeds, ranking);
-        }
-
-        // put in seeds according to size of peer
-        dhtEnum = seedDB.seedsSortedConnected(false, yacySeed.ICOUNT);
-        int c = Math.min(seedDB.sizeConnected(), seedcount);
-        int score;
-        while (dhtEnum.hasNext() && c > 0) {
-            seed = dhtEnum.next();
-            if (seed == null) continue;
-            if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer
-            score = (int) Math.round(Math.random() * ((c / 3) + 3));
-            if (Log.isFine("PLASMA")) Log.logFine("PLASMA", "selectPeers/RWIcount: " + seed.hash + ":" + seed.getName() + ", RWIcount=" + seed.getWordCount() + ", score " + score);
-            ranking.inc(seed.hash, score);
-            regularSeeds.put(seed.hash, seed);
-            c--;
-        }
-
-        // put in seeds that are public robinson peers and where the peer tags match with query
-        // or seeds that are newbies to ensure that public demonstrations always work
-        dhtEnum = seedDB.seedsConnected(true, false, null, (float) 0.50);
-        while (dhtEnum.hasNext()) {
-        	seed = dhtEnum.next();
-            if (seed == null) continue;
-            if (seed.matchPeerTags(wordhashes)) {
-                String specialized = seed.getPeerTags().toString();
-                if (!specialized.equals("[*]")) Log.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + specialized);
-                regularSeeds.remove(seed.hash);
-                ranking.delete(seed.hash);
-                matchingSeeds.put(seed.hash, seed);
-            } else if (seed.getFlagAcceptRemoteIndex() && seed.getAge() < 1) { // the 'workshop feature'
-                Log.logInfo("PLASMA", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
-                regularSeeds.remove(seed.hash);
-                ranking.delete(seed.hash);
-                matchingSeeds.put(seed.hash, seed);
-            }
-        }
-        
-        // evaluate the ranking score and select seeds
-        seedcount = Math.min(ranking.size(), seedcount);
-        final yacySeed[] result = new yacySeed[seedcount + matchingSeeds.size()];
-        c = 0;
-        final Iterator<String> iters = ranking.keys(false); // higher are better
-        while (iters.hasNext() && c < seedcount) {
-            seed = regularSeeds.get(iters.next());
-            seed.selectscore = c;
-            Log.logInfo("PLASMA", "selectPeers/_dht_: " + seed.hash + ":" + seed.getName() + " is choice " + c);
-            result[c++] = seed;
-        }
-        for (final yacySeed s: matchingSeeds.values()) {
-            s.selectscore = c;
-            Log.logInfo("PLASMA", "selectPeers/_match_: " + s.hash + ":" + s.getName() + " is choice " + c);
-            result[c++] = s;
-        }
-
-//      System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug
-        return result;
-    }
-
    public static yacySearch[] primaryRemoteSearches(
            final String wordhashes, final String excludehashes,
            final Pattern prefer, final Pattern filter, String language,
@ -259,11 +156,12 @@ public class yacySearch extends Thread {
            final yacySeedDB peers,
            final RankingProcess containerCache,
            final SearchEvent.SecondarySearchSuperviser secondarySearchSuperviser,
-            int targets,
            final Blacklist blacklist,
            final RankingProfile rankingProfile,
            final Bitfield constraint,
-            final SortedMap<byte[], String> clusterselection) {
+            final SortedMap<byte[], String> clusterselection,
+            final int burstRobinsonPercent,
+            final int burstMultiwordPercent) {
        // check own peer status
        //if (wordIndex.seedDB.mySeed() == null || wordIndex.seedDB.mySeed().getPublicAddress() == null) { return null; }

@ -272,14 +170,15 @@ public class yacySearch extends Thread {
        assert wordhashes.length() >= 12 : "wordhashes = " + wordhashes;
        final yacySeed[] targetPeers =
            (clusterselection == null) ?
-                    selectSearchTargets(
+                    PeerSelection.selectSearchTargets(
                            peers,
                            QueryParams.hashes2Set(wordhashes),
-                            targets,
-                            peers.redundancy())
-                  : selectClusterPeers(peers, clusterselection);
+                            peers.redundancy(),
+                            burstRobinsonPercent,
+                            burstMultiwordPercent)
+                  : PeerSelection.selectClusterPeers(peers, clusterselection);
        if (targetPeers == null) return new yacySearch[0];
-        targets = targetPeers.length;
+        int targets = targetPeers.length;
        if (targets == 0) return new yacySearch[0];
        final yacySearch[] searchThreads = new yacySearch[targets];
        for (int i = 0; i < targets; i++) {
@ -292,6 +191,7 @@ public class yacySearch extends Thread {
                    indexSegment, peers, containerCache, secondarySearchSuperviser, blacklist, rankingProfile, constraint);
                searchThreads[i].start();
            } catch (OutOfMemoryError e) {
+                e.printStackTrace();
                break;
            }
        }
--- a/source/de/anomic/yacy/yacySeed.java
+++ b/source/de/anomic/yacy/yacySeed.java
@ -47,6 +47,7 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Iterator;
@ -71,7 +72,7 @@ import de.anomic.tools.bitfield;
 import de.anomic.tools.crypt;
 import de.anomic.yacy.dht.FlatWordPartitionScheme;

-public class yacySeed implements Cloneable {
+public class yacySeed implements Cloneable, Comparable<yacySeed>, Comparator<yacySeed> {

    public static String ANON_PREFIX = "_anon";
    
@ -171,7 +172,6 @@ public class yacySeed implements Cloneable {
    public String hash;
    /** a set of identity founding values, eg. IP, name of the peer, YaCy-version, ...*/
    private final ConcurrentHashMap<String, String> dna;
-    protected int selectscore = -1; // only for debugging
    private String alternativeIP = null;

    public yacySeed(final String theHash, final ConcurrentHashMap<String, String> theDna) {
@ -858,5 +858,44 @@ public class yacySeed implements Cloneable {
        ndna.putAll(this.dna);
        return new yacySeed(this.hash, ndna);
    }
+
+    /**
+     * Orders seeds by {@link #hashCode()} and, when two seeds share the same
+     * hash code, by their full peer hash. The tie-break keeps distinct peers
+     * from comparing as equal, which would make sorted sets and maps silently
+     * drop one of them.
+     *
+     * @param arg0 the seed to compare with
+     * @return a negative value, zero or a positive value if this seed sorts
+     *         before, equal to or after {@code arg0}
+     */
+    @Override
+    public int compareTo(yacySeed arg0) {
+        int o1 = this.hashCode();
+        int o2 = arg0.hashCode();
+        if (o1 > o2) return 1;
+        if (o2 > o1) return -1;
+        // hashCode() keeps only 31 bits, so different peer hashes can collide here
+        return this.hash.compareTo(arg0.hash);
+    }
+
+    /**
+     * @return the value of Base64Order.enhancedCoder.cardinal for the peer hash,
+     *         masked with Integer.MAX_VALUE so that the result is never negative
+     */
+    @Override
+    public int hashCode() {
+        return (int) (Base64Order.enhancedCoder.cardinal(this.hash) & ((long) Integer.MAX_VALUE));
+    }
+
+    /**
+     * Comparator form of {@link #compareTo(yacySeed)}.
+     *
+     * @return the result of {@code o1.compareTo(o2)}
+     */
+    @Override
+    public int compare(yacySeed o1, yacySeed o2) {
+        return o1.compareTo(o2);
+    }
    
 }
--- a/source/net/yacy/cora/protocol/http/HTTPClient.java
+++ b/source/net/yacy/cora/protocol/http/HTTPClient.java
@ -91,7 +91,7 @@ import org.apache.http.util.EntityUtils;
 */
 public class HTTPClient {

-	private final static int maxcon = 20;
+	private final static int maxcon = 200;
 	private static IdledConnectionEvictor idledConnectionEvictor = null;
 	private static HttpClient httpClient = initConnectionManager();
 	private Header[] headers = null;
--- a/source/net/yacy/document/Condenser.java
+++ b/source/net/yacy/document/Condenser.java
@ -105,7 +105,7 @@ public final class Condenser {
            final boolean indexText,
            final boolean indexMedia,
            final WordCache meaningLib
-            ) throws UnsupportedEncodingException {
+            ) {
        // if addMedia == true, then all the media links are also parsed and added to the words
        // added media words are flagged with the appropriate media flag
        this.words = new HashMap<String, Word>();
@ -254,7 +254,7 @@ public final class Condenser {
        }
    }

-    public Condenser(final InputStream text, final WordCache meaningLib) throws UnsupportedEncodingException {
+    public Condenser(final InputStream text, final WordCache meaningLib) {
        this.languageIdentificator = null; // we don't need that here
        // analysis = new Properties();
        words = new TreeMap<String, Word>();
@ -278,7 +278,7 @@ public final class Condenser {
        return this.languageIdentificator.getLanguage();
    }

-    private void createCondensement(final InputStream is, final WordCache meaningLib) throws UnsupportedEncodingException {
+    private void createCondensement(final InputStream is, final WordCache meaningLib) {
        assert is != null;
        final Set<String> currsentwords = new HashSet<String>();
        StringBuilder sentence = new StringBuilder(100);
@ -461,11 +461,7 @@ public final class Condenser {
 		} catch (UnsupportedEncodingException e1) {
 			buffer = new ByteArrayInputStream(text.getBytes());
 		}
-        try {
-            return new Condenser(buffer, meaningLib).words();
-        } catch (final UnsupportedEncodingException e) {
-            return null;
-        }
+        return new Condenser(buffer, meaningLib).words();
    }
    
    public static void main(final String[] args) {
--- a/source/net/yacy/document/WordTokenizer.java
+++ b/source/net/yacy/document/WordTokenizer.java
@ -40,7 +40,7 @@ public class WordTokenizer implements Enumeration<String> {
    private unsievedWordsEnum e;
    private WordCache meaningLib;

-    public WordTokenizer(final InputStream is, final WordCache meaningLib) throws UnsupportedEncodingException {
+    public WordTokenizer(final InputStream is, final WordCache meaningLib) {
        assert is != null;
        this.e = new unsievedWordsEnum(is);
        this.buffer = nextElement0();
@ -83,7 +83,7 @@ public class WordTokenizer implements Enumeration<String> {
        private List<StringBuilder> s;
        private int sIndex;

-        public unsievedWordsEnum(final InputStream is) throws UnsupportedEncodingException {
+        public unsievedWordsEnum(final InputStream is) {
            assert is != null;
            e = new SentenceReader(is);
            s = new ArrayList<StringBuilder>();