diff --git a/source/net/yacy/peers/DHTSelection.java b/source/net/yacy/peers/DHTSelection.java index e40be93c4..63f408181 100644 --- a/source/net/yacy/peers/DHTSelection.java +++ b/source/net/yacy/peers/DHTSelection.java @@ -130,25 +130,33 @@ public class DHTSelection { return extraSeeds; } - public static Set selectDHTSearchTargets(final SeedDB seedDB, final HandleSet wordhashes, final int minage, final int redundancy, final int maxredundancy, final Random random) { + /** + * @param seedDB the seeds database. + * @param wordhashes hashes of the words we are searching for + * @param minage the minimum age of each seed in days + * @param minWordCount the minimum RWI words count of each seed + * @param random a random generator instance + * @return a set of matching candidate seeds for remote RWI search + */ + public static Set selectDHTSearchTargets(final SeedDB seedDB, final HandleSet wordhashes, final int minage, final int minWordCount, final int redundancy, final int maxredundancy, final Random random) { // put in seeds according to dht - Set seeds = new LinkedHashSet(); // dht position seeds + Set seeds = new LinkedHashSet<>(); // dht position seeds if (seedDB != null) { Iterator iter = wordhashes.iterator(); while (iter.hasNext()) { - seeds.addAll(collectHorizontalDHTPositions(seedDB, iter.next(), minage, redundancy, maxredundancy, random)); + seeds.addAll(collectHorizontalDHTPositions(seedDB, iter.next(), minage, minWordCount, redundancy, maxredundancy, random)); } } return seeds; } - private static ArrayList collectHorizontalDHTPositions(final SeedDB seedDB, final byte[] wordhash, final int minage, final int redundancy, final int maxredundancy, Random random) { + private static ArrayList collectHorizontalDHTPositions(final SeedDB seedDB, final byte[] wordhash, final int minage, final int minWordCount, final int redundancy, final int maxredundancy, final Random random) { // this method is called from the search target computation - ArrayList collectedSeeds = 
new ArrayList(redundancy * seedDB.scheme.verticalPartitions()); + ArrayList collectedSeeds = new ArrayList<>(redundancy * seedDB.scheme.verticalPartitions()); for (int verticalPosition = 0; verticalPosition < seedDB.scheme.verticalPartitions(); verticalPosition++) { - ArrayList seeds = selectVerticalDHTPositions(seedDB, wordhash, minage, maxredundancy, verticalPosition); + ArrayList seeds = selectVerticalDHTPositions(seedDB, wordhash, minage, minWordCount, maxredundancy, verticalPosition); if (seeds.size() <= redundancy) { collectedSeeds.addAll(seeds); } else { @@ -163,12 +171,19 @@ public class DHTSelection { return collectedSeeds; } + /** + * @param seedDB the seeds database. Must not be null. + * @param wordhash the word we are searching for + * @param minage the minimum age of each seed in days + * @param redundancy the number of redundant peer positions for this partition, minimum is 1 + * @return an array holding, for each vertical partition, the list of matching candidate seeds for DHT distribution + */ @SuppressWarnings("unchecked") public static List[] selectDHTDistributionTargets(final SeedDB seedDB, final byte[] wordhash, final int minage, final int redundancy) { // this method is called from the distribution target computation List[] seedlists = (List[]) Array.newInstance(ArrayList.class, seedDB.scheme.verticalPartitions()); for (int verticalPosition = 0; verticalPosition < seedDB.scheme.verticalPartitions(); verticalPosition++) { - seedlists[verticalPosition] = selectVerticalDHTPositions(seedDB, wordhash, minage, redundancy, verticalPosition); + seedlists[verticalPosition] = selectVerticalDHTPositions(seedDB, wordhash, minage, Integer.MIN_VALUE, redundancy, verticalPosition); } return seedlists; } @@ -178,13 +193,14 @@ public class DHTSelection { * @param seedDB the database of seeds * @param wordhash the word we are searching for * @param minage the minimum age of a seed in days (to prevent that too young seeds which cannot have results yet are asked) + * @param minWordCount the minimum RWI words 
count of each seed * @param redundancy the number of redundant peer position for this parition, minimum is 1 * @param verticalPosition the verical position, thats the number of the partition 0 <= verticalPosition < seedDB.scheme.verticalPartitions() * @return a list of seeds for the redundant positions */ - private static ArrayList selectVerticalDHTPositions(final SeedDB seedDB, final byte[] wordhash, final int minage, final int redundancy, int verticalPosition) { + private static ArrayList selectVerticalDHTPositions(final SeedDB seedDB, final byte[] wordhash, final int minage, final int minWordCount, final int redundancy, int verticalPosition) { // this method is called from the search target computation - ArrayList seeds = new ArrayList(redundancy); + ArrayList seeds = new ArrayList<>(redundancy); final long dhtVerticalTarget = seedDB.scheme.verticalDHTPosition(wordhash, verticalPosition); final byte[] verticalhash = Distribution.positionToHash(dhtVerticalTarget); final Iterator dhtEnum = getAcceptRemoteIndexSeeds(seedDB, verticalhash, redundancy, false); @@ -195,6 +211,10 @@ public class DHTSelection { if (seed == null || seed.hash == null) continue; if (!seed.getFlagAcceptRemoteIndex()) continue; // probably a robinson peer if (seed.getAge() < minage) continue; // prevent bad results because of too strong network growth + if(seed.getWordCount() < minWordCount) { + /* Even if the peer is not a robinson and has the required minimum age, it may have an empty or disabled RWI */ + continue; + } if (RemoteSearch.log.isInfo()) RemoteSearch.log.info("selectPeers/DHTorder: " + seed.hash + ":" + seed.getName() + "/ score " + c); seeds.add(seed); c--; @@ -380,26 +400,24 @@ public class DHTSelection { } } - + /** * get either the youngest or oldest peers from the seed db. 
Count as many as requested * @param seedDB * @param up if up = true then get the most recent peers, if up = false then get oldest * @param count number of wanted peers + * @param minWordCount the minimum RWI words count of each seed * @return a hash map of peer hashes to seed object */ - public static ConcurrentMap seedsByAge(final SeedDB seedDB, final boolean up, int count) { + public static ConcurrentMap seedsByAge(final SeedDB seedDB, final boolean up, int count, final int minWordCount) { if (count > seedDB.sizeConnected()) count = seedDB.sizeConnected(); Seed ys; - //long age; - final Iterator s = seedDB.seedsSortedConnected(!up, Seed.LASTSEEN); + final Iterator seeds = seedDB.seedsSortedConnected(!up, Seed.LASTSEEN); try { final ConcurrentMap result = new ConcurrentHashMap(); - while (s.hasNext() && count-- > 0) { - ys = s.next(); - if (ys != null && ys.hash != null) { - //age = (System.currentTimeMillis() - ys.getLastSeenUTC()) / 1000 / 60; - //System.out.println("selected seedsByAge up=" + up + ", age/min = " + age); + while (seeds.hasNext() && count-- > 0) { + ys = seeds.next(); + if (ys != null && ys.hash != null && ys.getWordCount() >= minWordCount) { result.put(ys.hash, ys); } } @@ -410,4 +428,15 @@ public class DHTSelection { } } + /** + * get either the youngest or oldest peers from the seed db. 
Count as many as requested + * @param seedDB + * @param up if up = true then get the most recent peers, if up = false then get oldest + * @param count number of wanted peers + * @return a hash map of peer hashes to seed object + */ + public static ConcurrentMap seedsByAge(final SeedDB seedDB, final boolean up, int count) { + return seedsByAge(seedDB, up, count, Integer.MIN_VALUE); + } + } \ No newline at end of file diff --git a/source/net/yacy/peers/RemoteSearch.java b/source/net/yacy/peers/RemoteSearch.java index 01ae2eb19..52a4fdae6 100644 --- a/source/net/yacy/peers/RemoteSearch.java +++ b/source/net/yacy/peers/RemoteSearch.java @@ -190,7 +190,8 @@ public class RemoteSearch extends Thread { if (Memory.load() > 2.0) {redundancy = Math.max(1, redundancy - 1); healthMessage.append(", load() > 2.0");} if (Memory.cores() < 4) {redundancy = Math.max(1, redundancy - 1); healthMessage.append(", cores() < 4");} if (Memory.cores() == 1) {redundancy = 1; healthMessage.append(", cores() == 1");} - int minage = 3; + final int minage = 3; + final int minRWIWordCount = 1; // we exclude seeds with empty or disabled RWI from remote RWI search int robinsoncount = event.peers.scheme.verticalPartitions() * redundancy / 2; if (indexingQueueSize > 0) robinsoncount = Math.max(1, robinsoncount / 2); if (indexingQueueSize > 10) robinsoncount = Math.max(1, robinsoncount / 2); @@ -212,17 +213,19 @@ public class RemoteSearch extends Thread { event.peers, QueryParams.hashes2Set(ASCII.String(Word.word2hash(newGoal))), minage, + minRWIWordCount, redundancy, event.peers.redundancy(), random); } else { // select just random peers - dhtPeers = DHTSelection.seedsByAge(event.peers, false, event.peers.redundancy()).values(); + dhtPeers = DHTSelection.seedsByAge(event.peers, false, event.peers.redundancy(), minRWIWordCount).values(); } } else { dhtPeers = DHTSelection.selectDHTSearchTargets( event.peers, event.query.getQueryGoal().getIncludeHashes(), minage, + minRWIWordCount, redundancy, 
event.peers.redundancy(), random); // this set of peers may be too large and consume too many threads if more than one word is searched.