added another peer selection rule:

- also select non-robinson (DHT) peers if their peer tags match the search words
- the peer tag '*' now acts as a catch-all rule: a peer carrying it matches every search and is always selected

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4963 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent d37fd064f9
commit 080cda97ef

@ -3,7 +3,7 @@ javacSource=1.5
javacTarget=1.5
# Release Configuration
releaseVersion=0.591
releaseVersion=0.592
stdReleaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
embReleaseFile=yacy_emb_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
proReleaseFile=yacy_pro_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz

@ -177,6 +177,7 @@ public class yacySearch extends Thread {
// put in seeds according to dht
final kelondroMScoreCluster<String> ranking = new kelondroMScoreCluster<String>();
final HashMap<String, yacySeed> regularSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> matchingSeeds = new HashMap<String, yacySeed>();
final HashMap<String, yacySeed> robinsonSeeds = new HashMap<String, yacySeed>();
yacySeed seed;
Iterator<yacySeed> dhtEnum;
@ -225,40 +226,49 @@ public class yacySearch extends Thread {
// enhance ranking for regular peers
if (seed.matchPeerTags(wordhashes)) { // access robinson peers with matching tag
serverLog.logInfo("PLASMA", "selectPeers/PeerTags: " + seed.hash + ":" + seed.getName() + ", is specialized peer for " + seed.getPeerTags().toString());
ranking.addScore(seed.hash, seedcount);
regularSeeds.put(seed.hash, seed);
regularSeeds.remove(seed.hash);
ranking.deleteScore(seed.hash);
matchingSeeds.put(seed.hash, seed);
}
if (seed.getAge() < 1) { // the 'workshop feature'
serverLog.logInfo("PLASMA", "selectPeers/Age: " + seed.hash + ":" + seed.getName() + ", is newbie, age = " + seed.getAge());
ranking.addScore(seed.hash, seedcount);
regularSeeds.put(seed.hash, seed);
regularSeeds.remove(seed.hash);
ranking.deleteScore(seed.hash);
matchingSeeds.put(seed.hash, seed);
}
} else {
// this is a robinson peer
// in case the peer has more than a million urls, take it as search target
if (seed.getLinkCount() > 1000000) {
if (seed.getLinkCount() > 1000000 || seed.matchPeerTags(wordhashes)) {
regularSeeds.remove(seed.hash);
ranking.deleteScore(seed.hash);
robinsonSeeds.put(seed.hash, seed);
if (seed.matchPeerTags(wordhashes))
matchingSeeds.put(seed.hash, seed);
else
robinsonSeeds.put(seed.hash, seed);
}
}
}
// evaluate the ranking score and select seeds
seedcount = Math.min(ranking.size(), seedcount);
yacySeed[] result = new yacySeed[seedcount + robinsonSeeds.size()];
yacySeed[] result = new yacySeed[seedcount + robinsonSeeds.size() + matchingSeeds.size()];
c = 0;
iter = ranking.scores(false); // higher are better
while (iter.hasNext() && c < seedcount) {
seed = regularSeeds.get(iter.next());
seed.selectscore = c;
serverLog.logInfo("PLASMA", "selectPeers/_lineup_: " + seed.hash + ":" + seed.getName() + " is choice " + c);
serverLog.logInfo("PLASMA", "selectPeers/_dht_: " + seed.hash + ":" + seed.getName() + " is choice " + c);
result[c++] = seed;
}
for (yacySeed s: robinsonSeeds.values()) {
serverLog.logInfo("PLASMA", "selectPeers/_robinson_: " + s.hash + ":" + s.getName() + " is choice " + c);
result[c++] = s;
}
for (yacySeed s: matchingSeeds.values()) {
serverLog.logInfo("PLASMA", "selectPeers/_match_: " + s.hash + ":" + s.getName() + " is choice " + c);
result[c++] = s;
}
// System.out.println("DEBUG yacySearch.selectPeers = " + seedcount + " seeds:"); for (int i = 0; i < seedcount; i++) System.out.println(" #" + i + ":" + result[i]); // debug
return result;

@ -591,7 +591,9 @@ public class yacySeed {
}
public boolean matchPeerTags(Set<String> searchHashes) {
Set<String> tags = serverCodings.string2set(get(PEERTAGS, ""), "|");
String peertags = get(PEERTAGS, "");
if (peertags.equals("*")) return true;
Set<String> tags = serverCodings.string2set(peertags, "|");
Iterator<String> i = tags.iterator();
while (i.hasNext()) {
if (searchHashes.contains(indexWord.word2hash(i.next()))) return true;

Loading…
Cancel
Save