From 5cb00889d9c8a5eb6c92840469fadb6bdce6f53c Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 15 Aug 2005 01:12:25 +0000 Subject: [PATCH] enhancements to dht selection, search and search presentation git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@540 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- build.properties | 2 +- htroot/index.html | 2 +- htroot/index.java | 3 +++ .../de/anomic/plasma/plasmaSwitchboard.java | 14 ++++++---- source/de/anomic/yacy/yacySearch.java | 27 +++++++++++++++---- startYACY.sh | 2 +- stopYACY.sh | 2 +- 7 files changed, 38 insertions(+), 14 deletions(-) diff --git a/build.properties b/build.properties index 71d11ced3..9cd6a43fb 100644 --- a/build.properties +++ b/build.properties @@ -3,7 +3,7 @@ javacSource=1.4 javacTarget=1.4 # Release Configuration -releaseVersion=0.396 +releaseVersion=0.397 releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz #releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr} diff --git a/htroot/index.html b/htroot/index.html index e778e1c33..a17b6ee40 100644 --- a/htroot/index.html +++ b/htroot/index.html @@ -82,7 +82,7 @@ Please repeat your search to see if there are late-responses from remote peers :: -#[linkcount]# results from a total number of #[totalcount]# known links. +#[linkcount]# results from #[orderedcount]# ordered links of a total number of #[totalcount]# known. → Catch up more links from 'late' peers. 
#(/num-results)# diff --git a/htroot/index.java b/htroot/index.java index 05ba54593..e60d82306 100644 --- a/htroot/index.java +++ b/htroot/index.java @@ -168,11 +168,13 @@ public class index { } else { int linkcount = Integer.parseInt(prop.get("linkcount", "0")); + int orderedcount = Integer.parseInt(prop.get("orderedcount", "0")); int totalcount = Integer.parseInt(prop.get("totalcount", "0")); if (totalcount > 10) { Object[] references = (Object[]) prop.get("references", new String[0]); prop.put("num-results", 4); prop.put("num-results_linkcount", linkcount); + prop.put("num-results_orderedcount", orderedcount); prop.put("num-results_totalcount", totalcount); int hintcount = references.length; if (hintcount > 0) { @@ -199,6 +201,7 @@ public class index { else { prop.put("num-results", 4); prop.put("num-results_linkcount", linkcount); + prop.put("num-results_orderedcount", orderedcount); prop.put("num-results_totalcount", totalcount); } } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 1cd131e26..869fa4c09 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -1301,10 +1301,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // do global fetching int globalresults = 0; if (global) { - int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search - int fetchpeers = 10 + ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds - long fetchtime = time * 7 / 10; // time to waste - if (fetchcount > count) fetchcount = count; + int fetchcount = ((int) time / 1000) * 5; // number of wanted results until break in search + int fetchpeers = ((int) time / 1000) * 2; // number of target peers; means 20 peers in 10 seconds (never fewer than 10) + long fetchtime = time * 6 / 10; // time to waste + if (fetchpeers < 10) fetchpeers = 10; + if (fetchcount > count * 10) fetchcount = count * 
10; globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime); log.logDebug("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); } @@ -1326,9 +1327,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // result is a List of urlEntry elements: prepare answer if (acc == null) { prop.put("totalcount", "0"); + prop.put("orderedcount", "0"); prop.put("linkcount", "0"); } else { - prop.put("totalcount", Integer.toString(acc.sizeOrdered())); + prop.put("totalcount", Integer.toString(idx.size())); + prop.put("orderedcount", Integer.toString(acc.sizeOrdered())); int i = 0; int p; URL url; @@ -1428,6 +1431,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // log log.logInfo("EXIT WORD SEARCH: " + gs + " - " + prop.get("totalcount", "0") + " links found, " + + prop.get("orderedcount", "0") + " links ordered, " + prop.get("linkcount", "?") + " links selected, " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); if (idx != null) idx.close(); diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java index a7a06db0a..7de28b808 100644 --- a/source/de/anomic/yacy/yacySearch.java +++ b/source/de/anomic/yacy/yacySearch.java @@ -106,6 +106,7 @@ public class yacySearch extends Thread { if (yacyCore.seedDB == null) return null; if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected(); + // put in seeds according to dht kelondroMScoreCluster ranking = new kelondroMScoreCluster(); HashMap seeds = new HashMap(); yacySeed seed; @@ -116,17 +117,33 @@ public class yacySearch extends Thread { while (i.hasNext()) { wordhash = (String) i.next(); dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, wordhash); - c = 0; - while ((dhtEnum.hasMoreElements()) && (c < seedcount)) { + c = 
seedcount; + while ((dhtEnum.hasMoreElements()) && (c > 0)) { seed = (yacySeed) dhtEnum.nextElement(); - //System.out.println("Selected peer " + seed.hash + " for wordhash " + wordhash + ", score " + c); - ranking.addScore(seed.hash, c++); + //System.out.println("Selected peer " + seed.hash + "/" + seed.getName() + " for wordhash " + wordhash + ", score " + c); + ranking.addScore(seed.hash, c--); seeds.put(seed.hash, seed); } } + + // put in seeds according to size of peer + dhtEnum = yacyCore.seedDB.seedsSortedConnected(false, "ICount"); + c = seedcount; + int score; + if (c > yacyCore.seedDB.sizeConnected()) c = yacyCore.seedDB.sizeConnected(); + while ((dhtEnum.hasMoreElements()) && (c > 0)) { + seed = (yacySeed) dhtEnum.nextElement(); + score = (int) Math.round(Math.random() * c / 2); + //System.out.println("Selected peer " + seed.hash + "/" + seed.getName() + " for maxRWI=" + seed.getMap().get("ICount") + ", score " + score); + ranking.addScore(seed.hash, score); + seeds.put(seed.hash, seed); + c--; + } + + // evaluate the ranking score and select seeds if (ranking.size() < seedcount) seedcount = ranking.size(); yacySeed[] result = new yacySeed[seedcount]; - Iterator e = ranking.scores(true); // lower are better + Iterator e = ranking.scores(false); // higher are better c = 0; while ((e.hasNext()) && (c < result.length)) { seed = (yacySeed) seeds.get((String) e.next()); diff --git a/startYACY.sh b/startYACY.sh index c0c3f12f4..782bf7437 100755 --- a/startYACY.sh +++ b/startYACY.sh @@ -22,7 +22,7 @@ else else nohup java -classpath classes:htroot:$CLASSPATH yacy > /dev/null & fi - echo "YaCy started as daemon process. View it's activity in yacy.log" + echo "YaCy started as daemon process. 
View it's activity in log/yacy00.log" echo "To stop YaCy, please execute stopYACY.sh and wait some seconds" echo "To administrate YaCy, start your web browser and open http://localhost:8080" fi diff --git a/stopYACY.sh b/stopYACY.sh index bd65bd1d4..c514892e6 100755 --- a/stopYACY.sh +++ b/stopYACY.sh @@ -8,4 +8,4 @@ for N in `ls -1 libx/*.jar`; do CLASSPATH="$CLASSPATH$N:"; done java -classpath classes:htroot:$CLASSPATH yacy -shutdown echo "please wait until the YaCy daemon process terminates" -echo "you can monitor this with 'tail -f yacy.log' and 'fuser yacy.log'" \ No newline at end of file +echo "you can monitor this with 'tail -f log/yacy00.log' and 'fuser log/yacy00.log'" \ No newline at end of file