From 5cb00889d9c8a5eb6c92840469fadb6bdce6f53c Mon Sep 17 00:00:00 2001
From: orbiter
Date: Mon, 15 Aug 2005 01:12:25 +0000
Subject: [PATCH] enhancements to dht selection, search and search presentation
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@540 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
build.properties | 2 +-
htroot/index.html | 2 +-
htroot/index.java | 3 +++
.../de/anomic/plasma/plasmaSwitchboard.java | 14 ++++++----
source/de/anomic/yacy/yacySearch.java | 27 +++++++++++++++----
startYACY.sh | 2 +-
stopYACY.sh | 2 +-
7 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/build.properties b/build.properties
index 71d11ced3..9cd6a43fb 100644
--- a/build.properties
+++ b/build.properties
@@ -3,7 +3,7 @@ javacSource=1.4
javacTarget=1.4
# Release Configuration
-releaseVersion=0.396
+releaseVersion=0.397
releaseFile=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
#releaseFile=yacy_v${releaseVersion}_${DSTAMP}_${releaseNr}.tar.gz
releaseDir=yacy_dev_v${releaseVersion}_${DSTAMP}_${releaseNr}
diff --git a/htroot/index.html b/htroot/index.html
index e778e1c33..a17b6ee40 100644
--- a/htroot/index.html
+++ b/htroot/index.html
@@ -82,7 +82,7 @@ Please repeat your search to see if there are late-responses from remote peers
then please run your own peer and start a crawl of your wanted information to make it
available for everyone. Then stay online to support crawls from other peers. Thank you!
::
-#[linkcount]# results from a total number of #[totalcount]# known links.
+#[linkcount]# results from #[orderedcount]# ordered links of a total number of #[totalcount]# known.
→ Catch up more links
from 'late' peers.
#(/num-results)#
diff --git a/htroot/index.java b/htroot/index.java
index 05ba54593..e60d82306 100644
--- a/htroot/index.java
+++ b/htroot/index.java
@@ -168,11 +168,13 @@ public class index {
} else {
int linkcount = Integer.parseInt(prop.get("linkcount", "0"));
+ int orderedcount = Integer.parseInt(prop.get("orderedcount", "0"));
int totalcount = Integer.parseInt(prop.get("totalcount", "0"));
if (totalcount > 10) {
Object[] references = (Object[]) prop.get("references", new String[0]);
prop.put("num-results", 4);
prop.put("num-results_linkcount", linkcount);
+ prop.put("num-results_orderedcount", orderedcount);
prop.put("num-results_totalcount", totalcount);
int hintcount = references.length;
if (hintcount > 0) {
@@ -199,6 +201,7 @@ public class index {
else {
prop.put("num-results", 4);
prop.put("num-results_linkcount", linkcount);
+ prop.put("num-results_orderedcount", orderedcount);
prop.put("num-results_totalcount", totalcount);
}
}
diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java
index 1cd131e26..869fa4c09 100644
--- a/source/de/anomic/plasma/plasmaSwitchboard.java
+++ b/source/de/anomic/plasma/plasmaSwitchboard.java
@@ -1301,10 +1301,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// do global fetching
int globalresults = 0;
if (global) {
- int fetchcount = ((int) time / 1000) * 4; // number of wanted results until break in search
- int fetchpeers = 10 + ((int) time / 1000) * 3; // number of target peers; means 30 peers in 10 seconds
- long fetchtime = time * 7 / 10; // time to waste
- if (fetchcount > count) fetchcount = count;
+ int fetchcount = ((int) time / 1000) * 5; // number of wanted results until break in search
+ int fetchpeers = ((int) time / 1000) * 2; // number of target peers; means 20 peers in 10 seconds, but never fewer than 10
+ long fetchtime = time * 6 / 10; // time to waste
+ if (fetchpeers < 10) fetchpeers = 10;
+ if (fetchcount > count * 10) fetchcount = count * 10;
globalresults = yacySearch.searchHashes(queryhashes, urlPool.loadedURL, searchManager, fetchcount, fetchpeers, urlBlacklist, snippetCache, fetchtime);
log.logDebug("SEARCH TIME AFTER GLOBAL-TRIGGER TO " + fetchpeers + " PEERS: " + ((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
}
@@ -1326,9 +1327,11 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// result is a List of urlEntry elements: prepare answer
if (acc == null) {
prop.put("totalcount", "0");
+ prop.put("orderedcount", "0");
prop.put("linkcount", "0");
} else {
- prop.put("totalcount", Integer.toString(acc.sizeOrdered()));
+ prop.put("totalcount", Integer.toString(idx.size()));
+ prop.put("orderedcount", Integer.toString(acc.sizeOrdered()));
int i = 0;
int p;
URL url;
@@ -1428,6 +1431,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// log
log.logInfo("EXIT WORD SEARCH: " + gs + " - " +
prop.get("totalcount", "0") + " links found, " +
+ prop.get("orderedcount", "0") + " links ordered, " +
prop.get("linkcount", "?") + " links selected, " +
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
if (idx != null) idx.close();
diff --git a/source/de/anomic/yacy/yacySearch.java b/source/de/anomic/yacy/yacySearch.java
index a7a06db0a..7de28b808 100644
--- a/source/de/anomic/yacy/yacySearch.java
+++ b/source/de/anomic/yacy/yacySearch.java
@@ -106,6 +106,7 @@ public class yacySearch extends Thread {
if (yacyCore.seedDB == null) return null;
if (seedcount > yacyCore.seedDB.sizeConnected()) seedcount = yacyCore.seedDB.sizeConnected();
+ // put in seeds according to dht
kelondroMScoreCluster ranking = new kelondroMScoreCluster();
HashMap seeds = new HashMap();
yacySeed seed;
@@ -116,17 +117,33 @@ public class yacySearch extends Thread {
while (i.hasNext()) {
wordhash = (String) i.next();
dhtEnum = yacyCore.dhtAgent.getDHTSeeds(true, wordhash);
- c = 0;
- while ((dhtEnum.hasMoreElements()) && (c < seedcount)) {
+ c = seedcount;
+ while ((dhtEnum.hasMoreElements()) && (c > 0)) {
seed = (yacySeed) dhtEnum.nextElement();
- //System.out.println("Selected peer " + seed.hash + " for wordhash " + wordhash + ", score " + c);
- ranking.addScore(seed.hash, c++);
+ //System.out.println("Selected peer " + seed.hash + "/" + seed.getName() + " for wordhash " + wordhash + ", score " + c);
+ ranking.addScore(seed.hash, c--);
seeds.put(seed.hash, seed);
}
}
+
+ // put in seeds according to size of peer
+ dhtEnum = yacyCore.seedDB.seedsSortedConnected(false, "ICount");
+ c = seedcount;
+ int score;
+ if (c > yacyCore.seedDB.sizeConnected()) c = yacyCore.seedDB.sizeConnected();
+ while ((dhtEnum.hasMoreElements()) && (c > 0)) {
+ seed = (yacySeed) dhtEnum.nextElement();
+ score = (int) Math.round(Math.random() * c / 2);
+ //System.out.println("Selected peer " + seed.hash + "/" + seed.getName() + " for maxRWI=" + seed.getMap().get("ICount") + ", score " + score);
+ ranking.addScore(seed.hash, score);
+ seeds.put(seed.hash, seed);
+ c--;
+ }
+
+ // evaluate the ranking score and select seeds
if (ranking.size() < seedcount) seedcount = ranking.size();
yacySeed[] result = new yacySeed[seedcount];
- Iterator e = ranking.scores(true); // lower are better
+ Iterator e = ranking.scores(false); // higher are better
c = 0;
while ((e.hasNext()) && (c < result.length)) {
seed = (yacySeed) seeds.get((String) e.next());
diff --git a/startYACY.sh b/startYACY.sh
index c0c3f12f4..782bf7437 100755
--- a/startYACY.sh
+++ b/startYACY.sh
@@ -22,7 +22,7 @@ else
else
nohup java -classpath classes:htroot:$CLASSPATH yacy > /dev/null &
fi
- echo "YaCy started as daemon process. View it's activity in yacy.log"
+ echo "YaCy started as daemon process. View it's activity in log/yacy00.log"
echo "To stop YaCy, please execute stopYACY.sh and wait some seconds"
echo "To administrate YaCy, start your web browser and open http://localhost:8080"
fi
diff --git a/stopYACY.sh b/stopYACY.sh
index bd65bd1d4..c514892e6 100755
--- a/stopYACY.sh
+++ b/stopYACY.sh
@@ -8,4 +8,4 @@ for N in `ls -1 libx/*.jar`; do CLASSPATH="$CLASSPATH$N:"; done
java -classpath classes:htroot:$CLASSPATH yacy -shutdown
echo "please wait until the YaCy daemon process terminates"
-echo "you can monitor this with 'tail -f yacy.log' and 'fuser yacy.log'"
\ No newline at end of file
+echo "you can monitor this with 'tail -f log/yacy00.log' and 'fuser log/yacy00.log'"
\ No newline at end of file