From 40925f4fb78b930b311a28bb3d7da61b3d0c9d03 Mon Sep 17 00:00:00 2001
From: theli
Date: Tue, 13 Sep 2005 10:29:04 +0000
Subject: [PATCH] *) Improving complete index transfer performance by
automatically increasing the size of the transferred word chunk for fast
connections (similar to normal DHT behavior)
git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@719 6c8d7289-2bf4-0310-a012-ef5d649a1542
---
htroot/IndexTransfer_p.html | 20 +++++++-------
htroot/IndexTransfer_p.java | 1 +
.../plasma/plasmaWordIndexDistribution.java | 27 +++++++++++++------
3 files changed, 30 insertions(+), 18 deletions(-)
diff --git a/htroot/IndexTransfer_p.html b/htroot/IndexTransfer_p.html
index ed1f7b8f8..439aed536 100644
--- a/htroot/IndexTransfer_p.html
+++ b/htroot/IndexTransfer_p.html
@@ -22,28 +22,28 @@
#(running)#
-Not running |
-- |
-- |
-
+ | Not running |
+- |
+- |
+
|
-
+ |
|
::
- #[status]# |
- #[twcount]# (#[twpercent]#%) |
- #[twrange]# |
- #[peerName]# |
- #(stopped)#::
+ | #[status]# |
+ #[twcount]# (#[twpercent]#%) |
+ #[twchunk]# words: #[twrange]# |
+ #[peerName]# |
+ #(stopped)#::
#(/stopped)# |
#(/running)#
diff --git a/htroot/IndexTransfer_p.java b/htroot/IndexTransfer_p.java
index 54b283f76..7b9f261f5 100644
--- a/htroot/IndexTransfer_p.java
+++ b/htroot/IndexTransfer_p.java
@@ -94,6 +94,7 @@ public class IndexTransfer_p {
prop.put("running_twcount",transferedIdxCount);
prop.put("running_twpercent",Float.toString(transfThread.getTransferedIndexPercent()));
prop.put("running_twrange", transfThread.getRange());
+ prop.put("running_twchunk", Integer.toString(transfThread.getChunkSize()));
prop.put("running_peerName",transfThread.getSeed().getName());
prop.put("running_stopped",(transfThread.isFinished()) || (!transfThread.isAlive())?1:0);
}
diff --git a/source/de/anomic/plasma/plasmaWordIndexDistribution.java b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
index e513facea..add29070b 100644
--- a/source/de/anomic/plasma/plasmaWordIndexDistribution.java
+++ b/source/de/anomic/plasma/plasmaWordIndexDistribution.java
@@ -460,7 +460,6 @@ public class plasmaWordIndexDistribution {
public class transferIndexThread extends Thread {
-
private yacySeed seed = null;
private boolean delete = false;
private boolean finished = false;
@@ -468,12 +467,13 @@ public class plasmaWordIndexDistribution {
private String status = "running";
private String oldStartingPointHash = "------------", startPointHash = "------------";
private int wordsDBSize = 0;
+ private int chunkSize = 500;
public transferIndexThread(yacySeed seed, boolean delete) {
+ super(new ThreadGroup("TransferIndexThreadGroup"),"TransferIndex_" + seed.getName());
this.seed = seed;
this.delete = delete;
- this.wordsDBSize = plasmaSwitchboard.getSwitchboard().wordIndex.size();
- this.setName("TransferIndex_" + seed.getName());
+ this.wordsDBSize = plasmaSwitchboard.getSwitchboard().wordIndex.size();
}
public void run() {
@@ -489,6 +489,10 @@ public class plasmaWordIndexDistribution {
return this.finished;
}
+ public int getChunkSize() {
+ return this.chunkSize;
+ }
+
public int getTransferedIndexCount() {
return this.transferedIndexCount;
}
@@ -528,7 +532,7 @@ public class plasmaWordIndexDistribution {
start = System.currentTimeMillis();
// selecting 500 words to transfer
- Object[] selectResult = selectTransferIndexes(startPointHash, 500);
+ Object[] selectResult = selectTransferIndexes(startPointHash, chunkSize);
plasmaWordIndexEntity[] indexEntities = (plasmaWordIndexEntity[]) selectResult[0];
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
@@ -562,11 +566,18 @@ public class plasmaWordIndexDistribution {
String error = yacyClient.transferIndex(seed, indexEntities, urlCache);
if (error == null) {
// words successfully transfered
+ long transferTime = System.currentTimeMillis() - start;
plasmaWordIndexDistribution.this.log.logInfo("Index transfer of " + idxCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "]" +
- " to peer " + seed.getName() + ":" + seed.hash + " in " +
- ((System.currentTimeMillis() - start) / 1000) + " seconds successfull (" +
- (1000 * idxCount / (System.currentTimeMillis() - start + 1)) + " words/s)");
+ " to peer " + seed.getName() + ":" + seed.hash + " in " + (transferTime/1000) + " seconds successfull (" +
+ (1000 * idxCount / (transferTime + 1)) + " words/s)");
retryCount = 0;
+
+ if (transferTime > 30000) {
+ if (chunkSize>100) chunkSize-=50;
+ } else {
+ chunkSize+=50;
+ }
+
break;
} else {
// worts transfer failed
@@ -604,7 +615,7 @@ public class plasmaWordIndexDistribution {
Thread.sleep(retryCount*5000);
continue;
} else {
- seed = yacyCore.seedDB.getConnected(seed.hash);
+ yacyCore.seedDB.getConnected(seed.hash);
this.status = "running";
break;
}