*) Improving complete index transfer performance by automatically increasing the size of the transferred word chunk

for fast connections (much like normal DHT behavior)
   

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@719 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
theli 20 years ago
parent 91ab4d044b
commit 40925f4fb7

@ -22,28 +22,28 @@
</tr> </tr>
#(running)# #(running)#
<tr valign="top" class="TableCellLight"> <tr valign="top" class="TableCellLight">
<td>Not running</td> <td class="small">Not running</td>
<td align="center">-</td> <td class="small" align="center">-</td>
<td align="center">-</td> <td class="small" align="center">-</td>
<td> <td class="small">
<select name="hostHash"> <select name="hostHash">
#{hosts}# #{hosts}#
<option value="#[hosthash]#">#[hostname]#</option> <option value="#[hosthash]#">#[hostname]#</option>
#{/hosts}# #{/hosts}#
</select> </select>
</td> </td>
<td> <td class="small">
<input type="submit" name="startIndexTransfer" value="Start Index Transfer"> <input type="submit" name="startIndexTransfer" value="Start Index Transfer">
</td> </td>
</tr> </tr>
</table></p> </table></p>
:: ::
<tr class="TableCellLight"> <tr class="TableCellLight">
<td><font color="#(stopped)#green::red#(/stopped)#">#[status]#</font></td> <td class="small"><font color="#(stopped)#green::red#(/stopped)#">#[status]#</font></td>
<td align="right">#[twcount]# (#[twpercent]#%) </td> <td class="small" align="right">#[twcount]# (#[twpercent]#%) </td>
<td align="rigth"><tt>#[twrange]#</tt></td> <td class="small" align="rigth">#[twchunk]# words: <tt>#[twrange]#</tt></td>
<td>#[peerName]#</td> <td class="small">#[peerName]#</td>
<td>#(stopped)#<input type="submit" name="stopIndexTransfer" value="Stop Index Transfer">:: <td class="small">#(stopped)#<input type="submit" name="stopIndexTransfer" value="Stop Index Transfer">::
<input type="submit" name="newIndexTransfer" value="Start New Index Transfer">#(/stopped)#</td> <input type="submit" name="newIndexTransfer" value="Start New Index Transfer">#(/stopped)#</td>
</tr> </tr>
#(/running)# #(/running)#

@ -94,6 +94,7 @@ public class IndexTransfer_p {
prop.put("running_twcount",transferedIdxCount); prop.put("running_twcount",transferedIdxCount);
prop.put("running_twpercent",Float.toString(transfThread.getTransferedIndexPercent())); prop.put("running_twpercent",Float.toString(transfThread.getTransferedIndexPercent()));
prop.put("running_twrange", transfThread.getRange()); prop.put("running_twrange", transfThread.getRange());
prop.put("running_twchunk", Integer.toString(transfThread.getChunkSize()));
prop.put("running_peerName",transfThread.getSeed().getName()); prop.put("running_peerName",transfThread.getSeed().getName());
prop.put("running_stopped",(transfThread.isFinished()) || (!transfThread.isAlive())?1:0); prop.put("running_stopped",(transfThread.isFinished()) || (!transfThread.isAlive())?1:0);
} }

@ -460,7 +460,6 @@ public class plasmaWordIndexDistribution {
public class transferIndexThread extends Thread { public class transferIndexThread extends Thread {
private yacySeed seed = null; private yacySeed seed = null;
private boolean delete = false; private boolean delete = false;
private boolean finished = false; private boolean finished = false;
@ -468,12 +467,13 @@ public class plasmaWordIndexDistribution {
private String status = "running"; private String status = "running";
private String oldStartingPointHash = "------------", startPointHash = "------------"; private String oldStartingPointHash = "------------", startPointHash = "------------";
private int wordsDBSize = 0; private int wordsDBSize = 0;
private int chunkSize = 500;
public transferIndexThread(yacySeed seed, boolean delete) { public transferIndexThread(yacySeed seed, boolean delete) {
super(new ThreadGroup("TransferIndexThreadGroup"),"TransferIndex_" + seed.getName());
this.seed = seed; this.seed = seed;
this.delete = delete; this.delete = delete;
this.wordsDBSize = plasmaSwitchboard.getSwitchboard().wordIndex.size(); this.wordsDBSize = plasmaSwitchboard.getSwitchboard().wordIndex.size();
this.setName("TransferIndex_" + seed.getName());
} }
public void run() { public void run() {
@ -489,6 +489,10 @@ public class plasmaWordIndexDistribution {
return this.finished; return this.finished;
} }
public int getChunkSize() {
return this.chunkSize;
}
public int getTransferedIndexCount() { public int getTransferedIndexCount() {
return this.transferedIndexCount; return this.transferedIndexCount;
} }
@ -528,7 +532,7 @@ public class plasmaWordIndexDistribution {
start = System.currentTimeMillis(); start = System.currentTimeMillis();
// selecting 500 words to transfer // selecting 500 words to transfer
Object[] selectResult = selectTransferIndexes(startPointHash, 500); Object[] selectResult = selectTransferIndexes(startPointHash, chunkSize);
plasmaWordIndexEntity[] indexEntities = (plasmaWordIndexEntity[]) selectResult[0]; plasmaWordIndexEntity[] indexEntities = (plasmaWordIndexEntity[]) selectResult[0];
HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry HashMap urlCache = (HashMap) selectResult[1]; // String (url-hash) / plasmaCrawlLURL.Entry
@ -562,11 +566,18 @@ public class plasmaWordIndexDistribution {
String error = yacyClient.transferIndex(seed, indexEntities, urlCache); String error = yacyClient.transferIndex(seed, indexEntities, urlCache);
if (error == null) { if (error == null) {
// words successfully transfered // words successfully transfered
long transferTime = System.currentTimeMillis() - start;
plasmaWordIndexDistribution.this.log.logInfo("Index transfer of " + idxCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "]" + plasmaWordIndexDistribution.this.log.logInfo("Index transfer of " + idxCount + " words [" + indexEntities[0].wordHash() + " .. " + indexEntities[indexEntities.length-1].wordHash() + "]" +
" to peer " + seed.getName() + ":" + seed.hash + " in " + " to peer " + seed.getName() + ":" + seed.hash + " in " + (transferTime/1000) + " seconds successfull (" +
((System.currentTimeMillis() - start) / 1000) + " seconds successfull (" + (1000 * idxCount / (transferTime + 1)) + " words/s)");
(1000 * idxCount / (System.currentTimeMillis() - start + 1)) + " words/s)");
retryCount = 0; retryCount = 0;
if (transferTime > 30000) {
if (chunkSize>100) chunkSize-=50;
} else {
chunkSize+=50;
}
break; break;
} else { } else {
// worts transfer failed // worts transfer failed
@ -604,7 +615,7 @@ public class plasmaWordIndexDistribution {
Thread.sleep(retryCount*5000); Thread.sleep(retryCount*5000);
continue; continue;
} else { } else {
seed = yacyCore.seedDB.getConnected(seed.hash); yacyCore.seedDB.getConnected(seed.hash);
this.status = "running"; this.status = "running";
break; break;
} }

Loading…
Cancel
Save