added new performance menu for search sequence configuration and monitoring

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@990 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 20 years ago
parent 4fa942511b
commit 5b0911d7ea

@ -0,0 +1,118 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<head>
<title>YaCy '#[clientname]#': Performance Settings of Search Sequence</title>
#[metas]#
</head>
<body marginheight="0" marginwidth="0" leftmargin="0" topmargin="0">
#[header]#
#[submenuPerformance]#
<br>
<h2>Timing Settings of Search Sequence</h2>
<p>
<div class=small><b>Settings for local search profile:</b></div>
<table border="0" cellpadding="2" cellspacing="1">
<form action="PerformanceSearch_p.html" method="post" enctype="multipart/form-data">
<tr class="TableHeader" valign="bottom">
<td class="small">Entity</td>
<td class="small">Collection</td>
<td class="small">Join</td>
<td class="small">Pre-Sort</td>
<td class="small">URL Fetch</td>
<td class="small">Post-Sort</td>
<td class="small">Filter</td>
<td class="small">Snippet-Fetch</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">execution Time (percentage; sum of this must be 100)</td>
<td class="small" align="right"><input name="searchProcessLocalTime_c" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_c]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_j" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_j]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_r" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_r]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_u" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_u]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_o" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_o]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_f" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_f]#"></td>
<td class="small" align="right"><input name="searchProcessLocalTime_s" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalTime_s]#"></td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">result count (percentage of requested amount)</td>
<td class="small" align="right"><input name="searchProcessLocalCount_c" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_c]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_j" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_j]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_r" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_r]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_u" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_u]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_o" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_o]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_f" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_f]#"></td>
<td class="small" align="right"><input name="searchProcessLocalCount_s" type="text" align="right" size="8" maxlength="8" value="#[searchProcessLocalCount_s]#"></td>
</tr>
<tr class="TableCellLight">
<td class="small" align="left" colspan="8">
<input type="submit" name="submitlocalprofilecustom" value="Submit New Profile Values">&nbsp;&nbsp;&nbsp;
<input type="submit" name="submitlocalprofiledefault" value="Reset To Default Values">&nbsp;&nbsp;&nbsp;
</td>
</tr>
</form>
</table>
</p>
#(submitlocalrespond)#
::
<p>Your settings are valid and will be used for next search.</p>
::
<p>Reset to default settings done.</p>
::
<p>Your settings cannot be accepted: sum of execution time percentage is not 100</p>
#(/submitlocalrespond)#
<p>
<div class=small><b>Timing results of latest search request:</b></div>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader" valign="bottom">
<td class="small">Entity</td>
<td class="small">Collection</td>
<td class="small">Join</td>
<td class="small">Pre-Sort</td>
<td class="small">URL Fetch</td>
<td class="small">Post-Sort</td>
<td class="small">Filter</td>
<td class="small">Snippet-Fetch</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">execution Time (absolute milliseconds)</td>
<td class="small" align="right">#[latestLocalTimeAbs_c]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_j]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_r]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_u]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_o]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_f]#</td>
<td class="small" align="right">#[latestLocalTimeAbs_s]#</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">execution Time (relative)</td>
<td class="small" align="right">#[latestLocalTimeRel_c]#</td>
<td class="small" align="right">#[latestLocalTimeRel_j]#</td>
<td class="small" align="right">#[latestLocalTimeRel_r]#</td>
<td class="small" align="right">#[latestLocalTimeRel_u]#</td>
<td class="small" align="right">#[latestLocalTimeRel_o]#</td>
<td class="small" align="right">#[latestLocalTimeRel_f]#</td>
<td class="small" align="right">#[latestLocalTimeRel_s]#</td>
</tr>
<tr class="TableCellDark">
<td class="small" align="left">result count (absolute amount)</td>
<td class="small" align="right">#[latestLocalCountAbs_c]#</td>
<td class="small" align="right">#[latestLocalCountAbs_j]#</td>
<td class="small" align="right">#[latestLocalCountAbs_r]#</td>
<td class="small" align="right">#[latestLocalCountAbs_u]#</td>
<td class="small" align="right">#[latestLocalCountAbs_o]#</td>
<td class="small" align="right">#[latestLocalCountAbs_f]#</td>
<td class="small" align="right">#[latestLocalCountAbs_s]#</td>
</tr>
</table>
</p>
<p>
The network picture below shows how the latest search query was solved by asking corresponding peers in the DHT:
<img src="SearchEventPicture.png">
</p>
#[footer]#
</body>
</html>

@ -0,0 +1,131 @@
//PerformaceSearch_p.java
//-----------------------
//part of YaCy
//(C) by Michael Peter Christen; mc@anomic.de
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004, 2005
//last major change: 16.02.2005
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//Using this software in any meaning (reading, learning, copying, compiling,
//running) means that you agree that the Author(s) is (are) not responsible
//for cost, loss of data or any harm that may be caused directly or indirectly
//by usage of this softare or this documentation. The usage of this software
//is on your own risk. The installation and usage (starting/running) of this
//software may allow other people or application to access your computer and
//any attached devices and is highly dependent on the configuration of the
//software which must be done by the user of the software; the author(s) is
//(are) also not responsible for proper configuration and usage of the
//software, even if provoked by documentation provided together with
//the software.
//
//Any changes to this file according to the GPL as documented in the file
//gpl.txt aside this file in the shipment you received can be done to the
//lines that follows this copyright notice here, but changes must not be
//done inside the copyright notive above. A re-distribution must contain
//the intact and unchanged copyright notice.
//Contributions and changes to the program code must be marked as such.
//You must compile this file with
//javac -classpath .:../classes PerformanceSearch_p.java
//if the shell's current path is HTROOT
import java.util.Iterator;
import java.util.Map;
import java.io.File;
import org.apache.commons.pool.impl.GenericObjectPool;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.plasma.plasmaSearchProfile;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverThread;
import de.anomic.server.serverFileUtils;
/**
 * Servlet backing PerformanceSearch_p.html.
 *
 * It lets the user edit the per-step time/count settings of the local
 * search sequence (one config key per entry of plasmaSearchProfile.sequence)
 * and shows the yield times and yield counts recorded in the most recent
 * search event (plasmaSearchEvent.lastEvent).
 */
public class PerformanceSearch_p {

    // config/template key prefixes; the sequence character of a step is appended
    private static final String KEY_TIME  = "searchProcessLocalTime_";
    private static final String KEY_COUNT = "searchProcessLocalCount_";

    /**
     * Handles a page request.
     *
     * @param header the http header of the request (not used here)
     * @param post   the submitted form values, or null if nothing was posted
     * @param sb     the switchboard; the config values are read from and written to it
     * @return the template replacements. "submitlocalrespond" is
     *         0 = nothing submitted, 1 = custom values accepted,
     *         2 = reset to defaults, 3 = rejected because the time
     *         percentages do not sum up to 100.
     */
    public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
        // return variable that accumulates replacements
        plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
        serverObjects prop = new serverObjects();
        prop.put("submitlocalrespond", 0);

        // execute commands
        if (post != null) {
            if (post.containsKey("submitlocalprofilecustom")) {
                // first count percentages; missing or unparseable fields count as 0
                int percentSum = 0;
                for (int i = 0; i < plasmaSearchProfile.sequence.length; i++) {
                    percentSum += post.getInt(KEY_TIME + plasmaSearchProfile.sequence[i], 0);
                }

                // if check is ok set new values
                if (percentSum == 100) {
                    for (int i = 0; i < plasmaSearchProfile.sequence.length; i++) {
                        char step = plasmaSearchProfile.sequence[i];
                        sb.setConfig(KEY_TIME + step, post.get(KEY_TIME + step, ""));
                        sb.setConfig(KEY_COUNT + step, post.get(KEY_COUNT + step, ""));
                    }
                    prop.put("submitlocalrespond", 1);
                } else {
                    prop.put("submitlocalrespond", 3);
                }
            }

            if (post.containsKey("submitlocalprofiledefault")) {
                // the shipped defaults are only needed (and therefore only loaded) for a reset
                File defaultSettingsFile = new File(switchboard.getRootPath(), "yacy.init");
                Map defaultSettings = serverFileUtils.loadHashMap(defaultSettingsFile);
                for (int i = 0; i < plasmaSearchProfile.sequence.length; i++) {
                    char step = plasmaSearchProfile.sequence[i];
                    restoreDefault(sb, defaultSettings, KEY_TIME + step);
                    restoreDefault(sb, defaultSettings, KEY_COUNT + step);
                }
                prop.put("submitlocalrespond", 2);
            }
        }

        // prepare values
        plasmaSearchEvent se = plasmaSearchEvent.lastEvent;

        // count complete execution time; only positive yield times contribute
        long time = 0;
        long t;
        int c;
        char sequence;
        if (se != null) {
            for (int i = 0; i < plasmaSearchProfile.sequence.length; i++) {
                t = se.getLocalProfile().getYieldTime(plasmaSearchProfile.sequence[i]);
                if (t > 0) time += t;
            }
        }

        for (int i = 0; i < plasmaSearchProfile.sequence.length; i++) {
            sequence = plasmaSearchProfile.sequence[i];

            // current settings (form fields)
            prop.put(KEY_TIME + sequence, sb.getConfig(KEY_TIME + sequence, ""));
            prop.put(KEY_COUNT + sequence, sb.getConfig(KEY_COUNT + sequence, ""));

            // results of the latest search
            if (se == null) {
                prop.put("latestLocalTimeAbs_" + sequence, "-");
                prop.put("latestLocalTimeRel_" + sequence, "-");
                prop.put("latestLocalCountAbs_" + sequence, "-");
            } else {
                t = se.getLocalProfile().getYieldTime(sequence);
                prop.put("latestLocalTimeAbs_" + sequence, (t < 0) ? "-" : Long.toString(t));
                // 'time' is 0 when no step has a positive yield time; dividing by it
                // would throw an ArithmeticException, so report 0% in that case
                prop.put("latestLocalTimeRel_" + sequence, (((t < 0) || (time <= 0)) ? 0 : (t * 100 / time)) + "%");
                c = se.getLocalProfile().getYieldCount(sequence);
                prop.put("latestLocalCountAbs_" + sequence, (c < 0) ? "-" : Integer.toString(c));
            }
        }
        return prop;
    }

    /**
     * Copies one default value into the configuration.
     * A key that is absent from the defaults (or a null defaults map) is
     * skipped, so the current setting is kept instead of being set to null.
     */
    private static void restoreDefault(serverSwitch sb, Map defaults, String key) {
        Object value = (defaults == null) ? null : defaults.get(key);
        if (value != null) sb.setConfig(key, value.toString());
    }
}

@ -2,10 +2,13 @@
<tr height="10"><td colspan="17" class="MenuHeader">&nbsp;Performance Menu</td></tr>
<tr height="2"><td colspan="17"></td></tr>
<tr class="TableHeader">
<td width="50%" class="MenuSubItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;
<td width="33%" class="MenuSubItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;
<a href="/PerformanceQueues_p.html" class="MenuItemLink">Queues Performance Settings</a>&nbsp;</td>
<td class="MenuSubSpacer"></td>
<td width="50%" class="MenuSubItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;
<td width="33%" class="MenuSubItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;
<a href="/PerformanceMemory_p.html" class="MenuItemLink">Memory Settings for Database Caches</a>&nbsp;</td>
<td class="MenuSubSpacer"></td>
<td width="33%" class="MenuSubItem">&nbsp;<img border="0" src="/env/grafics/lock.gif" align="top">&nbsp;
<a href="/PerformanceSearch_p.html" class="MenuItemLink">Timing Settings for Search Sequence</a>&nbsp;</td>
</tr>
</table>

@ -109,9 +109,7 @@ from 'late' peers.
<p>
#(resultbottomline)#
::
The global search resulted in #[globalresults]# link contributions from other YaCy peers.<br><br>
The Network grafic below shows where the search query was solved by asking corresponding peers in the DHT:
<img src="SearchEventPicture.png">
The global search resulted in #[globalresults]# link contributions from other YaCy peers.
::
You can enrich the search results by using the 'global' option: This will search also other YaCy peers
::

@ -39,7 +39,7 @@
// the intact and unchanged copyright notice.
// Contributions and changes to the program code must be marked as such.
/* A kelondroIndex is a table with indexed access on the first row
/* A kelondroIndex is a table with indexed access on the first column
Elements may be selected from the table with logarithmic computation time
using the get-method. Inserts have also the same computation order and
can be done with the put-method.

@ -90,6 +90,10 @@ public final class plasmaSearchEvent {
return query;
}
public plasmaSearchProfile getLocalProfile() {
return profileLocal;
}
public yacySearch[] getSearchThreads() {
return searchThreads;
}
@ -246,6 +250,13 @@ public final class plasmaSearchEvent {
acc.sortResults();
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_POSTSORT);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_POSTSORT, acc.sizeOrdered());
// apply filter
profileLocal.startTimer();
acc.removeRedundant();
profileLocal.setYieldTime(plasmaSearchProfile.PROCESS_FILTER);
profileLocal.setYieldCount(plasmaSearchProfile.PROCESS_FILTER, acc.sizeOrdered());
return acc;
}
@ -292,27 +303,4 @@ public final class plasmaSearchEvent {
rcGlobal = null;
}
/*
public void preSearch() {
plasmaWordIndexEntity idx = null;
try {
// search the database locally
log.logFine("presearch: started job");
idx = searchHashes(query.queryHashes, time);
log.logFine("presearch: found " + idx.size() + " results");
plasmaSearchResult acc = order(idx, queryhashes, order, time, searchcount);
if (acc == null) return;
log.logFine("presearch: ordered results, now " + acc.sizeOrdered() + " URLs ready for fetch");
// take some elements and fetch the snippets
snippetCache.fetch(acc, queryhashes, urlmask, fetchcount);
} catch (IOException e) {
log.logSevere("presearch: failed", e);
} finally {
if (idx != null) try { idx.close(); } catch (Exception e){}
}
log.logFine("presearch: job terminated");
}
*/
}

@ -83,6 +83,7 @@ public class plasmaSearchProfile implements Cloneable {
public static final char PROCESS_PRESORT = 'r';
public static final char PROCESS_URLFETCH = 'u';
public static final char PROCESS_POSTSORT = 'o';
public static final char PROCESS_FILTER = 'f';
public static final char PROCESS_SNIPPETFETCH = 's';
public static char[] sequence = new char[]{
@ -91,6 +92,7 @@ public class plasmaSearchProfile implements Cloneable {
PROCESS_PRESORT,
PROCESS_URLFETCH,
PROCESS_POSTSORT,
PROCESS_FILTER,
PROCESS_SNIPPETFETCH
};
@ -114,7 +116,8 @@ public class plasmaSearchProfile implements Cloneable {
1 * time / 12, 10 * count,
1 * time / 12, 10 * count,
2 * time / 12, 5 * count,
4 * time / 12, count,
3 * time / 12, count,
1 * time / 12, count,
1 * time / 12, 1
);
}
@ -125,6 +128,7 @@ public class plasmaSearchProfile implements Cloneable {
long time_presort, int count_presort,
long time_urlfetch, int count_urlfetch,
long time_postsort, int count_postsort,
long time_filter, int count_filter,
long time_snippetfetch, int count_snippetfetch) {
this();
@ -133,12 +137,14 @@ public class plasmaSearchProfile implements Cloneable {
targetTime.put(new Character(PROCESS_PRESORT), new Long(time_presort));
targetTime.put(new Character(PROCESS_URLFETCH), new Long(time_urlfetch));
targetTime.put(new Character(PROCESS_POSTSORT), new Long(time_postsort));
targetTime.put(new Character(PROCESS_FILTER), new Long(time_filter));
targetTime.put(new Character(PROCESS_SNIPPETFETCH), new Long(time_snippetfetch));
targetCount.put(new Character(PROCESS_COLLECTION), new Integer(count_collection));
targetCount.put(new Character(PROCESS_JOIN), new Integer(count_join));
targetCount.put(new Character(PROCESS_PRESORT), new Integer(count_presort));
targetCount.put(new Character(PROCESS_URLFETCH), new Integer(count_urlfetch));
targetCount.put(new Character(PROCESS_POSTSORT), new Integer(count_postsort));
targetCount.put(new Character(PROCESS_FILTER), new Integer(count_filter));
targetCount.put(new Character(PROCESS_SNIPPETFETCH), new Integer(count_snippetfetch));
}

@ -169,9 +169,6 @@ public final class plasmaSearchResult {
pageAcc.put(serverCodings.encodeHex(ranking, 16) + indexEntry.getUrlHash(), page);
}
// remove redundant paths
removeRedundant();
// flush memory
results = null;
}

@ -587,3 +587,66 @@ indexTransfer.maxOpenFiles = 800
#
storagePeerHash =
# Search sequence settings
# collection:
# time = time to get a RWI out of RAM cache, assortments and WORDS files
# count = maximum number of RWI-entries that shall be collected
#
# join:
# time = time to perform the join between all collected RWIs
# count = maximum number of entries that shall be joined
#
# presort:
# time = time to do a sort of the joined URL-records
# count = maximum number of entries that shall be pre-sorted
#
# urlfetch:
# time = time to fetch the real URLs from the LURL database
# count = maximum number of urls that shall be fetched
#
# postsort:
# time = time for final sort of URLs
# count = maximum number of URLs that shall be retrieved during sort
#
# filter:
# time = time to filter out unwanted urls (like redundant urls)
# count = maximum number of urls that shall be filtered
#
# snippetfetch:
# time = time to fetch snippets for selected URLs
# count = maximum number of snippets to be fetched
#
# all values are percent
# time-percent is the percent of total search time
# count-percent is the percent of total wanted urls in result
# we distinguish local and remote search times
searchProcessLocalTime_c = 25
searchProcessLocalCount_c = 10000000
searchProcessLocalTime_j = 10
searchProcessLocalCount_j = 1000000
searchProcessLocalTime_r = 10
searchProcessLocalCount_r =100000
searchProcessLocalTime_u = 30
searchProcessLocalCount_u = 10000
searchProcessLocalTime_o = 10
searchProcessLocalCount_o = 100
searchProcessLocalTime_f = 5
searchProcessLocalCount_f = 100
searchProcessLocalTime_s = 10
searchProcessLocalCount_s = 30
searchProcessRemoteTime_c = 25
searchProcessRemoteCount_c = 1000000
searchProcessRemoteTime_j = 10
searchProcessRemoteCount_j = 1000000
searchProcessRemoteTime_r = 10
searchProcessRemoteCount_r = 1000
searchProcessRemoteTime_u = 30
searchProcessRemoteCount_u = 1000
searchProcessRemoteTime_o = 10
searchProcessRemoteCount_o = 1000
searchProcessRemoteTime_f = 5
searchProcessRemoteCount_f = 100
searchProcessRemoteTime_s = 10
searchProcessRemoteCount_s = 10

Loading…
Cancel
Save