- removed some debugging code from the search process; searches should be faster now

- added profiling code to the search event; more detailed timing information is shown in PerformanceSearch_p.html

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4594 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent fba46c51d7
commit 93633abed8

@ -143,7 +143,7 @@ public class ConfigNetwork_p {
try {
RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100"));
} catch (NumberFormatException e) {}
int RTCppm = (int) (60000L / RTCbusySleep);
int RTCppm = (int) (60000L / (RTCbusySleep + 1));
prop.put("acceptCrawlLimit", RTCppm);
boolean indexDistribute = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true");

@ -17,7 +17,8 @@
<td>Query</td>
<td>Event</td>
<td>Time</td>
<td>Duration (milliseconds)</td>
<td>Delta (ms)</td>
<td>Duration (ms)</td>
<td>Result-Count</td>
</tr>
#{table}#
@ -25,6 +26,7 @@
<td>#[query]#</td>
<td>#[event]#</td>
<td>#[time]#</td>
<td>#[delta]#</td>
<td>#[duration]#</td>
<td>#[count]#</td>
</tr>

@ -43,15 +43,18 @@ public class PerformanceSearch_p {
int c = 0;
serverProfiling.Event event;
plasmaProfiling.searchEvent search;
long lastt = 0;
while (events.hasNext()) {
event = events.next();
search = (plasmaProfiling.searchEvent) event.payload;
prop.put("table_" + c + "_query", search.queryID);
prop.put("table_" + c + "_event", search.processName);
prop.putNum("table_" + c + "_count", search.resultCount);
prop.putNum("table_" + c + "_delta", event.time - lastt);
prop.put("table_" + c + "_time", (new Date(event.time)).toString());
prop.putNum("table_" + c + "_duration", search.duration);
c++;
lastt = event.time;
}
prop.put("table", c);

@ -55,6 +55,7 @@ import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.plasma.plasmaCondenser;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProfile;
@ -62,6 +63,7 @@ import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
import de.anomic.server.logging.serverLog;
import de.anomic.tools.yFormatter;
@ -259,7 +261,7 @@ public class yacysearch {
true,
yacyURL.TLD_any_zone_filter,
client);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.INITIALIZATION, 0, 0));
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(10000);

@ -35,12 +35,14 @@ import java.util.TreeSet;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.plasma.plasmaProfiling;
import de.anomic.plasma.plasmaSearchEvent;
import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSearchRankingProcess;
import de.anomic.plasma.plasmaSnippetCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverProfiling;
import de.anomic.server.serverSwitch;
import de.anomic.tools.crypt;
import de.anomic.tools.nxTools;
@ -83,7 +85,7 @@ public class yacysearchitem {
return prop;
}
plasmaSearchQuery theQuery = theSearch.getQuery();
// dynamically update count values
if (!rss) {
int offset = theQuery.neededResults() - theQuery.displayResults() + 1;
@ -163,6 +165,7 @@ public class yacysearchitem {
prop.put("references", "1");
}
}
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + "bottomline", 0, 0));
return prop;
}
@ -223,6 +226,8 @@ public class yacysearchitem {
(((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : ""));
plasmaSnippetCache.TextSnippet snippet = result.textSnippet();
prop.put("content_snippet", (snippet == null) ? "(snippet not found)" : snippet.getLineMarked(theQuery.queryHashes));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + item, 0, 0));
return prop;
}

@ -55,11 +55,13 @@ import de.anomic.yacy.yacyURL;
public final class plasmaSearchEvent {
public static final String INITIALIZATION = "initialization";
public static final String COLLECTION = "collection";
public static final String JOIN = "join";
public static final String PRESORT = "presort";
public static final String URLFETCH = "urlfetch";
public static final String NORMALIZING = "normalizing";
public static final String FINALIZATION = "finalization";
public static int workerThreadCount = 10;
public static String lastEventID = "";
@ -198,6 +200,7 @@ public final class plasmaSearchEvent {
this.workerThreads[i] = new resultWorker(i, 10000);
this.workerThreads[i].start();
}
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "online snippet fetch threads started", 0, 0));
} else {
// prepare result vector directly without worker threads
long timer = System.currentTimeMillis();
@ -229,6 +232,7 @@ public final class plasmaSearchEvent {
// clean up events
cleanupEvents(false);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "event-cleanup", 0, 0));
// store this search to a cache so it can be re-used
lastEvents.put(query.id(false), this);
@ -282,6 +286,8 @@ public final class plasmaSearchEvent {
// load only urls if there was not yet a root url of that hash
// find the url entry
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - start", 0, 0));
long startTime = System.currentTimeMillis();
indexURLEntry.Components comp = page.comp();
@ -344,6 +350,7 @@ public final class plasmaSearchEvent {
plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000);
long snippetComputationTime = System.currentTimeMillis() - startTime;
serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " + comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")")));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - finish", 0, 0));
if (snippet.getErrorCode() < 11) {
// we loaded the file and found the snippet

@ -289,9 +289,9 @@ public final class plasmaSearchQuery {
/**
 * Builds a string that identifies this search so that results can be re-used in a cache.
 * The id is assembled from the query hashes, the exclude hashes, contentdom, zonecode
 * and the ranking profile, joined with "-" and "*".
 *
 * NOTE(review): this block comes from a diff view whose +/- markers were stripped; each
 * branch shows two return statements that appear to be the removed and the added version
 * of the same line (old first, new second) - confirm against the commit.
 *
 * @param anonymized if true, the hash sets are rendered with anonymizedQueryHashes;
 *                   otherwise they are rendered with hashSet2hashString
 * @return the identifier string for this query
 */
public String id(boolean anonymized) {
// generate a string that identifies a search so results can be re-used in a cache
if (anonymized) {
// variant that embeds the ranking profile's external string directly
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
// variant that passes the ranking profile's external string through plasmaCondenser.word2hash
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
} else {
// variant that embeds the ranking profile's external string directly
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString();
// variant that passes the ranking profile's external string through plasmaCondenser.word2hash
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString());
}
}

@ -184,9 +184,11 @@ public final class plasmaSearchRankingProcess {
}
// count domZones
indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0);
/*
indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0); // this eats up a lot of time!!!
yacyURL uurl = (uentry == null) ? null : uentry.comp().url();
System.out.println("DEBUG domDomain dom=" + ((uurl == null) ? "null" : uurl.getHost()) + ", zone=" + yacyURL.domDomain(iEntry.urlHash()));
*/
this.domZones[yacyURL.domDomain(iEntry.urlHash())]++;
// insert

Loading…
Cancel
Save