From 93633abed899e91f9978e5d874ddbb427c74ac13 Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 23 Mar 2008 00:55:04 +0000 Subject: [PATCH] - removed some debugging code from search process - should speed up now - added some profiling code to search event - more time details in PerformanceSearch_p.html git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4594 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/ConfigNetwork_p.java | 2 +- htroot/PerformanceSearch_p.html | 4 +++- htroot/PerformanceSearch_p.java | 3 +++ htroot/yacysearch.java | 4 +++- htroot/yacysearchitem.java | 7 ++++++- source/de/anomic/plasma/plasmaSearchEvent.java | 7 +++++++ source/de/anomic/plasma/plasmaSearchQuery.java | 4 ++-- source/de/anomic/plasma/plasmaSearchRankingProcess.java | 4 +++- 8 files changed, 28 insertions(+), 7 deletions(-) diff --git a/htroot/ConfigNetwork_p.java b/htroot/ConfigNetwork_p.java index 8146ed1c2..a57566c4b 100644 --- a/htroot/ConfigNetwork_p.java +++ b/htroot/ConfigNetwork_p.java @@ -143,7 +143,7 @@ public class ConfigNetwork_p { try { RTCbusySleep = Integer.parseInt(env.getConfig(plasmaSwitchboard.CRAWLJOB_REMOTE_TRIGGERED_CRAWL_BUSYSLEEP, "100")); } catch (NumberFormatException e) {} - int RTCppm = (int) (60000L / RTCbusySleep); + int RTCppm = (int) (60000L / (RTCbusySleep + 1)); prop.put("acceptCrawlLimit", RTCppm); boolean indexDistribute = sb.getConfig(plasmaSwitchboard.INDEX_DIST_ALLOW, "true").equals("true"); diff --git a/htroot/PerformanceSearch_p.html b/htroot/PerformanceSearch_p.html index 78f92af07..c060750d9 100644 --- a/htroot/PerformanceSearch_p.html +++ b/htroot/PerformanceSearch_p.html @@ -17,7 +17,8 @@ Query Event Time - Duration (milliseconds) + Delta (ms) + Duration (ms) Result-Count #{table}# @@ -25,6 +26,7 @@ #[query]# #[event]# #[time]# + #[delta]# #[duration]# #[count]# diff --git a/htroot/PerformanceSearch_p.java b/htroot/PerformanceSearch_p.java index 64981af85..de2c2df29 100644 --- a/htroot/PerformanceSearch_p.java +++ b/htroot/PerformanceSearch_p.java @@ -43,15 +43,18 @@ public class PerformanceSearch_p { int c = 0; serverProfiling.Event event; plasmaProfiling.searchEvent search; + long lastt = 0; while (events.hasNext()) { event = events.next(); search = (plasmaProfiling.searchEvent) event.payload; prop.put("table_" + c + "_query", search.queryID); prop.put("table_" + c + "_event", search.processName); prop.putNum("table_" + c + "_count", search.resultCount); + prop.putNum("table_" + c + "_delta", event.time - lastt); prop.put("table_" + c + "_time", (new Date(event.time)).toString()); prop.putNum("table_" + c + "_duration", search.duration); c++; + lastt = event.time; } prop.put("table", c); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index e2cde895d..e732e5c7f 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -55,6 +55,7 @@ import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.plasma.plasmaCondenser; import de.anomic.plasma.plasmaParserDocument; +import de.anomic.plasma.plasmaProfiling; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProfile; @@ -62,6 +63,7 @@ import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverCore; import de.anomic.server.serverObjects; +import de.anomic.server.serverProfiling; import de.anomic.server.serverSwitch; import de.anomic.server.logging.serverLog; import de.anomic.tools.yFormatter; @@ -259,7 +261,7 @@ public class yacysearch { true, yacyURL.TLD_any_zone_filter, client); - + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.INITIALIZATION, 0, 0)); // tell all threads to do nothing for a specific time sb.intermissionAllThreads(10000); diff --git a/htroot/yacysearchitem.java b/htroot/yacysearchitem.java index be5b1a657..4540873b5 100644 --- a/htroot/yacysearchitem.java +++ b/htroot/yacysearchitem.java @@ -35,12 +35,14 @@ import java.util.TreeSet; import de.anomic.http.httpHeader; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; +import de.anomic.plasma.plasmaProfiling; import de.anomic.plasma.plasmaSearchEvent; import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSearchRankingProcess; import de.anomic.plasma.plasmaSnippetCache; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; +import de.anomic.server.serverProfiling; import de.anomic.server.serverSwitch; import de.anomic.tools.crypt; import de.anomic.tools.nxTools; @@ -83,7 +85,7 @@ public class yacysearchitem { return prop; } plasmaSearchQuery theQuery = theSearch.getQuery(); - + // dynamically update count values if (!rss) { int offset = theQuery.neededResults() - theQuery.displayResults() + 1; @@ -163,6 +165,7 @@ public class yacysearchitem { prop.put("references", "1"); } } + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + "bottomline", 0, 0)); return prop; } @@ -223,6 +226,8 @@ public class yacysearchitem { (((wordURL = yacyURL.probablyWordURL(result.hash(), query[0])) != null) ? ", probablyWordURL=" + wordURL.toNormalform(false, true) : "")); plasmaSnippetCache.TextSnippet snippet = result.textSnippet(); prop.put("content_snippet", (snippet == null) ? "(snippet not found)" : snippet.getLineMarked(theQuery.queryHashes)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.FINALIZATION + "-" + item, 0, 0)); + return prop; } diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 9ce724d23..ad4c5c156 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -55,11 +55,13 @@ import de.anomic.yacy.yacyURL; public final class plasmaSearchEvent { + public static final String INITIALIZATION = "initialization"; public static final String COLLECTION = "collection"; public static final String JOIN = "join"; public static final String PRESORT = "presort"; public static final String URLFETCH = "urlfetch"; public static final String NORMALIZING = "normalizing"; + public static final String FINALIZATION = "finalization"; public static int workerThreadCount = 10; public static String lastEventID = ""; @@ -198,6 +200,7 @@ public final class plasmaSearchEvent { this.workerThreads[i] = new resultWorker(i, 10000); this.workerThreads[i].start(); } + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "online snippet fetch threads started", 0, 0)); } else { // prepare result vector directly without worker threads long timer = System.currentTimeMillis(); @@ -229,6 +232,7 @@ public final class plasmaSearchEvent { // clean up events cleanupEvents(false); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "event-cleanup", 0, 0)); // store this search to a cache so it can be re-used lastEvents.put(query.id(false), this); @@ -282,6 +286,8 @@ public final class plasmaSearchEvent { // load only urls if there was not yet a root url of that hash // find the url entry + + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - start", 0, 0)); long startTime = System.currentTimeMillis(); indexURLEntry.Components comp = page.comp(); @@ -344,6 +350,7 @@ public final class plasmaSearchEvent { plasmaSnippetCache.TextSnippet snippet = plasmaSnippetCache.retrieveTextSnippet(comp, snippetFetchWordHashes, (snippetFetchMode == 2), ((query.constraint != null) && (query.constraint.get(plasmaCondenser.flag_cat_indexof))), 180, 3000, (snippetFetchMode == 2) ? Integer.MAX_VALUE : 100000); long snippetComputationTime = System.currentTimeMillis() - startTime; serverLog.logInfo("SEARCH_EVENT", "text snippet load time for " + comp.url() + ": " + snippetComputationTime + ", " + ((snippet.getErrorCode() < 11) ? "snippet found" : ("no snippet found (" + snippet.getError() + ")"))); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "obtain result entry - finish", 0, 0)); if (snippet.getErrorCode() < 11) { // we loaded the file and found the snippet diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index 13ac98e5d..4f7a49f95 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -289,9 +289,9 @@ public final class plasmaSearchQuery { public String id(boolean anonymized) { // generate a string that identifies a search so results can be re-used in a cache if (anonymized) { - return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString(); + return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString()); } else { - return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + this.ranking.toExternalString(); + return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + "*" + this.contentdom + "*" + this.zonecode + "*" + plasmaCondenser.word2hash(this.ranking.toExternalString()); } } diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 3a8cce781..fbf08ea50 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -184,9 +184,11 @@ public final class plasmaSearchRankingProcess { } // count domZones - indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0); + /* + indexURLEntry uentry = wordIndex.loadedURL.load(iEntry.urlHash, iEntry, 0); // this eats up a lot of time!!! yacyURL uurl = (uentry == null) ? null : uentry.comp().url(); System.out.println("DEBUG domDomain dom=" + ((uurl == null) ? "null" : uurl.getHost()) + ", zone=" + yacyURL.domDomain(iEntry.urlHash())); + */ this.domZones[yacyURL.domDomain(iEntry.urlHash())]++; // insert