From 61a81820e33a58f520f397b33d1916d20bddd3da Mon Sep 17 00:00:00 2001 From: orbiter Date: Sun, 17 Feb 2008 23:35:48 +0000 Subject: [PATCH] - refactoring of search tracker - added link to search history to repeat the search git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4493 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/AccessTracker_p.html | 2 +- htroot/AccessTracker_p.java | 53 +++++++++---------- htroot/yacy/search.java | 30 +++++------ htroot/yacy/user/ysearch.java | 23 ++++---- htroot/yacysearch.java | 25 +++++---- .../de/anomic/plasma/plasmaSearchQuery.java | 34 ++++++------ .../de/anomic/plasma/plasmaSwitchboard.java | 7 +-- 7 files changed, 82 insertions(+), 92 deletions(-) diff --git a/htroot/AccessTracker_p.html b/htroot/AccessTracker_p.html index e0200a892..0f15e6f45 100644 --- a/htroot/AccessTracker_p.html +++ b/htroot/AccessTracker_p.html @@ -76,7 +76,7 @@ #[resulttime]# #[urltime]# #[snippettime]# - #[querystring]# + #[querystring]# #{/list}# diff --git a/htroot/AccessTracker_p.java b/htroot/AccessTracker_p.java index de7bfe5a0..d70d5e453 100644 --- a/htroot/AccessTracker_p.java +++ b/htroot/AccessTracker_p.java @@ -27,10 +27,8 @@ import java.util.ArrayList; import java.util.ConcurrentModificationException; import java.util.Date; -import java.util.HashMap; import java.util.Iterator; import java.util.Map; -import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.Map.Entry; @@ -55,7 +53,6 @@ public class AccessTracker_p { return accessClone; } - @SuppressWarnings("unchecked") public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; @@ -128,9 +125,8 @@ public class AccessTracker_p { prop.put("page_num", entCount); } if ((page == 2) || (page == 4)) { - ArrayList> array = (page == 2) ? 
switchboard.localSearches : switchboard.remoteSearches; - Long trackerHandle; - HashMap searchProfile; + ArrayList array = (page == 2) ? switchboard.localSearches : switchboard.remoteSearches; + plasmaSearchQuery searchProfile; int m = Math.min(maxCount, array.size()); long qcountSum = 0; long rcountSum = 0; @@ -140,44 +136,43 @@ public class AccessTracker_p { for (int entCount = 0; entCount < m; entCount++) { searchProfile = array.get(array.size() - entCount - 1); - trackerHandle = (Long) searchProfile.get("time"); // put values in template prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! dark; - prop.putHTML("page_list_" + entCount + "_host", (String) searchProfile.get("host")); - prop.put("page_list_" + entCount + "_date", serverDate.formatShortSecond(new Date(trackerHandle.longValue()))); - prop.put("page_list_" + entCount + "_timestamp", trackerHandle.longValue()); + prop.putHTML("page_list_" + entCount + "_host", searchProfile.host); + prop.put("page_list_" + entCount + "_date", serverDate.formatShortSecond(new Date(searchProfile.handle.longValue()))); + prop.put("page_list_" + entCount + "_timestamp", searchProfile.handle.longValue()); if (page == 2) { // local search - prop.putNum("page_list_" + entCount + "_offset", ((Integer) searchProfile.get("offset")).longValue()); - prop.put("page_list_" + entCount + "_querystring", (String) searchProfile.get("querystring")); + prop.putNum("page_list_" + entCount + "_offset", searchProfile.offset); + prop.put("page_list_" + entCount + "_querystring", searchProfile.queryString); } else { // remote search - prop.putHTML("page_list_" + entCount + "_peername", (String) searchProfile.get("peername")); - prop.put("page_list_" + entCount + "_queryhashes", plasmaSearchQuery.anonymizedQueryHashes((Set) searchProfile.get("queryhashes"))); + prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? 
"" : searchProfile.remotepeer.getName()); + prop.put("page_list_" + entCount + "_queryhashes", plasmaSearchQuery.anonymizedQueryHashes(searchProfile.queryHashes)); } - prop.putNum("page_list_" + entCount + "_querycount", ((Integer) searchProfile.get("querycount")).longValue()); - prop.putNum("page_list_" + entCount + "_resultcount", ((Integer) searchProfile.get("resultcount")).longValue()); - prop.putNum("page_list_" + entCount + "_urltime", ((Long) searchProfile.get("resulturltime")).longValue()); - prop.putNum("page_list_" + entCount + "_snippettime", ((Long) searchProfile.get("resultsnippettime")).longValue()); - prop.putNum("page_list_" + entCount + "_resulttime", ((Long) searchProfile.get("resulttime")).longValue()); - qcountSum += ((Integer) searchProfile.get("querycount")).intValue(); - rcountSum += ((Integer) searchProfile.get("resultcount")).intValue(); - utimeSum += ((Long) searchProfile.get("resulturltime")).longValue(); - stimeSum += ((Long) searchProfile.get("resultsnippettime")).longValue(); - rtimeSum += ((Long) searchProfile.get("resulttime")).longValue(); + prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage); + prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount); + prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime); + prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime); + prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime); + qcountSum += searchProfile.linesPerPage; + rcountSum += searchProfile.resultcount; + utimeSum += searchProfile.urlretrievaltime; + stimeSum += searchProfile.snippetcomputationtime; + rtimeSum += searchProfile.searchtime; } prop.put("page_list", m); prop.put("page_num", m); // Put -1 instead of NaN as result for empty search list if (m == 0) m = -1; - prop.putNum("page_querycount_avg", (double)qcountSum/m); - prop.putNum("page_resultcount_avg", (double)rcountSum/m); - 
prop.putNum("page_urltime_avg", (double)utimeSum/m); - prop.putNum("page_snippettime_avg", (double)stimeSum/m); - prop.putNum("page_resulttime_avg", (double)rtimeSum/m); + prop.putNum("page_querycount_avg", (double) qcountSum / m); + prop.putNum("page_resultcount_avg", (double) rcountSum / m); + prop.putNum("page_urltime_avg", (double) utimeSum / m); + prop.putNum("page_snippettime_avg", (double) stimeSum / m); + prop.putNum("page_resulttime_avg", (double) rtimeSum / m); prop.putNum("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size()); } if ((page == 3) || (page == 5)) { diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index 0f5697e2e..f64e0beb2 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -29,7 +29,6 @@ // if the shell's current path is htroot/yacy import java.util.ArrayList; -import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -68,7 +67,8 @@ public final class search { if (post == null || env == null || !yacyNetwork.authentifyRequest(post, env)) { return prop; } - + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); + // test: // http://localhost:8080/yacy/search.html?query=4galTpdpDM5Q (search for linux) // http://localhost:8080/yacy/search.html?query=gh8DKIhGKXws (search for book) @@ -149,10 +149,10 @@ public final class search { int joincount = 0; plasmaSearchQuery theQuery = null; ArrayList accu = null; - long urlRetrievalAllTime = 0, snippetComputationAllTime = 0; + plasmaSearchEvent theSearch = null; if ((query.length() == 0) && (abstractSet != null)) { // this is _not_ a normal search, only a request for index abstracts - theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false); + theQuery = new 
plasmaSearchQuery(null, abstractSet, new TreeSet(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, client); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); @@ -177,14 +177,12 @@ public final class search { } else { // retrieve index containers from search request - theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false); + theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, client); theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL; yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links"); // make event - plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true); - urlRetrievalAllTime = theSearch.getURLRetrievalTime(); - snippetComputationAllTime = theSearch.getSnippetComputationTime(); + theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true); // set statistic details of search result and find best result index set if (theSearch.getRankingResult().getLocalResourceSize() == 0) { @@ -279,17 +277,15 @@ public final class search { prop.put("fwrec", ""); // peers that would have helped to construct this result (recommendations) // prepare search statistics - Long 
trackerHandle = new Long(System.currentTimeMillis()); - HashMap searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime); - String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); - searchProfile.put("host", client); - yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false); - searchProfile.put("peername", (remotepeer == null) ? "unknown" : remotepeer.getName()); - searchProfile.put("time", trackerHandle); - sb.remoteSearches.add(searchProfile); + theQuery.remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false); + theQuery.resultcount = (theSearch == null) ? 0 : theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); + theQuery.searchtime = System.currentTimeMillis() - timestamp; + theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.getURLRetrievalTime(); + theQuery.snippetcomputationtime = (theSearch == null) ? 
0 : theSearch.getSnippetComputationTime(); + sb.remoteSearches.add(theQuery); TreeSet handles = sb.remoteSearchTracker.get(client); if (handles == null) handles = new TreeSet(); - handles.add(trackerHandle); + handles.add(theQuery.handle); sb.remoteSearchTracker.put(client, handles); // log diff --git a/htroot/yacy/user/ysearch.java b/htroot/yacy/user/ysearch.java index 186932fb6..6f4b42c4a 100644 --- a/htroot/yacy/user/ysearch.java +++ b/htroot/yacy/user/ysearch.java @@ -237,6 +237,7 @@ public class ysearch { final boolean globalsearch = (global) && (yacyonline) && (sb.getConfigBool(plasmaSwitchboard.INDEX_RECEIVE_ALLOW, false)); // do the search + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search TreeSet queryHashes = plasmaCondenser.words2hashes(query[0]); plasmaSearchQuery theQuery = new plasmaSearchQuery( querystring, @@ -255,10 +256,10 @@ public class ysearch { "", 20, constraint, - true); + true, + client); - String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search - + // tell all threads to do nothing for a specific time sb.intermissionAllThreads(10000); @@ -289,18 +290,16 @@ public class ysearch { ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // prepare search statistics - Long trackerHandle = new Long(System.currentTimeMillis()); - HashMap searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime()); - searchProfile.put("querystring", theQuery.queryString); - searchProfile.put("time", trackerHandle); - searchProfile.put("host", client); - searchProfile.put("offset", new Integer(0)); - sb.localSearches.add(searchProfile); + theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + 
theSearch.getRankingResult().getRemoteResourceSize(); + theQuery.searchtime = System.currentTimeMillis() - timestamp; + theQuery.urlretrievaltime = theSearch.getURLRetrievalTime(); + theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime(); + sb.localSearches.add(theQuery); TreeSet handles = sb.localSearchTracker.get(client); if (handles == null) handles = new TreeSet(); - handles.add(trackerHandle); + handles.add(theQuery.handle); sb.localSearchTracker.put(client, handles); - + prop = new serverObjects(); int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); prop.put("num-results_offset", offset); diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 322acd7ab..228f36cd5 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -85,7 +85,8 @@ public class yacysearch { String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", ""); if (env.getConfigBool("promoteSearchPageGreeting.useNetworkName", false)) promoteSearchPageGreeting = env.getConfig("network.unit.description", ""); if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH"; - + String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search + // get query String querystring = (post == null) ? 
"" : post.get("search", "").trim(); @@ -255,10 +256,10 @@ public class yacysearch { "", 20, constraint, - true); + true, + client); - String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search - + // tell all threads to do nothing for a specific time sb.intermissionAllThreads(10000); @@ -289,18 +290,16 @@ public class yacysearch { ((System.currentTimeMillis() - timestamp) / 1000) + " seconds"); // prepare search statistics - Long trackerHandle = new Long(System.currentTimeMillis()); - HashMap searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime()); - searchProfile.put("querystring", theQuery.queryString); - searchProfile.put("time", trackerHandle); - searchProfile.put("host", client); - searchProfile.put("offset", new Integer(0)); - sb.localSearches.add(searchProfile); + theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); + theQuery.searchtime = System.currentTimeMillis() - timestamp; + theQuery.urlretrievaltime = theSearch.getURLRetrievalTime(); + theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime(); + sb.localSearches.add(theQuery); TreeSet handles = sb.localSearchTracker.get(client); if (handles == null) handles = new TreeSet(); - handles.add(trackerHandle); + handles.add(theQuery.handle); sb.localSearchTracker.put(client, handles); - + prop = new serverObjects(); int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(); prop.put("num-results_offset", offset); diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index bcaebeec2..87828b98a 100644 --- 
a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -42,7 +42,6 @@ package de.anomic.plasma; -import java.util.HashMap; import java.util.Iterator; import java.util.Set; import java.util.TreeSet; @@ -53,6 +52,7 @@ import de.anomic.kelondro.kelondroBitfield; import de.anomic.kelondro.kelondroMSetTools; import de.anomic.kelondro.kelondroNaturalOrder; import de.anomic.server.serverCharBuffer; +import de.anomic.yacy.yacySeed; import de.anomic.yacy.yacySeedDB; public final class plasmaSearchQuery { @@ -75,7 +75,7 @@ public final class plasmaSearchQuery { public String queryString; public TreeSet queryHashes, excludeHashes; - private int linesPerPage, offset; + public int linesPerPage, offset; public String prefer; public int contentdom; public String urlMask; @@ -87,6 +87,12 @@ public final class plasmaSearchQuery { public boolean allofconstraint; public boolean onlineSnippetFetch; public plasmaSearchRankingProfile ranking; + public String host; + public yacySeed remotepeer; + public Long handle; + // values that are set after a search: + public int resultcount; // number of found results + public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets public plasmaSearchQuery(String queryString, int lines, @@ -116,16 +122,20 @@ public final class plasmaSearchQuery { this.constraint = constraint; this.allofconstraint = false; this.onlineSnippetFetch = false; + this.host = null; + this.remotepeer = null; + this.handle = new Long(System.currentTimeMillis()); } -public plasmaSearchQuery( + public plasmaSearchQuery( String queryString, TreeSet queryHashes, TreeSet excludeHashes, plasmaSearchRankingProfile ranking, int maxDistance, String prefer, int contentdom, boolean onlineSnippetFetch, int lines, int offset, String urlMask, int domType, String domGroupName, int domMaxTargets, - kelondroBitfield constraint, boolean allofconstraint) { + 
kelondroBitfield constraint, boolean allofconstraint, + String host) { this.queryString = queryString; this.queryHashes = queryHashes; this.excludeHashes = excludeHashes; @@ -143,6 +153,9 @@ public plasmaSearchQuery( this.constraint = constraint; this.allofconstraint = allofconstraint; this.onlineSnippetFetch = onlineSnippetFetch; + this.host = host; + this.remotepeer = null; + this.handle = new Long(System.currentTimeMillis()); } public int neededResults() { @@ -280,17 +293,4 @@ public plasmaSearchQuery( } } - public HashMap resultProfile(int searchcount, long searchtime, long urlretrieval, long snippetcomputation) { - // generate statistics about search: query, time, etc - HashMap r = new HashMap(); - r.put("queryhashes", queryHashes); - r.put("querystring", queryString); - r.put("querycount", new Integer(linesPerPage)); - //r.put("querytime", new Long(maximumTime)); - r.put("resultcount", new Integer(searchcount)); - r.put("resulttime", new Long(searchtime)); - r.put("resulturltime", new Long(urlretrieval)); - r.put("resultsnippettime", new Long(snippetcomputation)); - return r; - } } diff --git a/source/de/anomic/plasma/plasmaSwitchboard.java b/source/de/anomic/plasma/plasmaSwitchboard.java index 97b626c25..df6e67d3a 100644 --- a/source/de/anomic/plasma/plasmaSwitchboard.java +++ b/source/de/anomic/plasma/plasmaSwitchboard.java @@ -230,7 +230,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser public dbImportManager dbImportManager; public plasmaDHTFlush transferIdxThread = null; private plasmaDHTChunk dhtTransferChunk = null; - public ArrayList> localSearches, remoteSearches; // array of search result properties as HashMaps + public ArrayList localSearches; // array of plasmaSearchQuery objects, each carrying the result properties of one local search + public ArrayList remoteSearches; // array of plasmaSearchQuery objects, each carrying the result properties of one remote search public HashMap> localSearchTracker, remoteSearchTracker; // mappings from requesting host to a TreeSet of Long(access time) public long
lastseedcheckuptime = -1; public long indexedPages = 0; @@ -1208,8 +1209,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser // init search history trackers this.localSearchTracker = new HashMap>(); // String:TreeSet - IP:set of Long(accessTime) this.remoteSearchTracker = new HashMap>(); - this.localSearches = new ArrayList>(); // contains search result properties as HashMaps - this.remoteSearches = new ArrayList>(); + this.localSearches = new ArrayList(); // contains plasmaSearchQuery objects that carry the search result properties + this.remoteSearches = new ArrayList(); // init messages: clean up message symbol File notifierSource = new File(getRootPath(), getConfig(HTROOT_PATH, HTROOT_PATH_DEFAULT) + "/env/grafics/empty.gif");