- refactoring of the search tracker

- added a link to the search history to repeat a search

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4493 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent 9ecc17baef
commit 61a81820e3
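In short, the tracker no longer stores a separate HashMap<String, Object> profile for each finished search (the old plasmaSearchQuery.resultProfile() helper is removed); the plasmaSearchQuery object itself now carries the tracking data (host, remotepeer, handle and, after the search, resultcount, searchtime, urlretrievaltime, snippetcomputationtime) and is appended directly to switchboard.localSearches / remoteSearches, while localSearchTracker / remoteSearchTracker map the client IP to a TreeSet<Long> of query handles. The following standalone sketch uses simplified stand-in classes (SearchQuery and SearchTrackerSketch are illustrative names, not the actual YaCy types) to show the bookkeeping pattern introduced here:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.TreeSet;

// simplified stand-in for plasmaSearchQuery: the query object itself
// carries the tracking fields instead of a separate HashMap profile
class SearchQuery {
    String queryString;
    String host;   // client IP that issued the search
    Long handle;   // creation timestamp, used as the tracker handle
    int resultcount;                                            // filled in after the search
    long searchtime, urlretrievaltime, snippetcomputationtime;  // filled in after the search

    SearchQuery(String queryString, String host) {
        this.queryString = queryString;
        this.host = host;
        this.handle = Long.valueOf(System.currentTimeMillis());
    }
}

public class SearchTrackerSketch {
    // one list of query objects per search type, plus a per-client set of handles
    static ArrayList<SearchQuery> localSearches = new ArrayList<SearchQuery>();
    static HashMap<String, TreeSet<Long>> localSearchTracker = new HashMap<String, TreeSet<Long>>();

    public static void main(String[] args) {
        long timestamp = System.currentTimeMillis();
        SearchQuery query = new SearchQuery("linux", "127.0.0.1");

        // ... run the search, then record example statistics on the query object itself
        query.resultcount = 42;
        query.searchtime = System.currentTimeMillis() - timestamp;

        localSearches.add(query);
        TreeSet<Long> handles = localSearchTracker.get(query.host);
        if (handles == null) handles = new TreeSet<Long>();
        handles.add(query.handle);
        localSearchTracker.put(query.host, handles);

        // the stored query string is what the new search-history link reuses:
        // /yacysearch.html?search=<querystring>&resource=local
        System.out.println("tracked: " + query.queryString + " at " + query.handle);
    }
}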

@@ -76,7 +76,7 @@
<td>#[resulttime]#</td>
<td>#[urltime]#</td>
<td>#[snippettime]#</td>
<td>#[querystring]#</td>
<td><a href="/yacysearch.html?search=#[querystring]#&resource=local">#[querystring]#</a></td>
</tr>
#{/list}#
<tr class="TableHeader">

@@ -27,10 +27,8 @@
import java.util.ArrayList;
import java.util.ConcurrentModificationException;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;
@@ -55,7 +53,6 @@ public class AccessTracker_p {
return accessClone;
}
@SuppressWarnings("unchecked")
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
@@ -128,9 +125,8 @@ public class AccessTracker_p {
prop.put("page_num", entCount);
}
if ((page == 2) || (page == 4)) {
ArrayList<HashMap<String, Object>> array = (page == 2) ? switchboard.localSearches : switchboard.remoteSearches;
Long trackerHandle;
HashMap<String, Object> searchProfile;
ArrayList<plasmaSearchQuery> array = (page == 2) ? switchboard.localSearches : switchboard.remoteSearches;
plasmaSearchQuery searchProfile;
int m = Math.min(maxCount, array.size());
long qcountSum = 0;
long rcountSum = 0;
@@ -140,44 +136,43 @@ public class AccessTracker_p {
for (int entCount = 0; entCount < m; entCount++) {
searchProfile = array.get(array.size() - entCount - 1);
trackerHandle = (Long) searchProfile.get("time");
// put values in template
prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) );
dark = !dark;
prop.putHTML("page_list_" + entCount + "_host", (String) searchProfile.get("host"));
prop.put("page_list_" + entCount + "_date", serverDate.formatShortSecond(new Date(trackerHandle.longValue())));
prop.put("page_list_" + entCount + "_timestamp", trackerHandle.longValue());
prop.putHTML("page_list_" + entCount + "_host", searchProfile.host);
prop.put("page_list_" + entCount + "_date", serverDate.formatShortSecond(new Date(searchProfile.handle.longValue())));
prop.put("page_list_" + entCount + "_timestamp", searchProfile.handle.longValue());
if (page == 2) {
// local search
prop.putNum("page_list_" + entCount + "_offset", ((Integer) searchProfile.get("offset")).longValue());
prop.put("page_list_" + entCount + "_querystring", (String) searchProfile.get("querystring"));
prop.putNum("page_list_" + entCount + "_offset", searchProfile.offset);
prop.put("page_list_" + entCount + "_querystring", searchProfile.queryString);
} else {
// remote search
prop.putHTML("page_list_" + entCount + "_peername", (String) searchProfile.get("peername"));
prop.put("page_list_" + entCount + "_queryhashes", plasmaSearchQuery.anonymizedQueryHashes((Set<String>) searchProfile.get("queryhashes")));
prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
prop.put("page_list_" + entCount + "_queryhashes", plasmaSearchQuery.anonymizedQueryHashes(searchProfile.queryHashes));
}
prop.putNum("page_list_" + entCount + "_querycount", ((Integer) searchProfile.get("querycount")).longValue());
prop.putNum("page_list_" + entCount + "_resultcount", ((Integer) searchProfile.get("resultcount")).longValue());
prop.putNum("page_list_" + entCount + "_urltime", ((Long) searchProfile.get("resulturltime")).longValue());
prop.putNum("page_list_" + entCount + "_snippettime", ((Long) searchProfile.get("resultsnippettime")).longValue());
prop.putNum("page_list_" + entCount + "_resulttime", ((Long) searchProfile.get("resulttime")).longValue());
qcountSum += ((Integer) searchProfile.get("querycount")).intValue();
rcountSum += ((Integer) searchProfile.get("resultcount")).intValue();
utimeSum += ((Long) searchProfile.get("resulturltime")).longValue();
stimeSum += ((Long) searchProfile.get("resultsnippettime")).longValue();
rtimeSum += ((Long) searchProfile.get("resulttime")).longValue();
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.linesPerPage);
prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
qcountSum += searchProfile.linesPerPage;
rcountSum += searchProfile.resultcount;
utimeSum += searchProfile.urlretrievaltime;
stimeSum += searchProfile.snippetcomputationtime;
rtimeSum += searchProfile.searchtime;
}
prop.put("page_list", m);
prop.put("page_num", m);
// Put -1 instead of NaN as result for empty search list
if (m == 0) m = -1;
prop.putNum("page_querycount_avg", (double)qcountSum/m);
prop.putNum("page_resultcount_avg", (double)rcountSum/m);
prop.putNum("page_urltime_avg", (double)utimeSum/m);
prop.putNum("page_snippettime_avg", (double)stimeSum/m);
prop.putNum("page_resulttime_avg", (double)rtimeSum/m);
prop.putNum("page_querycount_avg", (double) qcountSum / m);
prop.putNum("page_resultcount_avg", (double) rcountSum / m);
prop.putNum("page_urltime_avg", (double) utimeSum / m);
prop.putNum("page_snippettime_avg", (double) stimeSum / m);
prop.putNum("page_resulttime_avg", (double) rtimeSum / m);
prop.putNum("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
}
if ((page == 3) || (page == 5)) {

@@ -29,7 +29,6 @@
// if the shell's current path is htroot/yacy
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
@@ -68,7 +67,8 @@ public final class search {
if (post == null || env == null || !yacyNetwork.authentifyRequest(post, env)) {
return prop;
}
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
// test:
// http://localhost:8080/yacy/search.html?query=4galTpdpDM5Q (search for linux)
// http://localhost:8080/yacy/search.html?query=gh8DKIhGKXws (search for book)
@@ -149,10 +149,10 @@ public final class search {
int joincount = 0;
plasmaSearchQuery theQuery = null;
ArrayList<ResultEntry> accu = null;
long urlRetrievalAllTime = 0, snippetComputationAllTime = 0;
plasmaSearchEvent theSearch = null;
if ((query.length() == 0) && (abstractSet != null)) {
// this is _not_ a normal search, only a request for index abstracts
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false);
theQuery = new plasmaSearchQuery(null, abstractSet, new TreeSet<String>(kelondroBase64Order.enhancedComparator), rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, null, false, client);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (abstracts only): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
@@ -177,14 +177,12 @@ public final class search {
} else {
// retrieve index containers from search request
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false);
theQuery = new plasmaSearchQuery(null, queryhashes, excludehashes, rankingProfile, maxdist, prefer, plasmaSearchQuery.contentdomParser(contentdom), false, count, 0, filter, plasmaSearchQuery.SEARCHDOM_LOCAL, null, -1, constraint, false, client);
theQuery.domType = plasmaSearchQuery.SEARCHDOM_LOCAL;
yacyCore.log.logInfo("INIT HASH SEARCH (query-" + abstracts + "): " + plasmaSearchQuery.anonymizedQueryHashes(theQuery.queryHashes) + " - " + theQuery.displayResults() + " links");
// make event
plasmaSearchEvent theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true);
urlRetrievalAllTime = theSearch.getURLRetrievalTime();
snippetComputationAllTime = theSearch.getSnippetComputationTime();
theSearch = plasmaSearchEvent.getEvent(theQuery, rankingProfile, sb.wordIndex, null, true);
// set statistic details of search result and find best result index set
if (theSearch.getRankingResult().getLocalResourceSize() == 0) {
@@ -279,17 +277,15 @@ public final class search {
prop.put("fwrec", ""); // peers that would have helped to construct this result (recommendations)
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile = theQuery.resultProfile(joincount, System.currentTimeMillis() - timestamp, urlRetrievalAllTime, snippetComputationAllTime);
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP);
searchProfile.put("host", client);
yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
searchProfile.put("peername", (remotepeer == null) ? "unknown" : remotepeer.getName());
searchProfile.put("time", trackerHandle);
sb.remoteSearches.add(searchProfile);
theQuery.remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(client), true, false, false);
theQuery.resultcount = (theSearch == null) ? 0 : theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.getURLRetrievalTime();
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.getSnippetComputationTime();
sb.remoteSearches.add(theQuery);
TreeSet<Long> handles = sb.remoteSearchTracker.get(client);
if (handles == null) handles = new TreeSet<Long>();
handles.add(trackerHandle);
handles.add(theQuery.handle);
sb.remoteSearchTracker.put(client, handles);
// log

@@ -237,6 +237,7 @@ public class ysearch {
final boolean globalsearch = (global) && (yacyonline) && (sb.getConfigBool(plasmaSwitchboard.INDEX_RECEIVE_ALLOW, false));
// do the search
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
TreeSet<String> queryHashes = plasmaCondenser.words2hashes(query[0]);
plasmaSearchQuery theQuery = new plasmaSearchQuery(
querystring,
@@ -255,10 +256,10 @@ public class ysearch {
"",
20,
constraint,
true);
true,
client);
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(10000);
@@ -289,18 +290,16 @@ public class ysearch {
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime());
searchProfile.put("querystring", theQuery.queryString);
searchProfile.put("time", trackerHandle);
searchProfile.put("host", client);
searchProfile.put("offset", new Integer(0));
sb.localSearches.add(searchProfile);
theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime();
sb.localSearches.add(theQuery);
TreeSet<Long> handles = sb.localSearchTracker.get(client);
if (handles == null) handles = new TreeSet<Long>();
handles.add(trackerHandle);
handles.add(theQuery.handle);
sb.localSearchTracker.put(client, handles);
prop = new serverObjects();
int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
prop.put("num-results_offset", offset);

@@ -85,7 +85,8 @@ public class yacysearch {
String promoteSearchPageGreeting = env.getConfig("promoteSearchPageGreeting", "");
if (env.getConfigBool("promoteSearchPageGreeting.useNetworkName", false)) promoteSearchPageGreeting = env.getConfig("network.unit.description", "");
if (promoteSearchPageGreeting.length() == 0) promoteSearchPageGreeting = "P2P WEB SEARCH";
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
// get query
String querystring = (post == null) ? "" : post.get("search", "").trim();
@@ -255,10 +256,10 @@ public class yacysearch {
"",
20,
constraint,
true);
true,
client);
String client = (String) header.get(httpHeader.CONNECTION_PROP_CLIENTIP); // the search client who initiated the search
// tell all threads to do nothing for a specific time
sb.intermissionAllThreads(10000);
@@ -289,18 +290,16 @@ public class yacysearch {
((System.currentTimeMillis() - timestamp) / 1000) + " seconds");
// prepare search statistics
Long trackerHandle = new Long(System.currentTimeMillis());
HashMap<String, Object> searchProfile = theQuery.resultProfile(theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize(), System.currentTimeMillis() - timestamp, theSearch.getURLRetrievalTime(), theSearch.getSnippetComputationTime());
searchProfile.put("querystring", theQuery.queryString);
searchProfile.put("time", trackerHandle);
searchProfile.put("host", client);
searchProfile.put("offset", new Integer(0));
sb.localSearches.add(searchProfile);
theQuery.resultcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.getSnippetComputationTime();
sb.localSearches.add(theQuery);
TreeSet<Long> handles = sb.localSearchTracker.get(client);
if (handles == null) handles = new TreeSet<Long>();
handles.add(trackerHandle);
handles.add(theQuery.handle);
sb.localSearchTracker.put(client, handles);
prop = new serverObjects();
int totalcount = theSearch.getRankingResult().getLocalResourceSize() + theSearch.getRankingResult().getRemoteResourceSize();
prop.put("num-results_offset", offset);

@@ -42,7 +42,6 @@
package de.anomic.plasma;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
@@ -53,6 +52,7 @@ import de.anomic.kelondro.kelondroBitfield;
import de.anomic.kelondro.kelondroMSetTools;
import de.anomic.kelondro.kelondroNaturalOrder;
import de.anomic.server.serverCharBuffer;
import de.anomic.yacy.yacySeed;
import de.anomic.yacy.yacySeedDB;
public final class plasmaSearchQuery {
@@ -75,7 +75,7 @@ public final class plasmaSearchQuery {
public String queryString;
public TreeSet<String> queryHashes, excludeHashes;
private int linesPerPage, offset;
public int linesPerPage, offset;
public String prefer;
public int contentdom;
public String urlMask;
@@ -87,6 +87,12 @@ public final class plasmaSearchQuery {
public boolean allofconstraint;
public boolean onlineSnippetFetch;
public plasmaSearchRankingProfile ranking;
public String host;
public yacySeed remotepeer;
public Long handle;
// values that are set after a search:
public int resultcount; // number of found results
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
public plasmaSearchQuery(String queryString,
int lines,
@@ -116,16 +122,20 @@ public final class plasmaSearchQuery {
this.constraint = constraint;
this.allofconstraint = false;
this.onlineSnippetFetch = false;
this.host = null;
this.remotepeer = null;
this.handle = new Long(System.currentTimeMillis());
}
public plasmaSearchQuery(
public plasmaSearchQuery(
String queryString, TreeSet<String> queryHashes, TreeSet<String> excludeHashes,
plasmaSearchRankingProfile ranking,
int maxDistance, String prefer, int contentdom,
boolean onlineSnippetFetch,
int lines, int offset, String urlMask,
int domType, String domGroupName, int domMaxTargets,
kelondroBitfield constraint, boolean allofconstraint) {
kelondroBitfield constraint, boolean allofconstraint,
String host) {
this.queryString = queryString;
this.queryHashes = queryHashes;
this.excludeHashes = excludeHashes;
@@ -143,6 +153,9 @@ public plasmaSearchQuery(
this.constraint = constraint;
this.allofconstraint = allofconstraint;
this.onlineSnippetFetch = onlineSnippetFetch;
this.host = host;
this.remotepeer = null;
this.handle = new Long(System.currentTimeMillis());
}
public int neededResults() {
@@ -280,17 +293,4 @@ public plasmaSearchQuery(
}
}
public HashMap<String, Object> resultProfile(int searchcount, long searchtime, long urlretrieval, long snippetcomputation) {
// generate statistics about search: query, time, etc
HashMap<String, Object> r = new HashMap<String, Object>();
r.put("queryhashes", queryHashes);
r.put("querystring", queryString);
r.put("querycount", new Integer(linesPerPage));
//r.put("querytime", new Long(maximumTime));
r.put("resultcount", new Integer(searchcount));
r.put("resulttime", new Long(searchtime));
r.put("resulturltime", new Long(urlretrieval));
r.put("resultsnippettime", new Long(snippetcomputation));
return r;
}
}

@@ -230,7 +230,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
public dbImportManager dbImportManager;
public plasmaDHTFlush transferIdxThread = null;
private plasmaDHTChunk dhtTransferChunk = null;
public ArrayList<HashMap<String, Object>> localSearches, remoteSearches; // array of search result properties as HashMaps
public ArrayList<plasmaSearchQuery> localSearches; // array of recently processed search queries, stored as plasmaSearchQuery objects
public ArrayList<plasmaSearchQuery> remoteSearches; // array of recently processed search queries, stored as plasmaSearchQuery objects
public HashMap<String, TreeSet<Long>> localSearchTracker, remoteSearchTracker; // mappings from requesting host to a TreeSet of Long(access time)
public long lastseedcheckuptime = -1;
public long indexedPages = 0;
@@ -1208,8 +1209,8 @@ public final class plasmaSwitchboard extends serverAbstractSwitch implements ser
// init search history trackers
this.localSearchTracker = new HashMap<String, TreeSet<Long>>(); // String:TreeSet - IP:set of Long(accessTime)
this.remoteSearchTracker = new HashMap<String, TreeSet<Long>>();
this.localSearches = new ArrayList<HashMap<String, Object>>(); // contains search result properties as HashMaps
this.remoteSearches = new ArrayList<HashMap<String, Object>>();
this.localSearches = new ArrayList<plasmaSearchQuery>(); // contains the tracked search queries as plasmaSearchQuery objects
this.remoteSearches = new ArrayList<plasmaSearchQuery>();
// init messages: clean up message symbol
File notifierSource = new File(getRootPath(), getConfig(HTROOT_PATH, HTROOT_PATH_DEFAULT) + "/env/grafics/empty.gif");
