fix for missing anonymization in search profiling

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4274 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 17 years ago
parent e3e4f06be4
commit 270d016d89

@ -149,7 +149,7 @@ public final class search {
long timer = System.currentTimeMillis();
Map[] containers = sb.wordIndex.localSearchContainers(theQuery, plasmaSearchQuery.hashes2Set(urls));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), plasmaSearchEvent.COLLECTION, containers[0].size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.COLLECTION, containers[0].size(), System.currentTimeMillis() - timer));
if (containers != null) {
Iterator ci = containers[0].entrySet().iterator();
Map.Entry entry;
@ -244,7 +244,7 @@ public final class search {
refstr.append(",").append((String) j.next());
}
prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString());
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), "reference collection", ws.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), "reference collection", ws.size(), System.currentTimeMillis() - timer));
}
prop.put("indexabstract", indexabstract.toString());
@ -271,7 +271,7 @@ public final class search {
}
prop.put("links", links.toString());
prop.put("linkcount", accu.size());
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), "result list preparation", accu.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), "result list preparation", accu.size(), System.currentTimeMillis() - timer));
}
// add information about forward peers

@ -278,7 +278,7 @@ public class yacysearch {
long timestamp = System.currentTimeMillis();
// create a new search event
if (plasmaSearchEvent.getEvent(theQuery.id()) == null) {
if (plasmaSearchEvent.getEvent(theQuery.id(false)) == null) {
theQuery.setOffset(0); // in case that this is a new search, always start without a offset
offset = 0;
}
@ -347,11 +347,11 @@ public class yacysearch {
// generate the search result lines; they will be produced by another servlet
for (int i = 0; i < theQuery.displayResults(); i++) {
prop.put("results_" + i + "_item", offset + i);
prop.put("results_" + i + "_eventID", theQuery.id());
prop.put("results_" + i + "_eventID", theQuery.id(false));
}
prop.put("results", theQuery.displayResults());
prop.put("resultTable", (contentdomCode <= 1) ? "0" : "1");
prop.put("eventID", theQuery.id()); // for bottomline
prop.put("eventID", theQuery.id(false)); // for bottomline
// process result of search
if (filtered.size() > 0) {

@ -147,7 +147,7 @@ public final class plasmaSearchEvent {
ranking,
query.constraint,
(query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? null : preselectedPeerHashes);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "remote search thread start", this.primarySearchThreads.length, System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "remote search thread start", this.primarySearchThreads.length, System.currentTimeMillis() - timer));
// meanwhile do a local search
localSearchThread = new localSearchProcess();
@ -188,7 +188,7 @@ public final class plasmaSearchEvent {
IACount.put(wordhash, new Integer(container.size()));
IAResults.put(wordhash, indexContainer.compressIndex(container, null, 1000).toString());
}
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "abstract generation", this.rankedCache.searchContainerMaps()[0].size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "abstract generation", this.rankedCache.searchContainerMaps()[0].size(), System.currentTimeMillis() - timer));
}
}
@ -228,15 +228,15 @@ public final class plasmaSearchEvent {
}
}
}
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "offline snippet fetch", resultList.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "offline snippet fetch", resultList.size(), System.currentTimeMillis() - timer));
}
// clean up events
cleanupEvents(false);
// store this search to a cache so it can be re-used
lastEvents.put(query.id(), this);
lastEventID = query.id();
lastEvents.put(query.id(false), this);
lastEventID = query.id(false);
}
private class localSearchProcess extends Thread {
@ -267,7 +267,7 @@ public final class plasmaSearchEvent {
Set removeWords = cleanEvent.query.queryHashes;
removeWords.addAll(cleanEvent.query.excludeHashes);
cleanEvent.wordIndex.removeEntriesMultiple(removeWords, cleanEvent.failedURLs.keySet());
serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id() + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words");
serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id(true) + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words");
// remove the event
i.remove();
@ -362,7 +362,7 @@ public final class plasmaSearchEvent {
} else {
// problems with snippet fetch
registerFailure(page.hash(), "no text snippet for URL " + comp.url());
plasmaSnippetCache.failConsequences(snippet, query.id());
plasmaSnippetCache.failConsequences(snippet, query.id(false));
return null;
}
} else {
@ -457,7 +457,7 @@ public final class plasmaSearchEvent {
boolean generateAbstracts,
TreeSet abstractSet) {
synchronized (lastEvents) {
plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id());
plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id(false));
if (event == null) {
event = new plasmaSearchEvent(query, ranking, wordIndex, preselectedPeerHashes, generateAbstracts, abstractSet);
} else {

@ -189,6 +189,24 @@ public plasmaSearchQuery(String queryString, TreeSet queryHashes, TreeSet exclud
while (i.hasNext()) sb.append((String) i.next());
return new String(sb);
}
public static String anonymizedQueryHashes(Set hashes) {
    // Renders a set of query hashes in a form that is safe to log:
    // only the first three characters of every hash are kept and the
    // remainder is replaced by a fixed run of nine dots.
    // The result looks like "[abc........., def.........]"; an empty set gives "[]".
    final Iterator walker = hashes.iterator();
    // capacity hint: one masked entry plus its ", " separator per hash, plus the two brackets
    final StringBuffer masked = new StringBuffer(hashes.size() * (yacySeedDB.commonHashLength + 2) + 2);
    masked.append("[");
    // the separator is empty in front of the first entry and ", " afterwards
    String separator = "";
    while (walker.hasNext()) {
        final String wordHash = (String) walker.next();
        masked.append(separator).append(wordHash.substring(0, 3)).append(".........");
        separator = ", ";
    }
    masked.append("]");
    return masked.toString();
}
public static final boolean matches(String text, TreeSet keyhashes) {
// returns true if any of the word hashes in keyhashes appear in the String text
@ -245,27 +263,13 @@ public plasmaSearchQuery(String queryString, TreeSet queryHashes, TreeSet exclud
kelondroMSetTools.excludeDestructive(queryHashes, blues);
}
public static String anonymizedQueryHashes(Set hashes) {
    // Produces an anonymized, loggable representation of query hashes.
    // Each hash is cut down to its first three characters followed by nine
    // dots; entries are comma-separated and wrapped in square brackets.
    // Capacity hint: 3 + 9 characters per entry plus a 2-character separator
    // gives 14 per hash, and 2 more for the brackets.
    final StringBuffer out = new StringBuffer(hashes.size() * 14 + 2);
    out.append("[");
    boolean first = true;
    for (Iterator it = hashes.iterator(); it.hasNext();) {
        final String current = (String) it.next();
        if (first) {
            first = false;
        } else {
            out.append(", ");
        }
        out.append(current.substring(0, 3));
        out.append(".........");
    }
    out.append("]");
    return out.toString();
}
public String id() {
public String id(boolean anonymized) {
// generate a string that identifies a search so results can be re-used in a cache
// the string has the form <query hashes>-<exclude hashes>:<contentdom>
// anonymized == true : hashes are rendered through anonymizedQueryHashes (truncated and masked);
//                      the visible callers use this form for profiling events and log output
// anonymized == false: hashes are rendered through hashSet2hashString;
//                      the visible callers use this form as the event cache key and event ID
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom;
if (anonymized) {
// masked form, not usable as a cache key because the full hashes are not contained
return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + ":" + this.contentdom;
} else {
// full form, built from the unmodified query and exclude hashes
return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom;
}
}
public HashMap resultProfile(int searchcount, long searchtime, long urlretrieval, long snippetcomputation) {

@ -95,7 +95,7 @@ public final class plasmaSearchRankingProcess {
long timer = System.currentTimeMillis();
this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer));
// join and exclude the local result
timer = System.currentTimeMillis();
@ -106,7 +106,7 @@ public final class plasmaSearchRankingProcess {
this.localSearchContainerMaps[0].values(),
this.localSearchContainerMaps[1].values(),
query.maxDistance);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer));
int joincount = index.size();
if ((index == null) || (joincount == 0)) {
@ -172,7 +172,7 @@ public final class plasmaSearchRankingProcess {
this.order = new indexRWIEntryOrder(ranking);
}
this.order.extend(container);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.NORMALIZING, container.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, container.size(), System.currentTimeMillis() - timer));
/*
container.setOrdering(o, 0);
@ -233,7 +233,7 @@ public final class plasmaSearchRankingProcess {
//System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc);
//if ((query.neededResults() > 0) && (container.size() > query.neededResults())) remove(true, true);
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.PRESORT, container.size(), System.currentTimeMillis() - timer));
serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT, container.size(), System.currentTimeMillis() - timer));
}
private boolean testFlags(indexRWIEntry ientry) {

Loading…
Cancel
Save