diff --git a/htroot/yacy/search.java b/htroot/yacy/search.java index e1755f4a0..6bc073868 100644 --- a/htroot/yacy/search.java +++ b/htroot/yacy/search.java @@ -149,7 +149,7 @@ public final class search { long timer = System.currentTimeMillis(); Map[] containers = sb.wordIndex.localSearchContainers(theQuery, plasmaSearchQuery.hashes2Set(urls)); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), plasmaSearchEvent.COLLECTION, containers[0].size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), plasmaSearchEvent.COLLECTION, containers[0].size(), System.currentTimeMillis() - timer)); if (containers != null) { Iterator ci = containers[0].entrySet().iterator(); Map.Entry entry; @@ -244,7 +244,7 @@ public final class search { refstr.append(",").append((String) j.next()); } prop.put("references", (refstr.length() > 0) ? refstr.substring(1) : refstr.toString()); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), "reference collection", ws.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), "reference collection", ws.size(), System.currentTimeMillis() - timer)); } prop.put("indexabstract", indexabstract.toString()); @@ -271,7 +271,7 @@ public final class search { } prop.put("links", links.toString()); prop.put("linkcount", accu.size()); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(), "result list preparation", accu.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(theQuery.id(true), "result list preparation", accu.size(), System.currentTimeMillis() - timer)); } // add information about forward peers diff --git a/htroot/yacysearch.java b/htroot/yacysearch.java index 93e12e69d..b2a238e2a 100644 --- a/htroot/yacysearch.java +++ b/htroot/yacysearch.java @@ -278,7 
+278,7 @@ public class yacysearch { long timestamp = System.currentTimeMillis(); // create a new search event - if (plasmaSearchEvent.getEvent(theQuery.id()) == null) { + if (plasmaSearchEvent.getEvent(theQuery.id(false)) == null) { theQuery.setOffset(0); // in case that this is a new search, always start without a offset offset = 0; } @@ -347,11 +347,11 @@ public class yacysearch { // generate the search result lines; they will be produced by another servlet for (int i = 0; i < theQuery.displayResults(); i++) { prop.put("results_" + i + "_item", offset + i); - prop.put("results_" + i + "_eventID", theQuery.id()); + prop.put("results_" + i + "_eventID", theQuery.id(false)); } prop.put("results", theQuery.displayResults()); prop.put("resultTable", (contentdomCode <= 1) ? "0" : "1"); - prop.put("eventID", theQuery.id()); // for bottomline + prop.put("eventID", theQuery.id(false)); // for bottomline // process result of search if (filtered.size() > 0) { diff --git a/source/de/anomic/plasma/plasmaSearchEvent.java b/source/de/anomic/plasma/plasmaSearchEvent.java index 4da48ab65..de0c93931 100644 --- a/source/de/anomic/plasma/plasmaSearchEvent.java +++ b/source/de/anomic/plasma/plasmaSearchEvent.java @@ -147,7 +147,7 @@ public final class plasmaSearchEvent { ranking, query.constraint, (query.domType == plasmaSearchQuery.SEARCHDOM_GLOBALDHT) ? 
null : preselectedPeerHashes); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "remote search thread start", this.primarySearchThreads.length, System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "remote search thread start", this.primarySearchThreads.length, System.currentTimeMillis() - timer)); // meanwhile do a local search localSearchThread = new localSearchProcess(); @@ -188,7 +188,7 @@ public final class plasmaSearchEvent { IACount.put(wordhash, new Integer(container.size())); IAResults.put(wordhash, indexContainer.compressIndex(container, null, 1000).toString()); } - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "abstract generation", this.rankedCache.searchContainerMaps()[0].size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "abstract generation", this.rankedCache.searchContainerMaps()[0].size(), System.currentTimeMillis() - timer)); } } @@ -228,15 +228,15 @@ public final class plasmaSearchEvent { } } } - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), "offline snippet fetch", resultList.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), "offline snippet fetch", resultList.size(), System.currentTimeMillis() - timer)); } // clean up events cleanupEvents(false); // store this search to a cache so it can be re-used - lastEvents.put(query.id(), this); - lastEventID = query.id(); + lastEvents.put(query.id(false), this); + lastEventID = query.id(false); } private class localSearchProcess extends Thread { @@ -267,7 +267,7 @@ public final class plasmaSearchEvent { Set removeWords = cleanEvent.query.queryHashes; removeWords.addAll(cleanEvent.query.excludeHashes); cleanEvent.wordIndex.removeEntriesMultiple(removeWords, cleanEvent.failedURLs.keySet()); - 
serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id() + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words"); + serverLog.logInfo("SearchEvents", "cleaning up event " + cleanEvent.query.id(true) + ", removed " + cleanEvent.failedURLs.size() + " URL references on " + removeWords.size() + " words"); // remove the event i.remove(); @@ -362,7 +362,7 @@ public final class plasmaSearchEvent { } else { // problems with snippet fetch registerFailure(page.hash(), "no text snippet for URL " + comp.url()); - plasmaSnippetCache.failConsequences(snippet, query.id()); + plasmaSnippetCache.failConsequences(snippet, query.id(false)); return null; } } else { @@ -457,7 +457,7 @@ public final class plasmaSearchEvent { boolean generateAbstracts, TreeSet abstractSet) { synchronized (lastEvents) { - plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id()); + plasmaSearchEvent event = (plasmaSearchEvent) lastEvents.get(query.id(false)); if (event == null) { event = new plasmaSearchEvent(query, ranking, wordIndex, preselectedPeerHashes, generateAbstracts, abstractSet); } else { diff --git a/source/de/anomic/plasma/plasmaSearchQuery.java b/source/de/anomic/plasma/plasmaSearchQuery.java index e0fad1858..24db9459e 100644 --- a/source/de/anomic/plasma/plasmaSearchQuery.java +++ b/source/de/anomic/plasma/plasmaSearchQuery.java @@ -189,6 +189,24 @@ public plasmaSearchQuery(String queryString, TreeSet queryHashes, TreeSet exclud while (i.hasNext()) sb.append((String) i.next()); return new String(sb); } + + public static String anonymizedQueryHashes(Set hashes) { + // create a more anonymized representation of query hashes for logging + Iterator i = hashes.iterator(); + StringBuffer sb = new StringBuffer(hashes.size() * (yacySeedDB.commonHashLength + 2) + 2); + sb.append("["); + String hash; + if (i.hasNext()) { + hash = (String) i.next(); + sb.append(hash.substring(0, 3)).append("........."); + } + while 
(i.hasNext()) { + hash = (String) i.next(); + sb.append(", ").append(hash.substring(0, 3)).append("........."); + } + sb.append("]"); + return new String(sb); + } public static final boolean matches(String text, TreeSet keyhashes) { // returns true if any of the word hashes in keyhashes appear in the String text @@ -245,27 +263,13 @@ public plasmaSearchQuery(String queryString, TreeSet queryHashes, TreeSet exclud kelondroMSetTools.excludeDestructive(queryHashes, blues); } - public static String anonymizedQueryHashes(Set hashes) { - // create a more anonymized representation of euqery hashes for logging - StringBuffer sb = new StringBuffer(hashes.size() * 14 + 2); - Iterator i = hashes.iterator(); - sb.append("["); - String hash; - if (i.hasNext()) { - hash = (String) i.next(); - sb.append(hash.substring(0, 3)).append("........."); - } - while (i.hasNext()) { - hash = (String) i.next(); - sb.append(", ").append(hash.substring(0, 3)).append("........."); - } - sb.append("]"); - return new String(sb); - } - - public String id() { + public String id(boolean anonymized) { // generate a string that identifies a search so results can be re-used in a cache - return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom; + if (anonymized) { + return anonymizedQueryHashes(this.queryHashes) + "-" + anonymizedQueryHashes(this.excludeHashes) + ":" + this.contentdom; + } else { + return hashSet2hashString(this.queryHashes) + "-" + hashSet2hashString(this.excludeHashes) + ":" + this.contentdom; + } } public HashMap resultProfile(int searchcount, long searchtime, long urlretrieval, long snippetcomputation) { diff --git a/source/de/anomic/plasma/plasmaSearchRankingProcess.java b/source/de/anomic/plasma/plasmaSearchRankingProcess.java index 196c513de..134da2cb2 100644 --- a/source/de/anomic/plasma/plasmaSearchRankingProcess.java +++ b/source/de/anomic/plasma/plasmaSearchRankingProcess.java @@ -95,7 +95,7 @@ public final class 
plasmaSearchRankingProcess { long timer = System.currentTimeMillis(); this.localSearchContainerMaps = wordIndex.localSearchContainers(query, null); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.COLLECTION, this.localSearchContainerMaps[0].size(), System.currentTimeMillis() - timer)); // join and exlcude the local result timer = System.currentTimeMillis(); @@ -106,7 +106,7 @@ public final class plasmaSearchRankingProcess { this.localSearchContainerMaps[0].values(), this.localSearchContainerMaps[1].values(), query.maxDistance); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.JOIN, index.size(), System.currentTimeMillis() - timer)); int joincount = index.size(); if ((index == null) || (joincount == 0)) { @@ -172,7 +172,7 @@ public final class plasmaSearchRankingProcess { this.order = new indexRWIEntryOrder(ranking); } this.order.extend(container); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.NORMALIZING, container.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.NORMALIZING, container.size(), System.currentTimeMillis() - timer)); /* container.setOrdering(o, 0); @@ -233,7 +233,7 @@ public final class plasmaSearchRankingProcess { //System.out.println("###DEBUG### time to sort " + container.size() + " entries to " + this.filteredCount + ": " + sc + " milliseconds, " + (container.size() / sc) + " entries/millisecond, ranking = " + tc); //if ((query.neededResults() > 0) && 
(container.size() > query.neededResults())) remove(true, true); - serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(), plasmaSearchEvent.PRESORT, container.size(), System.currentTimeMillis() - timer)); + serverProfiling.update("SEARCH", new plasmaProfiling.searchEvent(query.id(true), plasmaSearchEvent.PRESORT, container.size(), System.currentTimeMillis() - timer)); } private boolean testFlags(indexRWIEntry ientry) {