- moved some variables from Switchboard to new class AccessTracker

- added a limitation in access tracking to delete queries which are older than 10 minutes

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7410 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent 0c79789517
commit a4c9d27287

@ -39,6 +39,7 @@ import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import de.anomic.search.AccessTracker;
import de.anomic.search.QueryParams;
import de.anomic.search.Switchboard;
import de.anomic.server.serverCore;
@ -46,7 +47,6 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverAccessTracker.Track;
import de.anomic.yacy.yacySeed;
import java.util.List;
public class AccessTracker_p {
@ -140,9 +140,8 @@ public class AccessTracker_p {
prop.put("page_list", entCount);
prop.put("page_num", entCount);
} else if ((page == 2) || (page == 4)) {
final List<QueryParams> array = (page == 2) ? sb.localSearches : sb.remoteSearches;
QueryParams searchProfile;
int m = Math.min(maxCount, array.size());
final Iterator<QueryParams> ai = (page == 2) ? AccessTracker.get(AccessTracker.Location.local) : AccessTracker.get(AccessTracker.Location.remote);
QueryParams query;
long qcountSum = 0;
long rcountSum = 0;
long rcount = 0;
@ -152,42 +151,47 @@ public class AccessTracker_p {
long utimeSum1 = 0;
long stimeSum1 = 0;
long rtimeSum1 = 0;
int m = 0;
for (int entCount = 0; entCount < m; entCount++) {
searchProfile = array.get(array.size() - entCount - 1);
while (ai.hasNext()) {
try {
query = ai.next();
} catch (ConcurrentModificationException e) {
break;
}
// put values in template
prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) );
prop.put("page_list_" + m + "_dark", ((dark) ? 1 : 0) );
dark =! dark;
prop.putHTML("page_list_" + entCount + "_host", searchProfile.host);
prop.put("page_list_" + entCount + "_date", SimpleFormatter.format(new Date(searchProfile.handle.longValue())));
prop.put("page_list_" + entCount + "_timestamp", searchProfile.handle.longValue());
prop.putHTML("page_list_" + m + "_host", query.host);
prop.put("page_list_" + m + "_date", SimpleFormatter.format(new Date(query.time.longValue())));
prop.put("page_list_" + m + "_timestamp", query.time.longValue());
if (page == 2) {
// local search
prop.putNum("page_list_" + entCount + "_offset", searchProfile.offset);
prop.putHTML("page_list_" + entCount + "_querystring", searchProfile.queryString);
prop.putNum("page_list_" + m + "_offset", query.offset);
prop.putHTML("page_list_" + m + "_querystring", query.queryString);
} else {
// remote search
prop.putHTML("page_list_" + entCount + "_peername", (searchProfile.remotepeer == null) ? "<unknown>" : searchProfile.remotepeer.getName());
prop.put("page_list_" + entCount + "_queryhashes", QueryParams.anonymizedQueryHashes(searchProfile.queryHashes));
prop.putHTML("page_list_" + m + "_peername", (query.remotepeer == null) ? "<unknown>" : query.remotepeer.getName());
prop.put("page_list_" + m + "_queryhashes", QueryParams.anonymizedQueryHashes(query.queryHashes));
}
prop.putNum("page_list_" + entCount + "_querycount", searchProfile.itemsPerPage);
prop.putNum("page_list_" + entCount + "_resultcount", searchProfile.resultcount);
prop.putNum("page_list_" + entCount + "_urltime", searchProfile.urlretrievaltime);
prop.putNum("page_list_" + entCount + "_snippettime", searchProfile.snippetcomputationtime);
prop.putNum("page_list_" + entCount + "_resulttime", searchProfile.searchtime);
prop.putHTML("page_list_" + entCount + "_userAgent", searchProfile.userAgent);
qcountSum += searchProfile.itemsPerPage;
rcountSum += searchProfile.resultcount;
utimeSum += searchProfile.urlretrievaltime;
stimeSum += searchProfile.snippetcomputationtime;
rtimeSum += searchProfile.searchtime;
if (searchProfile.resultcount > 0){
prop.putNum("page_list_" + m + "_querycount", query.itemsPerPage);
prop.putNum("page_list_" + m + "_resultcount", query.resultcount);
prop.putNum("page_list_" + m + "_urltime", query.urlretrievaltime);
prop.putNum("page_list_" + m + "_snippettime", query.snippetcomputationtime);
prop.putNum("page_list_" + m + "_resulttime", query.searchtime);
prop.putHTML("page_list_" + m + "_userAgent", query.userAgent);
qcountSum += query.itemsPerPage;
rcountSum += query.resultcount;
utimeSum += query.urlretrievaltime;
stimeSum += query.snippetcomputationtime;
rtimeSum += query.searchtime;
if (query.resultcount > 0){
rcount++;
utimeSum1 += searchProfile.urlretrievaltime;
stimeSum1 += searchProfile.snippetcomputationtime;
rtimeSum1 += searchProfile.searchtime;
utimeSum1 += query.urlretrievaltime;
stimeSum1 += query.snippetcomputationtime;
rtimeSum1 += query.searchtime;
}
m++;
}
prop.put("page_list", m);
prop.put("page_num", m);
@ -228,16 +232,16 @@ public class AccessTracker_p {
prop.putNum("page_urltime_avg1", (double) utimeSum1 / rcount);
prop.putNum("page_snippettime_avg1", (double) stimeSum1 / rcount);
prop.putNum("page_resulttime_avg1", (double) rtimeSum1 / rcount);
prop.putNum("page_total", (page == 2) ? sb.localSearches.size() : sb.remoteSearches.size());
prop.putNum("page_total", (page == 2) ? AccessTracker.size(AccessTracker.Location.local) : AccessTracker.size(AccessTracker.Location.remote));
} else if ((page == 3) || (page == 5)) {
final Iterator<Entry<String, TreeSet<Long>>> i = (page == 3) ? sb.localSearchTracker.entrySet().iterator() : sb.remoteSearchTracker.entrySet().iterator();
String host;
TreeSet<Long> handles;
int entCount = 0;
int m = 0;
int qphSum = 0;
Map.Entry<String, TreeSet<Long>> entry;
try {
while ((entCount < maxCount) && (i.hasNext())) {
while ((m < maxCount) && (i.hasNext())) {
entry = i.next();
host = entry.getKey();
handles = entry.getValue();
@ -246,29 +250,29 @@ public class AccessTracker_p {
final Iterator<Long> ii = handles.iterator();
while (ii.hasNext()) {
final Long timestamp = ii.next();
prop.put("page_list_" + entCount + "_dates_" + dateCount + "_date", SimpleFormatter.format(new Date(timestamp.longValue())));
prop.put("page_list_" + entCount + "_dates_" + dateCount + "_timestamp", timestamp.toString());
prop.put("page_list_" + m + "_dates_" + dateCount + "_date", SimpleFormatter.format(new Date(timestamp.longValue())));
prop.put("page_list_" + m + "_dates_" + dateCount + "_timestamp", timestamp.toString());
dateCount++;
}
prop.put("page_list_" + entCount + "_dates", dateCount);
prop.put("page_list_" + m + "_dates", dateCount);
final int qph = handles.tailSet(Long.valueOf(System.currentTimeMillis() - 1000 * 60 * 60)).size();
qphSum += qph;
prop.put("page_list_" + entCount + "_qph", qph);
prop.put("page_list_" + m + "_qph", qph);
prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! dark;
prop.putHTML("page_list_" + entCount + "_host", host);
prop.put("page_list_" + m + "_dark", ((dark) ? 1 : 0) ); dark =! dark;
prop.putHTML("page_list_" + m + "_host", host);
if (page == 5) {
final yacySeed remotepeer = sb.peers.lookupByIP(Domains.dnsResolve(host), true, true, true);
prop.putHTML("page_list_" + entCount + "_peername", (remotepeer == null) ? "UNKNOWN" : remotepeer.getName());
prop.putHTML("page_list_" + m + "_peername", (remotepeer == null) ? "UNKNOWN" : remotepeer.getName());
}
prop.putNum("page_list_" + entCount + "_count", handles.size());
prop.putNum("page_list_" + m + "_count", handles.size());
// next
entCount++;
m++;
}
} catch (final ConcurrentModificationException e) {} // we dont want to synchronize this
// return empty values to not break the table view if no results can be listed
if (entCount==0) {
if (m==0) {
prop.put("page_list", 1);
prop.put("page_list_0_dates_0_date", "&nbsp;");
prop.put("page_list_0_dates", 1);
@ -278,10 +282,10 @@ public class AccessTracker_p {
prop.put("page_list_0_host", "&nbsp;");
prop.putNum("page_list_0_count", "");
} else {
prop.put("page_list", entCount);
prop.put("page_list", m);
}
prop.putNum("page_num", entCount);
prop.putNum("page_total", (page == 3) ? sb.localSearches.size() : sb.remoteSearches.size());
prop.putNum("page_num", m);
prop.putNum("page_total", (page == 3) ? AccessTracker.size(AccessTracker.Location.local) : AccessTracker.size(AccessTracker.Location.remote));
prop.putNum("page_qph_sum", qphSum);
}
// return rewrite properties

@ -51,6 +51,7 @@ import net.yacy.kelondro.util.EventTracker;
import net.yacy.kelondro.util.ISO639;
import de.anomic.crawler.CrawlProfile;
import de.anomic.search.AccessTracker;
import de.anomic.search.ContentDomain;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
@ -397,11 +398,11 @@ public final class search {
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = (theSearch == null) ? 0 : theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = (theSearch == null) ? 0 : theSearch.result().getSnippetComputationTime();
sb.remoteSearches.add(theQuery);
AccessTracker.add(AccessTracker.Location.remote, theQuery);
// update the search tracker
synchronized (trackerHandles) {
trackerHandles.add(theQuery.handle); // thats the time when the handle was created
trackerHandles.add(theQuery.time); // thats the time when the handle was created
// we don't need too much entries in the list; remove superfluous
while (trackerHandles.size() > 36) if (!trackerHandles.remove(trackerHandles.first())) break;
}

@ -57,6 +57,7 @@ import net.yacy.kelondro.util.ISO639;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.DidYouMean;
import de.anomic.data.LibraryProvider;
import de.anomic.search.AccessTracker;
import de.anomic.search.ContentDomain;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
@ -544,7 +545,7 @@ public class yacysearch {
theQuery.searchtime = System.currentTimeMillis() - timestamp;
theQuery.urlretrievaltime = theSearch.result().getURLRetrievalTime();
theQuery.snippetcomputationtime = theSearch.result().getSnippetComputationTime();
sb.localSearches.add(theQuery);
AccessTracker.add(AccessTracker.Location.local, theQuery);
// check suggestions
int meanMax = 0;
@ -595,7 +596,7 @@ public class yacysearch {
// update the search tracker
try {
synchronized (trackerHandles) {
trackerHandles.add(theQuery.handle);
trackerHandles.add(theQuery.time);
while (trackerHandles.size() > 600) if (!trackerHandles.remove(trackerHandles.first())) break;
}
sb.localSearchTracker.put(client, trackerHandles);

@ -0,0 +1,68 @@
/**
* AccessTracker
* keeps bounded histories of the most recent local and remote search queries
* Copyright 2009 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 29.08.2009 at http://yacy.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package de.anomic.search;
import java.util.Iterator;
import java.util.LinkedList;
/**
 * Keeps two bounded, in-memory histories of search queries: one for
 * {@link Location#local} searches and one for {@link Location#remote} searches.
 *
 * <p>Each history is ordered oldest-first. New queries are appended at the end
 * and eviction always happens at the front. All access to a history is guarded
 * by the monitor of its list, so the static methods of this class may be called
 * from several threads.
 */
public class AccessTracker {

    /** While a history holds at most this many entries, no age-based eviction is done. */
    public static final int minSize = 1000;

    /** Hard upper bound for the number of entries in one history. */
    public static final int maxSize = 5000;

    /** Age in milliseconds (10 minutes) after which an entry may be evicted. */
    public static final int maxAge = 10 * 60 * 1000;

    /** Selects which of the two histories an operation works on. */
    public enum Location {local, remote}

    // final, because these lists also serve as the lock objects for their own content
    private static final LinkedList<QueryParams> localSearches = new LinkedList<QueryParams>();
    private static final LinkedList<QueryParams> remoteSearches = new LinkedList<QueryParams>();

    /**
     * Appends a query to the selected history and evicts superfluous entries.
     *
     * @param location the history to append to; any other value than the two
     *                 enum constants (i.e. {@code null}) makes this call a no-op
     * @param query the query to record
     */
    public static void add(Location location, QueryParams query) {
        final LinkedList<QueryParams> list = listFor(location);
        if (list == null) {
            return;
        }
        synchronized (list) {
            add(list, query);
        }
    }

    /**
     * Appends the query and shrinks the list. The caller must hold the lock on {@code list}.
     *
     * <p>First the list is cut down to {@link #maxSize} entries. Only if it is
     * then still larger than {@link #minSize}, entries whose time stamp is not
     * newer than {@link #maxAge} milliseconds ago are dropped from the front.
     */
    private static void add(LinkedList<QueryParams> list, QueryParams query) {
        list.add(query);

        // enforce the hard size limit
        while (list.size() > maxSize) {
            list.removeFirst();
        }

        // small lists are never cleaned by age
        if (list.size() <= minSize) {
            return;
        }

        // the list is ordered by insertion, so stop at the first entry that is young enough
        final long timeout = System.currentTimeMillis() - maxAge;
        while (!list.isEmpty()) {
            final QueryParams oldest = list.getFirst();
            if (oldest.time.longValue() > timeout) {
                break;
            }
            list.removeFirst();
        }
    }

    /**
     * Returns an iterator over the selected history, newest entry first.
     *
     * <p>The iterator walks a snapshot that is taken while holding the list's
     * lock. Concurrent calls to {@link #add(Location, QueryParams)} therefore
     * cannot cause a {@link java.util.ConcurrentModificationException} in the
     * reader, and {@link Iterator#remove()} on the returned iterator does not
     * modify the tracked history.
     *
     * @param location the history to read
     * @return an iterator over a copy of the history in reverse insertion
     *         order; an empty iterator if {@code location} is {@code null}
     */
    public static Iterator<QueryParams> get(Location location) {
        final LinkedList<QueryParams> list = listFor(location);
        if (list == null) {
            return new LinkedList<QueryParams>().descendingIterator();
        }
        final LinkedList<QueryParams> snapshot;
        synchronized (list) {
            snapshot = new LinkedList<QueryParams>(list);
        }
        return snapshot.descendingIterator();
    }

    /**
     * Returns the number of entries currently held in the selected history.
     *
     * @param location the history to measure
     * @return the entry count, or 0 if {@code location} is {@code null}
     */
    public static int size(Location location) {
        final LinkedList<QueryParams> list = listFor(location);
        if (list == null) {
            return 0;
        }
        synchronized (list) {
            return list.size();
        }
    }

    /** Maps a location to its backing list; returns {@code null} for a {@code null} location. */
    private static LinkedList<QueryParams> listFor(Location location) {
        if (location == Location.local) {
            return localSearches;
        }
        if (location == Location.remote) {
            return remoteSearches;
        }
        return null;
    }
}

@ -96,7 +96,7 @@ public final class QueryParams {
public final String authorhash;
public final String tenant;
public yacySeed remotepeer;
public final Long handle;
public final Long time;
// values that are set after a search:
public int resultcount; // number of found results
public long searchtime, urlretrievaltime, snippetcomputationtime; // time to perform the search, to get all the urls, and to compute the snippets
@ -147,7 +147,7 @@ public final class QueryParams {
this.sitehash = null;
this.authorhash = null;
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.time = Long.valueOf(System.currentTimeMillis());
this.specialRights = false;
this.navigators = "all";
this.indexSegment = indexSegment;
@ -202,7 +202,7 @@ public final class QueryParams {
this.snippetCacheStrategy = snippetCacheStrategy;
this.host = host;
this.remotepeer = null;
this.handle = Long.valueOf(System.currentTimeMillis());
this.time = Long.valueOf(System.currentTimeMillis());
this.specialRights = specialRights;
this.indexSegment = indexSegment;
this.userAgent = userAgent;

@ -215,8 +215,6 @@ public final class Switchboard extends serverSwitch {
public UserDB userDB;
public BookmarksDB bookmarksDB;
public WebStructureGraph webStructure;
public ArrayList<QueryParams> localSearches; // array of search result properties as HashMaps
public ArrayList<QueryParams> remoteSearches; // array of search result properties as HashMaps
public ConcurrentHashMap<String, TreeSet<Long>> localSearchTracker, remoteSearchTracker; // mappings from requesting host to a TreeSet of Long(access time)
public long indexedPages = 0;
public int searchQueriesRobinsonFromLocal = 0; // absolute counter of all local queries submitted on this peer from a local or autheticated used
@ -519,8 +517,6 @@ public final class Switchboard extends serverSwitch {
// init search history trackers
this.localSearchTracker = new ConcurrentHashMap<String, TreeSet<Long>>(); // String:TreeSet - IP:set of Long(accessTime)
this.remoteSearchTracker = new ConcurrentHashMap<String, TreeSet<Long>>();
this.localSearches = new ArrayList<QueryParams>(); // contains search result properties as HashMaps
this.remoteSearches = new ArrayList<QueryParams>();
// init messages: clean up message symbol
final File notifierSource = new File(getAppPath(), getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT) + "/env/grafics/empty.gif");

@ -844,7 +844,7 @@ public class Table implements Index, Iterable<Row.Entry> {
}
public boolean hasNext() {
return i.hasNext();
return i != null && i.hasNext();
}
public Entry next() {

Loading…
Cancel
Save