From 66ec8b63c178ca9f29c2893043fc4051cd671aeb Mon Sep 17 00:00:00 2001 From: orbiter Date: Mon, 11 Jun 2007 14:05:20 +0000 Subject: [PATCH] added a httpd access tracker: - all requests to the own httdp can now be listed in the access tracker menu - the search statistics had been renamed to access tracker and extended by this tracker git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3861 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- ...Statistics_p.html => AccessTracker_p.html} | 54 ++++++++++++------- ...Statistics_p.java => AccessTracker_p.java} | 44 +++++++++++---- ...chStatistics_p.xml => AccessTracker_p.xml} | 36 +++++++++---- htroot/env/templates/header.template | 2 +- source/de/anomic/http/httpd.java | 9 ++-- .../de/anomic/index/indexContainerOrder.java | 26 +++++++++ .../anomic/server/serverAbstractSwitch.java | 52 ++++++++++++++++++ source/de/anomic/server/serverSwitch.java | 7 ++- source/de/anomic/server/serverTrack.java | 39 ++++++++++++++ yacy.init | 4 ++ 10 files changed, 226 insertions(+), 47 deletions(-) rename htroot/{SearchStatistics_p.html => AccessTracker_p.html} (66%) rename htroot/{SearchStatistics_p.java => AccessTracker_p.java} (82%) rename htroot/{SearchStatistics_p.xml => AccessTracker_p.xml} (66%) create mode 100644 source/de/anomic/server/serverTrack.java diff --git a/htroot/SearchStatistics_p.html b/htroot/AccessTracker_p.html similarity index 66% rename from htroot/SearchStatistics_p.html rename to htroot/AccessTracker_p.html index 55ca67989..0e57d6d3f 100644 --- a/htroot/SearchStatistics_p.html +++ b/htroot/AccessTracker_p.html @@ -1,30 +1,44 @@ - YaCy '#[clientname]#': Search Statistics + YaCy '#[clientname]#': Access Tracker #%env/templates/metas.template%# - + #%env/templates/header.template%# #(page)# -

Search Statistics

- You can monitor search attempts at your own peer. - Local statistics apply for search attempts at your own search portal, - and remote statistics show search requests form other peer. Not all remote peers ask your peer during a global search, - therefore your cannot have a global view over all search requests within the complete YaCy network. +

Server Access Tracker

+

This is a list of requests to the local http server within the last hour.

+

Showing #[num]# requests.

+ + + + + + + #{list}# + + + + + + #{/list}# +
HostDatePath
#[host]##[date]##[path]#
+ :: :: -

Search Statistics: Local Searches

+

Local Searches

This is a list of searches that have been requested from this peer's search interface

-

Showing #[num]# entries from a total of #[total]# Requests.

+

Showing #[num]# entries from a total of #[total]# requests.

@@ -50,9 +64,9 @@ #{/list}#
Requesting Host
:: -

Search Statistics: Local Searche Tracker

+

Local Search Tracker

This is a list of searches that have been requested from this peer's search interface

-

Showing #[num]# entries from a total of #[total]# Requests.

+

Showing #[num]# entries from a total of #[total]# requests.

@@ -70,9 +84,9 @@ #{/list}#
Requesting Host
:: -

Search Statistics: Remote Searches

+

Remote Searches

This is a list of searches that have been requested from remote peers' search interfaces

-

Showing #[num]# entries from a total of #[total]# Requests.

+

Showing #[num]# entries from a total of #[total]# requests.

@@ -98,9 +112,9 @@ #{/list}#
Requesting Host
:: -

Search Statistics: Remote Searche Tracker

+

Remote Search Tracker

This is a list of searches that have been requested from remote peers' search interfaces

-

Showing #[num]# entries from a total of #[total]# Requests.

+

Showing #[num]# entries from a total of #[total]# requests.

diff --git a/htroot/SearchStatistics_p.java b/htroot/AccessTracker_p.java similarity index 82% rename from htroot/SearchStatistics_p.java rename to htroot/AccessTracker_p.java index 5129e1e04..df4ba978a 100644 --- a/htroot/SearchStatistics_p.java +++ b/htroot/AccessTracker_p.java @@ -1,4 +1,4 @@ -// SearchStatistics_p.java +// AccessStatistics_p.java // (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany // first published 14.01.2007 on http://www.anomic.de // @@ -38,10 +38,11 @@ import de.anomic.plasma.plasmaSearchQuery; import de.anomic.plasma.plasmaSwitchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; +import de.anomic.server.serverTrack; import de.anomic.yacy.yacyCore; import de.anomic.yacy.yacySeed; -public class SearchStatistics_p { +public class AccessTracker_p { public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) { plasmaSwitchboard switchboard = (plasmaSwitchboard) sb; @@ -52,10 +53,31 @@ public class SearchStatistics_p { if (post != null) page = post.getInt("page", 0); prop.put("page", page); - int maxCount = 100; + int maxCount = 1000; boolean dark = true; - if ((page == 1) || (page == 3)) { - ArrayList array = (page == 1) ? 
switchboard.localSearches : switchboard.remoteSearches; + if (page == 0) { + Iterator i = switchboard.accessHosts(); + String host; + ArrayList access; + int entCount = 0; + serverTrack track; + while ((entCount < maxCount) && (i.hasNext())) { + host = (String) i.next(); + access = switchboard.accessTrack(host); + + for (int j = access.size() - 1; j >= 0; j--) { + track = (serverTrack) access.get(j); + prop.put("page_list_" + entCount + "_host", host); + prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(track.time))); + prop.put("page_list_" + entCount + "_path", track.path); + entCount++; + } + } + prop.put("page_list", entCount); + prop.put("page_num", entCount); + } + if ((page == 2) || (page == 4)) { + ArrayList array = (page == 2) ? switchboard.localSearches : switchboard.remoteSearches; Long trackerHandle; HashMap searchProfile; int m = Math.min(maxCount, array.size()); @@ -68,7 +90,7 @@ public class SearchStatistics_p { prop.put("page_list_" + entCount + "_host", (String) searchProfile.get("host")); prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(trackerHandle.longValue()))); prop.put("page_list_" + entCount + "_timestamp", Long.toString(trackerHandle.longValue())); - if (page == 1) { + if (page == 2) { // local search prop.put("page_list_" + entCount + "_offset", ((Integer) searchProfile.get("offset")).toString()); prop.put("page_list_" + entCount + "_querystring", searchProfile.get("querystring")); @@ -84,10 +106,10 @@ public class SearchStatistics_p { } prop.put("page_list", m); prop.put("page_num", m); - prop.put("page_total", (page == 1) ? switchboard.localSearches.size() : switchboard.remoteSearches.size()); + prop.put("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size()); } - if ((page == 2) || (page == 4)) { - Iterator i = (page == 2) ? 
switchboard.localSearchTracker.entrySet().iterator() : switchboard.remoteSearchTracker.entrySet().iterator(); + if ((page == 3) || (page == 5)) { + Iterator i = (page == 3) ? switchboard.localSearchTracker.entrySet().iterator() : switchboard.remoteSearchTracker.entrySet().iterator(); String host; TreeSet handles; int entCount = 0; @@ -111,7 +133,7 @@ public class SearchStatistics_p { prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! dark; prop.put("page_list_" + entCount + "_host", host); - if (page == 4) { + if (page == 5) { yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(host), true, true, true); prop.put("page_list_" + entCount + "_peername", (remotepeer == null) ? "UNKNOWN" : remotepeer.getName()); } @@ -122,7 +144,7 @@ public class SearchStatistics_p { } prop.put("page_list", entCount); prop.put("page_num", entCount); - prop.put("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size()); + prop.put("page_total", (page == 3) ? 
switchboard.localSearches.size() : switchboard.remoteSearches.size()); } // return rewrite properties return prop; diff --git a/htroot/SearchStatistics_p.xml b/htroot/AccessTracker_p.xml similarity index 66% rename from htroot/SearchStatistics_p.xml rename to htroot/AccessTracker_p.xml index 7ed6f8d5a..4f08c99f9 100644 --- a/htroot/SearchStatistics_p.xml +++ b/htroot/AccessTracker_p.xml @@ -1,8 +1,17 @@ - -#(page)# - :: + +#(page)# + #{list}# + + #[host]# + #[date]# + #[path]# + + #{/list}# + :: + :: #{list}# + #[host]# #[date]# #[offset]# @@ -11,21 +20,25 @@ #[querytime]# #[resulttime]# #[querystring]# + #{/list}# - :: + :: #{list}# - #[host]# + + #[host]# #[count]# #[qph]# #{dates}# #[date]# #{/dates}# + #{/list}# - :: + :: #{list}# - #[host]# + + #[host]# #[peername]# #[date]# #[querycount]# @@ -33,17 +46,20 @@ #[querytime]# #[resulttime]# #[queryhashes]# + #{/list}# - :: + :: #{list}# - #[host]# + + #[host]# #[count]# #[qph]# #{dates}# #[date]# #{/dates}# + #{/list}# #(/page)# - \ No newline at end of file + \ No newline at end of file diff --git a/htroot/env/templates/header.template b/htroot/env/templates/header.template index 042251866..d4f29b129 100644 --- a/htroot/env/templates/header.template +++ b/htroot/env/templates/header.template @@ -27,7 +27,7 @@
  • YaCy Network
  • Crawl Activity
  • Crawl Results
  • -
  • Search Statistics
  • +
  • Access Tracker
  • Server Log
  • Connections
  • Web Cache
  • diff --git a/source/de/anomic/http/httpd.java b/source/de/anomic/http/httpd.java index c03c17c68..ec49fb8a1 100644 --- a/source/de/anomic/http/httpd.java +++ b/source/de/anomic/http/httpd.java @@ -767,6 +767,11 @@ public final class httpd implements serverHandler { // parsing the header httpHeader.parseRequestLine(cmd,s,this.prop,virtualHost); + // track the request + String path = this.prop.getProperty(httpHeader.CONNECTION_PROP_URL); + String args = this.prop.getProperty(httpHeader.CONNECTION_PROP_ARGS, ""); + switchboard.track(this.userAddress.getHostName(), (args.length() > 0) ? path + "?" + args : path); + // reseting the empty request counter this.emptyRequestCount = 0; @@ -777,14 +782,10 @@ public final class httpd implements serverHandler { this.prop.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, this.clientIP); } - - - // some static methods that needs to be used from any CGI // and also by the httpdFileHandler // but this belongs to the protocol handler, this class. - public static int parseArgs(serverObjects args, InputStream in, int length) throws IOException { // this is a quick hack using a previously coded parseMultipart based on a buffer // should be replaced sometime by a 'right' implementation diff --git a/source/de/anomic/index/indexContainerOrder.java b/source/de/anomic/index/indexContainerOrder.java index c68a61ecf..7c82ca76b 100644 --- a/source/de/anomic/index/indexContainerOrder.java +++ b/source/de/anomic/index/indexContainerOrder.java @@ -1,3 +1,29 @@ +// indexContainerOrder.java +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + package de.anomic.index; import de.anomic.kelondro.kelondroOrder; diff --git a/source/de/anomic/server/serverAbstractSwitch.java b/source/de/anomic/server/serverAbstractSwitch.java index 6a0b00b93..890039cb1 100644 --- a/source/de/anomic/server/serverAbstractSwitch.java +++ b/source/de/anomic/server/serverAbstractSwitch.java @@ -43,6 +43,7 @@ package de.anomic.server; import java.io.File; import java.io.IOException; import java.net.InetAddress; +import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Map; @@ -52,6 +53,8 @@ import de.anomic.server.logging.serverLog; public abstract class serverAbstractSwitch implements serverSwitch { + private static final long maxTrackingTimeDefault = 1000 * 60 * 60; // store only access data from the last hour to save ram space + // configuration management private final File configFile; private Map configProps; @@ -63,6 +66,8 @@ public abstract class serverAbstractSwitch implements serverSwitch { 
private final TreeMap switchActions; protected serverLog log; protected int serverJobs; + protected HashMap accessTracker; // mappings from requesting host to an ArrayList of serverTrack-entries + protected long maxTrackingTime; public serverAbstractSwitch(String rootPath, String initPath, String configPath) { // we initialize the switchboard with a property file, @@ -120,6 +125,7 @@ public abstract class serverAbstractSwitch implements serverSwitch { // other settings authorization = new HashMap(); + accessTracker = new HashMap(); // init thread control workerThreads = new TreeMap(); @@ -129,6 +135,9 @@ public abstract class serverAbstractSwitch implements serverSwitch { // init busy state control serverJobs = 0; + + // init server tracking + maxTrackingTime = getConfigLong("maxTrackingTime", maxTrackingTimeDefault); } // a logger for this switchboard @@ -139,6 +148,49 @@ public abstract class serverAbstractSwitch implements serverSwitch { public serverLog getLog() { return log; } + + public void track(String host, String accessPath) { + // learn that a specific host has accessed a specific path + ArrayList access = (ArrayList) accessTracker.get(host); + if (access == null) access = new ArrayList(); + access.add(new serverTrack(accessPath)); + + // clear too old entries + clearTooOldAccess(access); + + // write back to tracker + accessTracker.put(host, access); + } + + public ArrayList accessTrack(String host) { + // returns mapping from Long(accesstime) to path + + ArrayList access = (ArrayList) accessTracker.get(host); + if (access == null) return null; + + // clear too old entries + if (clearTooOldAccess(access)) { + // write back to tracker + accessTracker.put(host, access); + } + + return access; + } + + private boolean clearTooOldAccess(ArrayList access) { + boolean changed = false; + while ((access.size() > 0) && + (((serverTrack) access.get(0)).time < (System.currentTimeMillis() - maxTrackingTime))) { + access.remove(0); + changed = true; + } + return 
changed; + } + + public Iterator accessHosts() { + // returns an iterator of hosts in tracker (String) + return accessTracker.keySet().iterator(); + } public void setConfig(String key, boolean value) { setConfig(key, (value) ? "true" : "false"); diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index af0d02c10..d4f55e766 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -50,9 +50,9 @@ package de.anomic.server; import java.net.InetAddress; +import java.util.ArrayList; import java.util.Iterator; import java.util.Map; - import de.anomic.server.logging.serverLog; public interface serverSwitch { @@ -64,6 +64,11 @@ public interface serverSwitch { public void setLog(serverLog log); public serverLog getLog(); + // access tracker + public void track(String host, String accessPath); // learn that a specific host has accessed a specific path + public ArrayList accessTrack(String host); // returns mapping from Long(accesstime) to path + public Iterator accessHosts(); // returns an iterator of hosts in tracker (String) + // a switchboard can have action listener // these listeners are hooks for numerous methods below public void deployAction(String actionName, diff --git a/source/de/anomic/server/serverTrack.java b/source/de/anomic/server/serverTrack.java new file mode 100644 index 000000000..49178b8df --- /dev/null +++ b/source/de/anomic/server/serverTrack.java @@ -0,0 +1,39 @@ +// serverTrack.java +// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. 
M., Germany +// first published 11.06.2007 on http://yacy.net +// +// This is a part of YaCy, a peer-to-peer based web search engine +// +// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $ +// $LastChangedRevision: 1986 $ +// $LastChangedBy: orbiter $ +// +// LICENSE +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +package de.anomic.server; + +public class serverTrack { + + public long time; // access time + public String path; + + public serverTrack(String path) { + this.time = System.currentTimeMillis(); + this.path = path; + } + +} diff --git a/yacy.init b/yacy.init index 81a195561..70c50e3ae 100644 --- a/yacy.init +++ b/yacy.init @@ -60,6 +60,10 @@ keyStorePassword = pkcs12ImportFile = pkcs12ImportPwd = +# server tracking: maximum time a track entry is held in the internal cache +# value is in milliseconds, default is one hour +server.maxTrackingTime = 3600000 + # peer-to-peer construction for distributed search # we have several stages: # 1st: a file within every distribution that has a list of URLs:
    Requesting Host