added a httpd access tracker:

- all requests to the own httpd can now be listed in the access tracker menu
- the search statistics have been renamed to access tracker and extended by this tracker

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@3861 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 18 years ago
parent 87ac35fbe0
commit 66ec8b63c1

@ -1,30 +1,44 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>YaCy '#[clientname]#': Search Statistics</title>
<title>YaCy '#[clientname]#': Access Tracker</title>
#%env/templates/metas.template%#
</head>
<body id="SearchStatistics">
<body id="AccessTracker">
#%env/templates/header.template%#
<div class="SubMenu">
<h3>Search Statistics Menu</h3>
<h3>Access Tracker Menu</h3>
<ul class="SubMenu">
<li><a href="/SearchStatistics_p.html?page=1" class="MenuItemLink lock">Local&nbsp;Search&nbsp;Log</a></li>
<li><a href="/SearchStatistics_p.html?page=2" class="MenuItemLink lock">Local&nbsp;Search&nbsp;Host&nbsp;Tracker</a></li>
<li><a href="/SearchStatistics_p.html?page=3" class="MenuItemLink lock">Remote&nbsp;Search&nbsp;Log</a></li>
<li><a href="/SearchStatistics_p.html?page=4" class="MenuItemLink lock">Remote&nbsp;Search&nbsp;Host&nbsp;Tracker</a></li>
<li><a href="/AccessTracker_p.html?page=0" class="MenuItemLink lock">Server&nbsp;Access&nbsp;Tracker</a></li>
<li><a href="/AccessTracker_p.html?page=2" class="MenuItemLink lock">Local&nbsp;Search&nbsp;Log</a></li>
<li><a href="/AccessTracker_p.html?page=3" class="MenuItemLink lock">Local&nbsp;Search&nbsp;Host&nbsp;Tracker</a></li>
<li><a href="/AccessTracker_p.html?page=4" class="MenuItemLink lock">Remote&nbsp;Search&nbsp;Log</a></li>
<li><a href="/AccessTracker_p.html?page=5" class="MenuItemLink lock">Remote&nbsp;Search&nbsp;Host&nbsp;Tracker</a></li>
</ul>
</div>
#(page)#
<h2>Search Statistics</h2>
You can monitor search attempts at your own peer.
Local statistics apply for search attempts at your own search portal,
and remote statistics show search requests form other peer. Not all remote peers ask your peer during a global search,
therefore your cannot have a global view over all search requests within the complete YaCy network.
<h2>Server Access Tracker</h2>
<p>This is a list of requests to the local http server within the last hour.</p>
<p>Showing #[num]# requests.</p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>Host</td>
<td>Date</td>
<td>Path</td>
</tr>
#{list}#
<tr class="TableCell#(dark)#Light::Dark#(/dark)#">
<td>#[host]#</td>
<td>#[date]#</td>
<td>#[path]#</td>
</tr>
#{/list}#
</table>
::
::
<h2>Search Statistics: Local Searches</h2>
<h2>Local Searches</h2>
<p>This is a list of searches that had been requested from this' peer search interface</p>
<p>Showing #[num]# entries from a total of #[total]# Requests.</p>
<p>Showing #[num]# entries from a total of #[total]# requests.</p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>Requesting Host</td>
@ -50,9 +64,9 @@
#{/list}#
</table>
::
<h2>Search Statistics: Local Searche Tracker</h2>
<h2>Local Search Tracker</h2>
<p>This is a list of searches that had been requested from this' peer search interface</p>
<p>Showing #[num]# entries from a total of #[total]# Requests.</p>
<p>Showing #[num]# entries from a total of #[total]# requests.</p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>Requesting Host</td>
@ -70,9 +84,9 @@
#{/list}#
</table>
::
<h2>Search Statistics: Remote Searches</h2>
<h2>Remote Searches</h2>
<p>This is a list of searches that had been requested from remote peer search interface</p>
<p>Showing #[num]# entries from a total of #[total]# Requests.</p>
<p>Showing #[num]# entries from a total of #[total]# requests.</p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>Requesting Host</td>
@ -98,9 +112,9 @@
#{/list}#
</table>
::
<h2>Search Statistics: Remote Searche Tracker</h2>
<h2>Remote Search Tracker</h2>
<p>This is a list of searches that had been requested from remote peer search interface</p>
<p>Showing #[num]# entries from a total of #[total]# Requests.</p>
<p>Showing #[num]# entries from a total of #[total]# requests.</p>
<table border="0" cellpadding="2" cellspacing="1">
<tr class="TableHeader">
<td>Requesting Host</td>

@ -1,4 +1,4 @@
// SearchStatistics_p.java
// AccessTracker_p.java
// (C) 2006 by Michael Peter Christen; mc@anomic.de, Frankfurt a. M., Germany
// first published 14.01.2007 on http://www.anomic.de
//
@ -38,10 +38,11 @@ import de.anomic.plasma.plasmaSearchQuery;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverTrack;
import de.anomic.yacy.yacyCore;
import de.anomic.yacy.yacySeed;
public class SearchStatistics_p {
public class AccessTracker_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch sb) {
plasmaSwitchboard switchboard = (plasmaSwitchboard) sb;
@ -52,10 +53,31 @@ public class SearchStatistics_p {
if (post != null) page = post.getInt("page", 0);
prop.put("page", page);
int maxCount = 100;
int maxCount = 1000;
boolean dark = true;
if ((page == 1) || (page == 3)) {
ArrayList array = (page == 1) ? switchboard.localSearches : switchboard.remoteSearches;
if (page == 0) {
Iterator i = switchboard.accessHosts();
String host;
ArrayList access;
int entCount = 0;
serverTrack track;
while ((entCount < maxCount) && (i.hasNext())) {
host = (String) i.next();
access = switchboard.accessTrack(host);
for (int j = access.size() - 1; j >= 0; j--) {
track = (serverTrack) access.get(j);
prop.put("page_list_" + entCount + "_host", host);
prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(track.time)));
prop.put("page_list_" + entCount + "_path", track.path);
entCount++;
}
}
prop.put("page_list", entCount);
prop.put("page_num", entCount);
}
if ((page == 2) || (page == 4)) {
ArrayList array = (page == 2) ? switchboard.localSearches : switchboard.remoteSearches;
Long trackerHandle;
HashMap searchProfile;
int m = Math.min(maxCount, array.size());
@ -68,7 +90,7 @@ public class SearchStatistics_p {
prop.put("page_list_" + entCount + "_host", (String) searchProfile.get("host"));
prop.put("page_list_" + entCount + "_date", yacyCore.universalDateShortString(new Date(trackerHandle.longValue())));
prop.put("page_list_" + entCount + "_timestamp", Long.toString(trackerHandle.longValue()));
if (page == 1) {
if (page == 2) {
// local search
prop.put("page_list_" + entCount + "_offset", ((Integer) searchProfile.get("offset")).toString());
prop.put("page_list_" + entCount + "_querystring", searchProfile.get("querystring"));
@ -84,10 +106,10 @@ public class SearchStatistics_p {
}
prop.put("page_list", m);
prop.put("page_num", m);
prop.put("page_total", (page == 1) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
prop.put("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
}
if ((page == 2) || (page == 4)) {
Iterator i = (page == 2) ? switchboard.localSearchTracker.entrySet().iterator() : switchboard.remoteSearchTracker.entrySet().iterator();
if ((page == 3) || (page == 5)) {
Iterator i = (page == 3) ? switchboard.localSearchTracker.entrySet().iterator() : switchboard.remoteSearchTracker.entrySet().iterator();
String host;
TreeSet handles;
int entCount = 0;
@ -111,7 +133,7 @@ public class SearchStatistics_p {
prop.put("page_list_" + entCount + "_dark", ((dark) ? 1 : 0) ); dark =! dark;
prop.put("page_list_" + entCount + "_host", host);
if (page == 4) {
if (page == 5) {
yacySeed remotepeer = yacyCore.seedDB.lookupByIP(natLib.getInetAddress(host), true, true, true);
prop.put("page_list_" + entCount + "_peername", (remotepeer == null) ? "UNKNOWN" : remotepeer.getName());
}
@ -122,7 +144,7 @@ public class SearchStatistics_p {
}
prop.put("page_list", entCount);
prop.put("page_num", entCount);
prop.put("page_total", (page == 2) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
prop.put("page_total", (page == 3) ? switchboard.localSearches.size() : switchboard.remoteSearches.size());
}
// return rewrite properties
return prop;

@ -1,8 +1,17 @@
<?xml version="1.0"?>
<SearchStatistics>
#(page)#<!-- 0: info text -->
::<!-- 1: Search Statistics: Local Searches -->
<AccessTracker>
#(page)#<!-- 0: access tracker -->
<localAccessTracker>#{list}#
<entry>
<host>#[host]#</host>
<date>#[date]#</date>
<path>#[path]#</path>
</entry>
#{/list}#</localAccessTracker>
::<!-- 1: reserve -->
::<!-- 2: Search Statistics: Local Searches -->
<localSearchLog>#{list}#
<entry>
<host>#[host]#</host>
<date timestamp="#[timestamp]#">#[date]#</date>
<offset>#[offset]#</offset>
@ -11,21 +20,25 @@
<querytime>#[querytime]#</querytime>
<resulttime>#[resulttime]#</resulttime>
<querystring>#[querystring]#</querystring>
</entry>
#{/list}#</localSearchLog>
::<!-- 2: Search Statistics: Local Search Tracker -->
::<!-- 3: Search Statistics: Local Search Tracker -->
<localSearchTracker>#{list}#
<host>#[host]#</host>
<entry>
<host>#[host]#</host>
<count>#[count]#</count>
<qph>#[qph]#</qph>
<dates>#{dates}#
<date timestamp="#[timestamp]#">#[date]#</date>
#{/dates}#</dates>
</entry>
#{/list}#</localSearchTracker>
::<!-- 3: Search Statistics: Remote Searches -->
::<!-- 4: Search Statistics: Remote Searches -->
<remoteSearchLog>#{list}#
<host>#[host]#</host>
<entry>
<host>#[host]#</host>
<peername>#[peername]#</peername>
<date timestamp="#[timestamp]#">#[date]#</date>
<querycount>#[querycount]#</querycount>
@ -33,17 +46,20 @@
<querytime>#[querytime]#</querytime>
<resulttime>#[resulttime]#</resulttime>
<queryhashes>#[queryhashes]#</queryhashes>
</entry>
#{/list}#</remoteSearchLog>
::<!-- 4: Search Statistics: Remote Search Tracker -->
::<!-- 5: Search Statistics: Remote Search Tracker -->
<remoteSearchTracker>#{list}#
<host>#[host]#</host>
<entry>
<host>#[host]#</host>
<count>#[count]#</count>
<qph>#[qph]#</qph>
<dates>#{dates}#
<date timestamp="#[timestamp]#">#[date]#</date>
#{/dates}#</dates>
</entry>
#{/list}#</remoteSearchTracker>
#(/page)#
</SearchStatistics>
</AccessTracker>

@ -27,7 +27,7 @@
<li><a href="/Network.html" accesskey="w" class="MenuItemLink">YaCy Network</a></li>
<li><a href="/WatchWebStructure_p.html?host=auto&depth=2&time=1000" class="MenuItemLink lock">Crawl Activity</a></li>
<li><a href="/IndexMonitor.html" class="MenuItemLink">Crawl Results</a></li>
<li><a href="/SearchStatistics_p.html" class="MenuItemLink lock">Search Statistics</a></li>
<li><a href="/AccessTracker_p.html" class="MenuItemLink lock">Access Tracker</a></li>
<li><a href="/ViewLog_p.html" class="MenuItemLink lock">Server Log</a></li>
<li><a href="/Connections_p.html" class="MenuItemLink lock">Connections</a></li>
<li><a href="/CacheAdmin_p.html" class="MenuItemLink lock">Web Cache</a></li>

@ -767,6 +767,11 @@ public final class httpd implements serverHandler {
// parsing the header
httpHeader.parseRequestLine(cmd,s,this.prop,virtualHost);
// track the request
String path = this.prop.getProperty(httpHeader.CONNECTION_PROP_URL);
String args = this.prop.getProperty(httpHeader.CONNECTION_PROP_ARGS, "");
switchboard.track(this.userAddress.getHostName(), (args.length() > 0) ? path + "?" + args : path);
// reseting the empty request counter
this.emptyRequestCount = 0;
@ -777,14 +782,10 @@ public final class httpd implements serverHandler {
this.prop.setProperty(httpHeader.CONNECTION_PROP_CLIENTIP, this.clientIP);
}
// some static methods that needs to be used from any CGI
// and also by the httpdFileHandler
// but this belongs to the protocol handler, this class.
public static int parseArgs(serverObjects args, InputStream in, int length) throws IOException {
// this is a quick hack using a previously coded parseMultipart based on a buffer
// should be replaced sometime by a 'right' implementation

@ -1,3 +1,29 @@
// indexContainerOrder.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.index;
import de.anomic.kelondro.kelondroOrder;

@ -43,6 +43,7 @@ package de.anomic.server;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
@ -52,6 +53,8 @@ import de.anomic.server.logging.serverLog;
public abstract class serverAbstractSwitch implements serverSwitch {
private static final long maxTrackingTimeDefault = 1000 * 60 * 60; // store only access data from the last hour to save ram space
// configuration management
private final File configFile;
private Map configProps;
@ -63,6 +66,8 @@ public abstract class serverAbstractSwitch implements serverSwitch {
private final TreeMap switchActions;
protected serverLog log;
protected int serverJobs;
protected HashMap accessTracker; // mappings from requesting host to an ArrayList of serverTrack-entries
protected long maxTrackingTime;
public serverAbstractSwitch(String rootPath, String initPath, String configPath) {
// we initialize the switchboard with a property file,
@ -120,6 +125,7 @@ public abstract class serverAbstractSwitch implements serverSwitch {
// other settings
authorization = new HashMap();
accessTracker = new HashMap();
// init thread control
workerThreads = new TreeMap();
@ -129,6 +135,9 @@ public abstract class serverAbstractSwitch implements serverSwitch {
// init busy state control
serverJobs = 0;
// init server tracking
maxTrackingTime = getConfigLong("maxTrackingTime", maxTrackingTimeDefault);
}
// a logger for this switchboard
@ -139,6 +148,49 @@ public abstract class serverAbstractSwitch implements serverSwitch {
public serverLog getLog() {
return log;
}
/**
 * Records that the given host has accessed the given path.
 * A new serverTrack entry is appended to the host's access list (a list is
 * created on the first access of a host); afterwards entries that are older
 * than maxTrackingTime are dropped from that list.
 *
 * @param host       the requesting host, used as key in the access tracker
 * @param accessPath the path that was accessed
 */
public void track(String host, String accessPath) {
    // fetch the history of this host, or start a fresh one
    final Object known = accessTracker.get(host);
    final ArrayList history = (known == null) ? new ArrayList() : (ArrayList) known;

    // remember the new access
    history.add(new serverTrack(accessPath));

    // forget everything that is too old
    clearTooOldAccess(history);

    // store the (possibly new) list under the host name
    accessTracker.put(host, history);
}
/**
 * Returns the recorded accesses of one host.
 * Before the list is handed out, entries older than maxTrackingTime are
 * removed from it.
 *
 * @param host the requesting host to look up
 * @return the list of serverTrack entries stored for the host,
 *         or null if nothing is stored for this host
 */
public ArrayList accessTrack(String host) {
    final ArrayList history = (ArrayList) accessTracker.get(host);
    if (history != null) {
        // drop outdated entries; if something was removed, write the list back
        final boolean shrunk = clearTooOldAccess(history);
        if (shrunk) {
            accessTracker.put(host, history);
        }
        return history;
    }
    // unknown host
    return null;
}
/**
 * Removes outdated entries from the head of an access list.
 * An entry is outdated if its time is more than maxTrackingTime milliseconds
 * in the past. Scanning stops at the first entry that is still recent enough,
 * so only a leading run of outdated entries is removed (track() appends new
 * entries at the end of the list).
 *
 * @param access list of serverTrack entries; modified in place
 * @return true if at least one entry was removed, false otherwise
 */
private boolean clearTooOldAccess(ArrayList access) {
    // compute the cutoff once instead of reading the clock for every entry
    final long oldestAllowed = System.currentTimeMillis() - maxTrackingTime;

    // count the outdated entries at the head of the list
    final int size = access.size();
    int expired = 0;
    while ((expired < size) && (((serverTrack) access.get(expired)).time < oldestAllowed)) {
        expired++;
    }
    if (expired == 0) {
        return false;
    }

    // remove the whole outdated prefix in one step; repeated remove(0) would
    // shift the remaining elements once per removed entry
    access.subList(0, expired).clear();
    return true;
}
/**
 * Lists the hosts that are present in the access tracker.
 *
 * @return an iterator over the keys of the access tracker; track() stores
 *         the host names (String) as keys
 */
public Iterator accessHosts() {
    final Iterator hostNames = accessTracker.keySet().iterator();
    return hostNames;
}
public void setConfig(String key, boolean value) {
setConfig(key, (value) ? "true" : "false");

@ -50,9 +50,9 @@
package de.anomic.server;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import de.anomic.server.logging.serverLog;
public interface serverSwitch {
@ -64,6 +64,11 @@ public interface serverSwitch {
public void setLog(serverLog log);
public serverLog getLog();
// access tracker
public void track(String host, String accessPath); // learn that a specific host has accessed a specific path
public ArrayList accessTrack(String host); // returns mapping from Long(accesstime) to path
public Iterator accessHosts(); // returns an iterator of hosts in tracker (String)
// a switchboard can have action listener
// these listeners are hooks for numerous methods below
public void deployAction(String actionName,

@ -0,0 +1,39 @@
// serverTrack.java
// (C) 2007 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 11.06.2007 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2006-04-02 22:40:07 +0200 (So, 02 Apr 2006) $
// $LastChangedRevision: 1986 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.server;
/**
 * A single entry of the server access tracker: one access to a path,
 * stamped with the time at which this entry was created.
 */
public class serverTrack {

    // access time in milliseconds, as delivered by System.currentTimeMillis()
    public long time;

    // the accessed path, exactly as handed to the constructor
    public String path;

    /**
     * Creates an entry for the given path and stamps it with the current time.
     *
     * @param path the accessed path
     */
    public serverTrack(String path) {
        this.path = path;
        this.time = System.currentTimeMillis();
    }
}

@ -60,6 +60,10 @@ keyStorePassword =
pkcs12ImportFile =
pkcs12ImportPwd =
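# server tracking: maximum time a track entry is held in the internal cache
# value is in milliseconds, default is one hour
# NOTE(review): serverAbstractSwitch reads this setting with the key "maxTrackingTime"
# (without the "server." prefix) - confirm that the key below is the one actually used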
server.maxTrackingTime = 3600000
# peer-to-peer construction for distributed search
# we have several stages:
# 1st: a file within every distribution that has a list of URLs:

Loading…
Cancel
Save