counting crawler traffic again:

fix for http://forum.yacy-websuche.de/viewtopic.php?f=6&t=2808

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7138 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
sixcooler 14 years ago
parent 547d5226ae
commit 17eebd4ef8

@ -32,8 +32,9 @@ import java.util.Date;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.io.ByteCountInputStream; import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.io.ByteCountOutputStream; //import net.yacy.kelondro.io.ByteCountInputStream;
//import net.yacy.kelondro.io.ByteCountOutputStream;
import net.yacy.kelondro.util.DateFormatter; import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.Formatter; import net.yacy.kelondro.util.Formatter;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
@ -83,8 +84,9 @@ public class Status {
sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL); sb.continueCrawlJob(SwitchboardConstants.CRAWLJOB_REMOTE_TRIGGERED_CRAWL);
redirect = true; redirect = true;
} else if (post.containsKey("ResetTraffic")) { } else if (post.containsKey("ResetTraffic")) {
ByteCountInputStream.resetCount(); // ByteCountInputStream.resetCount();
ByteCountOutputStream.resetCount(); // ByteCountOutputStream.resetCount();
ByteCount.resetCount();
redirect = true; redirect = true;
} else if (post.containsKey("popup")) { } else if (post.containsKey("popup")) {
final String trigger_enabled = post.get("popup"); final String trigger_enabled = post.get("popup");
@ -292,8 +294,8 @@ public class Status {
// proxy traffic // proxy traffic
//prop.put("trafficIn",bytesToString(httpdByteCountInputStream.getGlobalCount())); //prop.put("trafficIn",bytesToString(httpdByteCountInputStream.getGlobalCount()));
prop.put("trafficProxy", Formatter.bytesToString(ByteCountOutputStream.getAccountCount("PROXY"))); prop.put("trafficProxy", Formatter.bytesToString(ByteCount.getAccountCount(ByteCount.PROXY)));
prop.put("trafficCrawler", Formatter.bytesToString(ByteCountInputStream.getAccountCount("CRAWLER"))); prop.put("trafficCrawler", Formatter.bytesToString(ByteCount.getAccountCount(ByteCount.CRAWLER)));
// connection information // connection information
final serverCore httpd = (serverCore) sb.getThread("10_httpd"); final serverCore httpd = (serverCore) sb.getThread("10_httpd");

@ -1,8 +1,9 @@
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.io.ByteCountInputStream; import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.io.ByteCountOutputStream; //import net.yacy.kelondro.io.ByteCountInputStream;
//import net.yacy.kelondro.io.ByteCountOutputStream;
import net.yacy.kelondro.util.MemoryControl; import net.yacy.kelondro.util.MemoryControl;
import net.yacy.kelondro.workflow.WorkflowProcessor; import net.yacy.kelondro.workflow.WorkflowProcessor;
import de.anomic.search.Segment; import de.anomic.search.Segment;
@ -49,9 +50,9 @@ public class status_p {
prop.putNum("processors", WorkflowProcessor.availableCPU); prop.putNum("processors", WorkflowProcessor.availableCPU);
// proxy traffic // proxy traffic
prop.put("trafficIn", ByteCountInputStream.getGlobalCount()); prop.put("trafficIn", ByteCount.getGlobalCount());
prop.put("trafficProxy", ByteCountOutputStream.getAccountCount("PROXY")); prop.put("trafficProxy", ByteCount.getAccountCount(ByteCount.PROXY));
prop.put("trafficCrawler", ByteCountInputStream.getAccountCount("CRAWLER")); prop.put("trafficCrawler", ByteCount.getAccountCount(ByteCount.CRAWLER));
// return rewrite properties // return rewrite properties
return prop; return prop;

@ -43,6 +43,7 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.blob.BEncodedHeap; import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.index.RowSpaceExceededException; import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.io.ByteCount;
import de.anomic.crawler.retrieval.HTTPLoader; import de.anomic.crawler.retrieval.HTTPLoader;
@ -347,6 +348,10 @@ public class RobotsTxt {
// sending the get request // sending the get request
robotsTxt = client.GETbytes(robotsURL.toString()); robotsTxt = client.GETbytes(robotsURL.toString());
// statistics:
if (robotsTxt != null) {
ByteCount.addAccountCount(ByteCount.CRAWLER, robotsTxt.length);
}
final int code = client.getHttpResponse().getStatusLine().getStatusCode(); final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders()); final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());

@ -34,6 +34,7 @@ import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.io.ByteCount;
import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.logging.Log;
import net.yacy.repository.Blacklist; import net.yacy.repository.Blacklist;
@ -137,9 +138,10 @@ public final class HTTPLoader {
// the transfer is ok // the transfer is ok
// we write the new cache entry to file system directly // we write the new cache entry to file system directly
// TODO: res.setAccountingName("CRAWLER"); // res.setAccountingName("CRAWLER");
// final byte[] responseBody = res.getData(); // final byte[] responseBody = res.getData();
long contentLength = responseBody.length; long contentLength = responseBody.length;
ByteCount.addAccountCount(ByteCount.CRAWLER, contentLength);
// check length again in case it was not possible to get the length before loading // check length again in case it was not possible to get the length before loading
if (maxFileSize > 0 && contentLength > maxFileSize) { if (maxFileSize > 0 && contentLength > maxFileSize) {
@ -267,9 +269,12 @@ public final class HTTPLoader {
// if (res.getStatusCode() == 200 || res.getStatusCode() == 203) { // if (res.getStatusCode() == 200 || res.getStatusCode() == 203) {
if (responseBody != null && (code == 200 || code == 203)) { if (responseBody != null && (code == 200 || code == 203)) {
// the transfer is ok // the transfer is ok
//statistics:
ByteCount.addAccountCount(ByteCount.CRAWLER, responseBody.length);
// we write the new cache entry to file system directly // we write the new cache entry to file system directly
// TODO: res.setAccountingName("CRAWLER"); // res.setAccountingName("CRAWLER");
// final byte[] responseBody = res.getData(); // final byte[] responseBody = res.getData();
// create a new cache entry // create a new cache entry

@ -0,0 +1,71 @@
//ByteCount.java
//-----------------------
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
// This file is contributed by Sebastian Gäbel
// last major change: $LastChangedDate: 2010-04-23 11:32:29 +0200 (Fr, 23. Apr 2010) $ by $LastChangedBy: sixcooler $
// Revision: $LastChangedRevision: 6835 $
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package net.yacy.kelondro.io;
import java.util.HashMap;
/**
 * Process-wide accounting of transferred bytes.
 *
 * <p>Every booking increases one global counter and, when an account name is
 * supplied, a per-account counter as well. All state is static and every
 * access (read or write) is guarded by a single private lock, so the class may
 * be used from any thread.</p>
 */
public final class ByteCount {

    /** Account name used for proxy traffic. */
    public final static String PROXY = "PROXY";

    /** Account name used for crawler traffic. */
    public final static String CRAWLER = "CRAWLER";

    /** Guards {@link #globalCount} and {@link #countMap}. */
    private final static Object syncObject = new Object();

    /** Sum of all bytes booked since start or the last {@link #resetCount()}. */
    private static long globalCount = 0;

    /** Bytes booked per account name; never contains {@code null} keys or values. */
    private final static HashMap<String, Long> countMap = new HashMap<String, Long>(2);

    /**
     * Returns the total number of bytes booked over all accounts, including
     * bookings made without an account name.
     *
     * @return the global byte count
     */
    public final static long getGlobalCount() {
        // Read under the same lock the writers use: globalCount is a plain
        // (non-volatile) long, so an unguarded read is neither guaranteed to
        // see the latest value nor guaranteed to be atomic.
        synchronized (syncObject) {
            return globalCount;
        }
    }

    /**
     * Returns the number of bytes booked on a single account.
     *
     * @param accountName the account to look up; may be {@code null}
     * @return the booked byte count, or {@code 0} if nothing was booked on
     *         that account (which is always the case for {@code null})
     */
    public final static long getAccountCount(final String accountName) {
        synchronized (syncObject) {
            final Long booked = countMap.get(accountName);
            return (booked == null) ? 0L : booked.longValue();
        }
    }

    /**
     * Books a number of bytes on the global counter and, if an account name
     * is given, on that account too.
     *
     * @param accountName the account to book on, or {@code null} to count
     *                    the bytes only globally
     * @param count       the number of bytes to add
     */
    public final static void addAccountCount(final String accountName, final long count) {
        synchronized (syncObject) {
            globalCount += count;
            if (accountName != null) {
                final Long booked = countMap.get(accountName);
                final long current = (booked == null) ? 0L : booked.longValue();
                countMap.put(accountName, Long.valueOf(current + count));
            }
        }
    }

    /**
     * Sets the global counter and all per-account counters back to zero.
     */
    public final static void resetCount() {
        synchronized (syncObject) {
            globalCount = 0;
            countMap.clear();
        }
    }
}

@ -29,13 +29,13 @@ package net.yacy.kelondro.io;
import java.io.FilterInputStream; import java.io.FilterInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.HashMap; //import java.util.HashMap;
public final class ByteCountInputStream extends FilterInputStream { public final class ByteCountInputStream extends FilterInputStream {
private final static Object syncObject = new Object(); // private final static Object syncObject = new Object();
private final static HashMap<String, Long> byteCountInfo = new HashMap<String, Long>(2); // private final static HashMap<String, Long> byteCountInfo = new HashMap<String, Long>(2);
private static long globalByteCount = 0; // private static long globalByteCount = 0;
private boolean finished = false; private boolean finished = false;
protected long byteCount; protected long byteCount;
@ -99,20 +99,20 @@ public final class ByteCountInputStream extends FilterInputStream {
return this.byteCountAccountName; return this.byteCountAccountName;
} }
public final static long getGlobalCount() { // public final static long getGlobalCount() {
synchronized (syncObject) { // synchronized (syncObject) {
return globalByteCount; // return globalByteCount;
} // }
} // }
public final static long getAccountCount(final String accountName) { // public final static long getAccountCount(final String accountName) {
synchronized (syncObject) { // synchronized (syncObject) {
if (byteCountInfo.containsKey(accountName)) { // if (byteCountInfo.containsKey(accountName)) {
return (byteCountInfo.get(accountName)).longValue(); // return (byteCountInfo.get(accountName)).longValue();
} // }
return 0; // return 0;
} // }
} // }
public final void close() throws IOException { public final void close() throws IOException {
super.close(); super.close();
@ -123,24 +123,25 @@ public final class ByteCountInputStream extends FilterInputStream {
if (this.finished) return; if (this.finished) return;
this.finished = true; this.finished = true;
synchronized (syncObject) { ByteCount.addAccountCount(this.byteCountAccountName, this.byteCount);
globalByteCount += this.byteCount; // synchronized (syncObject) {
if (this.byteCountAccountName != null) { // globalByteCount += this.byteCount;
long lastByteCount = 0; // if (this.byteCountAccountName != null) {
if (byteCountInfo.containsKey(this.byteCountAccountName)) { // long lastByteCount = 0;
lastByteCount = byteCountInfo.get(this.byteCountAccountName).longValue(); // if (byteCountInfo.containsKey(this.byteCountAccountName)) {
} // lastByteCount = byteCountInfo.get(this.byteCountAccountName).longValue();
lastByteCount += this.byteCount; // }
byteCountInfo.put(this.byteCountAccountName, Long.valueOf(lastByteCount)); // lastByteCount += this.byteCount;
} // byteCountInfo.put(this.byteCountAccountName, Long.valueOf(lastByteCount));
// }
} //
// }
} }
public final static void resetCount() { // public final static void resetCount() {
synchronized (syncObject) { // synchronized (syncObject) {
globalByteCount = 0; // globalByteCount = 0;
byteCountInfo.clear(); // byteCountInfo.clear();
} // }
} // }
} }

@ -27,13 +27,13 @@ package net.yacy.kelondro.io;
import java.io.BufferedOutputStream; import java.io.BufferedOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.HashMap; //import java.util.HashMap;
public final class ByteCountOutputStream extends BufferedOutputStream { public final class ByteCountOutputStream extends BufferedOutputStream {
private final static Object syncObject = new Object(); // private final static Object syncObject = new Object();
private static long globalByteCount = 0; // private static long globalByteCount = 0;
private final static HashMap<String, Long> byteCountInfo = new HashMap<String, Long>(2); // private final static HashMap<String, Long> byteCountInfo = new HashMap<String, Long>(2);
protected long byteCount; protected long byteCount;
protected String byteCountAccountName = null; protected String byteCountAccountName = null;
@ -92,44 +92,45 @@ public final class ByteCountOutputStream extends BufferedOutputStream {
return this.byteCountAccountName; return this.byteCountAccountName;
} }
public final static long getGlobalCount() { // public final static long getGlobalCount() {
synchronized (syncObject) { // synchronized (syncObject) {
return globalByteCount; // return globalByteCount;
} // }
} // }
public final static long getAccountCount(final String accountName) { // public final static long getAccountCount(final String accountName) {
synchronized (syncObject) { // synchronized (syncObject) {
if (byteCountInfo.containsKey(accountName)) { // if (byteCountInfo.containsKey(accountName)) {
return (byteCountInfo.get(accountName)).longValue(); // return (byteCountInfo.get(accountName)).longValue();
} // }
return 0; // return 0;
} // }
} // }
public final static void resetCount() { // public final static void resetCount() {
synchronized (syncObject) { // synchronized (syncObject) {
globalByteCount = 0; // globalByteCount = 0;
byteCountInfo.clear(); // byteCountInfo.clear();
} // }
} // }
public final void finish() { public final void finish() {
if (this.finished) return; if (this.finished) return;
this.finished = true; this.finished = true;
synchronized (syncObject) { ByteCount.addAccountCount(this.byteCountAccountName, this.byteCount);
globalByteCount += this.byteCount; // synchronized (syncObject) {
if (this.byteCountAccountName != null) { // globalByteCount += this.byteCount;
long lastByteCount = 0; // if (this.byteCountAccountName != null) {
if (byteCountInfo.containsKey(this.byteCountAccountName)) { // long lastByteCount = 0;
lastByteCount = (byteCountInfo.get(this.byteCountAccountName)).longValue(); // if (byteCountInfo.containsKey(this.byteCountAccountName)) {
} // lastByteCount = (byteCountInfo.get(this.byteCountAccountName)).longValue();
lastByteCount += this.byteCount; // }
byteCountInfo.put(this.byteCountAccountName, Long.valueOf(lastByteCount)); // lastByteCount += this.byteCount;
} // byteCountInfo.put(this.byteCountAccountName, Long.valueOf(lastByteCount));
// }
} //
// }
} }
} }

Loading…
Cancel
Save