- refactoring of the http client

- added a protection against memory leaks for the access tracker

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@5621 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 5d3983faae
commit c12bb8a6d0

@ -67,6 +67,13 @@ pkcs12ImportPwd =
# value is in milliseconds, default is one hour
server.maxTrackingTime = 3600000
# maximum number of tracks per host
server.maxTrackingCount = 1000
# maximum number of hosts that are tracked
server.maxTrackingHostCount = 100
# Network Definition
# There can be separate YaCy networks, and managed sub-groups of the general network.
# The essentials of the network definition are attached in separate property files.

@ -36,7 +36,7 @@ import java.util.List;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaSwitchboard;
@ -92,7 +92,7 @@ public class ConfigAppearance_p {
final yacyURL u = new yacyURL(url, null);
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
skinVector = FileUtils.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
skinVector = FileUtils.strings(httpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

@ -39,7 +39,7 @@ import java.util.List;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.data.translator;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.server.serverObjects;
@ -81,7 +81,7 @@ public class ConfigLanguage_p {
final yacyURL u = new yacyURL(url, null);
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
langVector = FileUtils.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
langVector = FileUtils.strings(httpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
}catch(final IOException e){
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

@ -34,7 +34,7 @@ import java.util.Properties;
import java.util.Set;
import de.anomic.http.HttpConnectionInfo;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpd;
import de.anomic.kelondro.order.DateFormatter;
@ -226,7 +226,7 @@ public final class Connections_p {
}
}
prop.put("clientList", c);
prop.put("clientActive", JakartaCommonsHttpClient.connectionCount());
prop.put("clientActive", httpClient.connectionCount());
// return rewrite values for templates
return prop;

@ -36,7 +36,7 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.order.DateFormatter;
import de.anomic.plasma.plasmaSwitchboard;
@ -145,7 +145,7 @@ public class Network {
prop.put("table_my-url", seed.get(yacySeed.SEEDLIST, ""));
// generating the location string
prop.putHTML("table_my-location", HttpClient.generateLocation());
prop.putHTML("table_my-location", httpClient.generateLocation());
}
// overall results: Network statistics
@ -348,7 +348,7 @@ public class Network {
userAgent = null;
if (seed.hash != null && seed.hash.equals(sb.webIndex.seedDB.mySeed().hash)) {
userAgent = HTTPLoader.yacyUserAgent;
location = HttpClient.generateLocation();
location = httpClient.generateLocation();
} else {
userAgent = sb.webIndex.seedDB.peerActions.getUserAgent(seed.getIP());
location = parseLocationInUserAgent(userAgent);

@ -36,7 +36,7 @@ import java.util.Map;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
import de.anomic.index.indexDocumentMetadata;
@ -199,7 +199,7 @@ public class ViewFile {
return prop;
}
responseHeader = HttpClient.whead(url.toString());
responseHeader = httpClient.whead(url.toString());
if (responseHeader == null) {
prop.put("error", "4");
prop.put("error_errorText", "Unable to load resource metadata.");

@ -7,7 +7,7 @@ import java.util.Set;
import de.anomic.crawler.HTTPLoader;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaSwitchboard;
@ -48,7 +48,7 @@ public class getpageinfo_p {
final yacyURL u = new yacyURL(url, null);
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent); // do not set the crawler user agent, because this page was loaded by manual entering of the url
final byte[] r = HttpClient.wget(u.toString(), reqHeader, 5000);
final byte[] r = httpClient.wget(u.toString(), reqHeader, 5000);
if (r == null) return prop;
final String contentString=new String(r);

@ -40,7 +40,7 @@ import java.util.List;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.index.indexAbstractReferenceBlacklist;
import de.anomic.kelondro.util.FileUtils;
@ -139,7 +139,7 @@ public class sharedBlacklist_p {
// get List
yacyURL u = new yacyURL(downloadURLOld, null);
otherBlacklist = FileUtils.strings(HttpClient.wget(u.toString(), reqHeader, 1000), "UTF-8");
otherBlacklist = FileUtils.strings(httpClient.wget(u.toString(), reqHeader, 1000), "UTF-8");
} catch (final Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.putHTML("status_name", Hash);
@ -158,7 +158,7 @@ public class sharedBlacklist_p {
final yacyURL u = new yacyURL(downloadURL, null);
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
otherBlacklist = FileUtils.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8"); //get List
otherBlacklist = FileUtils.strings(httpClient.wget(u.toString(), reqHeader, 10000), "UTF-8"); //get List
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

@ -28,9 +28,8 @@ package de.anomic.crawler;
import java.io.IOException;
import java.util.Date;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
import de.anomic.http.httpdProxyCacheEntry;
@ -49,8 +48,8 @@ public final class HTTPLoader {
private static final String DEFAULT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7";
private static final long DEFAULT_MAXFILESIZE = 1024 * 1024 * 10;
public static final int DEFAULT_CRAWLING_RETRY_COUNT = 5;
public static final String crawlerUserAgent = "yacybot (" + HttpClient.getSystemOST() +") http://yacy.net/bot.html";
public static final String yacyUserAgent = "yacy (" + HttpClient.getSystemOST() +") yacy.net";
public static final String crawlerUserAgent = "yacybot (" + httpClient.getSystemOST() +") http://yacy.net/bot.html";
public static final String yacyUserAgent = "yacy (" + httpClient.getSystemOST() +") yacy.net";
/**
* The socket timeout that should be used
@ -139,9 +138,9 @@ public final class HTTPLoader {
requestHeader.put(httpRequestHeader.ACCEPT_ENCODING, sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
// HTTP-Client
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(socketTimeout, requestHeader);
final httpClient client = new httpClient(socketTimeout, requestHeader);
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
//try {
// send request
res = client.GET(entry.url().toString());

@ -40,8 +40,8 @@ import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.blob.BLOB;
import de.anomic.kelondro.blob.BLOBHeap;
@ -528,8 +528,8 @@ public class RobotsTxt {
// setup http-client
//TODO: adding Traffic statistic for robots download?
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(10000, reqHeaders);
JakartaCommonsHttpResponse res = null;
final httpClient client = new httpClient(10000, reqHeaders);
httpResponse res = null;
try {
// sending the get request
res = client.GET(robotsURL.toString());

@ -41,8 +41,8 @@ import org.xml.sax.helpers.DefaultHandler;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpdByteCountInputStream;
import de.anomic.index.indexURLReference;
@ -153,8 +153,8 @@ public class SitemapParser extends DefaultHandler {
// download document
final httpRequestHeader requestHeader = new httpRequestHeader();
requestHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(5000, requestHeader);
JakartaCommonsHttpResponse res = null;
final httpClient client = new httpClient(5000, requestHeader);
httpResponse res = null;
try {
res = client.GET(siteMapURL.toString());
if (res.getStatusCode() != 200) {

@ -44,7 +44,7 @@ import java.util.Properties;
import javax.swing.event.EventListenerList;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaParser;
@ -503,7 +503,7 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
// load page
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] page = HttpClient.wget(location.toString(), reqHeader, 10000);
final byte[] page = httpClient.wget(location.toString(), reqHeader, 10000);
if (page == null) throw new IOException("no response from url " + location.toString());
// scrape content

@ -1,148 +0,0 @@
// HttpClient.java
// (C) 2008 by Daniel Raap; danielr@users.berlios.de
// first published 2.4.2008 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate: 2008-03-14 01:16:04 +0100 (Fr, 14 Mrz 2008) $
// $LastChangedRevision: 4558 $
// $LastChangedBy: orbiter $
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.http;
import java.io.IOException;
import de.anomic.kelondro.util.Log;
/**
 * Static convenience helpers for simple HTTP requests.
 *
 * The class is abstract only to prevent instantiation; it declares no abstract
 * methods. All requests are carried out by {@link JakartaCommonsHttpClient}.
 */
public abstract class HttpClient {

    /**
     * System information used for client identification: OS architecture, OS name,
     * OS version, Java version and the location string of {@link #generateLocation()}.
     * Computed once when the class is initialized.
     */
    private static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
            System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
            "; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();

    /**
     * Generates the location string from the JVM system properties.
     *
     * @return the part of "user.timezone" before the first "/" (the whole value if there is
     *         no "/" after the first character, "nowhere" if unset), followed by "/" and
     *         "user.language" ("dumb" if unset)
     */
    public static String generateLocation() {
        String loc = System.getProperty("user.timezone", "nowhere");
        final int p = loc.indexOf("/");
        if (p > 0) {
            loc = loc.substring(0, p);
        }
        loc = loc + "/" + System.getProperty("user.language", "dumb");
        return loc;
    }

    /**
     * @return the systemOST
     */
    public static String getSystemOST() {
        return systemOST;
    }

    /**
     * Gets a page (as raw bytes) with an empty request header and a timeout of 10000 ms.
     *
     * @param uri the address to load
     * @return the response body, or null if the request failed with an IOException
     */
    public static byte[] wget(final String uri) {
        return wget(uri, new httpRequestHeader(), 10000, null);
    }

    /**
     * Gets a page (as raw bytes) with the specified header and timeout.
     *
     * @param uri the address to load
     * @param header request header, may be null
     * @param timeout in milliseconds
     * @return the response body, or null if the request failed with an IOException
     */
    public static byte[] wget(final String uri, final httpRequestHeader header, final int timeout) {
        return wget(uri, header, timeout, null);
    }

    /**
     * Gets a page (as raw bytes) addressing vhost at host in uri with specified header and timeout
     *
     * @param uri the address to load
     * @param header request header, may be null; if vhost is given, the Host entry is added to it
     * @param timeout in milliseconds
     * @param vhost value for the Host header, or null to send none explicitly
     * @return the response body, or null if the request failed with an IOException
     */
    public static byte[] wget(final String uri, final httpRequestHeader header, final int timeout, final String vhost) {
        assert uri != null : "precondition violated: uri != null";
        // use the returned header: the helper may have to create one, and a
        // reassigned parameter would not be visible here
        final httpRequestHeader requestHeader = addHostHeader(header, vhost);
        final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, requestHeader);

        // do the request
        try {
            final JakartaCommonsHttpResponse response = client.GET(uri);
            return response.getData();
        } catch (final IOException e) {
            Log.logWarning("HTTPC", "wget(" + uri + ") failed: " + e.getMessage());
        }
        return null;
    }

    /**
     * adds a Host-header to the header if vhost is not null
     *
     * @param header the header to extend, may be null
     * @param vhost the virtual host name, may be null
     * @return the given header (unchanged if vhost is null), or a newly created header
     *         carrying the Host entry if header was null
     */
    private static httpRequestHeader addHostHeader(httpRequestHeader header, final String vhost) {
        if (vhost != null) {
            // create a header only when the caller supplied none
            if (header == null) {
                header = new httpRequestHeader();
            }
            // set host-header
            header.add(httpRequestHeader.HOST, vhost);
        }
        return header;
    }

    /**
     * Gets a page-header
     *
     * @param uri the address to query
     * @return the response header, or null on error
     */
    public static httpResponseHeader whead(final String uri) {
        return whead(uri, null);
    }

    /**
     * Gets a page-header using a HEAD request with a timeout of 10000 ms.
     *
     * @param uri the address to query
     * @param header request header
     * @return null on error
     */
    public static httpResponseHeader whead(final String uri, final httpRequestHeader header) {
        final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(10000, header);
        JakartaCommonsHttpResponse response = null;
        try {
            response = client.HEAD(uri);
            return response.getResponseHeader();
        } catch (final IOException e) {
            Log.logWarning("HTTPC", "whead(" + uri + ") failed: " + e.getMessage());
            return null;
        } finally {
            // always release the response stream, also when reading the header failed
            if (response != null) {
                response.closeStream();
            }
        }
    }
}

@ -23,6 +23,7 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.http;
import java.io.ByteArrayOutputStream;
@ -67,7 +68,7 @@ import de.anomic.kelondro.util.Log;
* @author danielr
*
*/
public class JakartaCommonsHttpClient {
public class httpClient {
/**
* "the HttpClient instance and connection manager should be shared among all threads for maximum efficiency."
@ -85,7 +86,7 @@ public class JakartaCommonsHttpClient {
* set options for client
*/
// simple user agent
setUserAgent("yacy (www.yacy.net; " + de.anomic.http.HttpClient.getSystemOST() + ")");
setUserAgent("yacy (www.yacy.net; " + getSystemOST() + ")");
// only one retry
apacheHttpClient.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler(1, false));
@ -152,7 +153,7 @@ public class JakartaCommonsHttpClient {
*
* @param timeout in milliseconds
*/
public JakartaCommonsHttpClient(final int timeout) {
public httpClient(final int timeout) {
this(timeout, null);
}
@ -162,7 +163,7 @@ public class JakartaCommonsHttpClient {
* @param timeout in milliseconds
* @param header header options to send
*/
public JakartaCommonsHttpClient(final int timeout, final httpRequestHeader header) {
public httpClient(final int timeout, final httpRequestHeader header) {
super();
setTimeout(timeout);
setHeader(header);
@ -177,7 +178,7 @@ public class JakartaCommonsHttpClient {
* @param header header options to send
* @param proxyConfig
*/
public JakartaCommonsHttpClient(final int timeout, final httpRequestHeader header, final httpRemoteProxyConfig proxyConfig) {
public httpClient(final int timeout, final httpRequestHeader header, final httpRemoteProxyConfig proxyConfig) {
super();
setTimeout(timeout);
setHeader(header);
@ -246,7 +247,7 @@ public class JakartaCommonsHttpClient {
* @return InputStream of content (body)
* @throws IOException
*/
public JakartaCommonsHttpResponse GET(final String uri) throws IOException {
public httpResponse GET(final String uri) throws IOException {
final HttpMethod get = new GetMethod(uri);
get.setFollowRedirects(followRedirects);
return execute(get);
@ -259,7 +260,7 @@ public class JakartaCommonsHttpClient {
* @return Instance of response with the content.
* @throws IOException
*/
public JakartaCommonsHttpResponse HEAD(final String uri) throws IOException {
public httpResponse HEAD(final String uri) throws IOException {
assert uri != null : "precondition violated: uri != null";
final HttpMethod head = new HeadMethod(uri);
head.setFollowRedirects(followRedirects);
@ -276,7 +277,7 @@ public class JakartaCommonsHttpClient {
* @return Instance of response with the content.
* @throws IOException
*/
public JakartaCommonsHttpResponse POST(final String uri, final InputStream ins) throws IOException {
public httpResponse POST(final String uri, final InputStream ins) throws IOException {
assert uri != null : "precondition violated: uri != null";
assert ins != null : "precondition violated: ins != null";
final PostMethod post = new PostMethod(uri);
@ -295,7 +296,7 @@ public class JakartaCommonsHttpClient {
* @return
* @throws IOException
*/
public JakartaCommonsHttpResponse POST(final String uri, final List<Part> multiparts) throws IOException {
public httpResponse POST(final String uri, final List<Part> multiparts) throws IOException {
return POST(uri, multiparts, false);
}
@ -308,7 +309,7 @@ public class JakartaCommonsHttpClient {
* @return Instance of response with the content.
* @throws IOException
*/
public JakartaCommonsHttpResponse POST(final String uri, final List<Part> multiparts, final boolean gzipBody)
public httpResponse POST(final String uri, final List<Part> multiparts, final boolean gzipBody)
throws IOException {
assert uri != null : "precondition violated: uri != null";
final PostMethod post = new PostMethod(uri);
@ -358,7 +359,7 @@ public class JakartaCommonsHttpClient {
* (non-Javadoc)
* @see de.anomic.http.HttpClient#CONNECT(java.lang.String, int, de.anomic.http.httpHeader)
*/
public JakartaCommonsHttpResponse CONNECT(final String host, final int port) throws IOException {
public httpResponse CONNECT(final String host, final int port) throws IOException {
final HostConfiguration hostConfig = new HostConfiguration();
hostConfig.setHost(host, port);
final HttpMethod connect = new ConnectMethod(hostConfig);
@ -424,7 +425,7 @@ public class JakartaCommonsHttpClient {
* @return
* @throws IOException
*/
private JakartaCommonsHttpResponse execute(final HttpMethod method) throws IOException {
private httpResponse execute(final HttpMethod method) throws IOException {
assert method != null : "precondition violated: method != null";
checkIgnoreCookies(method);
setHeader(method);
@ -458,7 +459,7 @@ public class JakartaCommonsHttpClient {
Arrays.toString(method.getResponseHeaders()));
// return response
return new JakartaCommonsHttpResponse(method);
return new httpResponse(method);
}
/**
@ -611,7 +612,7 @@ public class JakartaCommonsHttpClient {
* @param args
*/
public static void main(final String[] args) {
JakartaCommonsHttpResponse resp = null;
httpResponse resp = null;
String url = args[0];
if (!url.toUpperCase().startsWith("HTTP://")) {
url = "http://" + url;
@ -625,7 +626,7 @@ public class JakartaCommonsHttpClient {
files.add(new FilePart("anotherfile.raw", new ByteArrayPartSource("anotherfile.raw",
"this is not a binary file ;)".getBytes())));
System.out.println("POST " + files.size() + " elements to " + url);
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(1000);
final httpClient client = new httpClient(1000);
resp = client.POST(url, files);
System.out.println("----- Header: -----");
System.out.println(resp.getResponseHeader().toString());
@ -635,12 +636,12 @@ public class JakartaCommonsHttpClient {
// whead
System.out.println("whead " + url);
System.out.println("--------------------------------------");
System.out.println(de.anomic.http.HttpClient.whead(url).toString());
System.out.println(whead(url).toString());
} else {
// wget
System.out.println("wget " + url);
System.out.println("--------------------------------------");
System.out.println(new String(de.anomic.http.HttpClient.wget(url, null, 10000)));
System.out.println(new String(wget(url, null, 10000)));
}
} catch (final IOException e) {
e.printStackTrace();
@ -688,4 +689,115 @@ public class JakartaCommonsHttpClient {
public static int connectionCount() {
return conManager.getConnectionsInPool();
}
/**
 * Identification string of this client: "arch name version; java version; location",
 * built once from the JVM system properties when the class is initialized.
 */
private static final String systemOST = String.format("%s %s %s; java %s; %s",
        System.getProperty("os.arch", "no-os-arch"),
        System.getProperty("os.name", "no-os-name"),
        System.getProperty("os.version", "no-os-version"),
        System.getProperty("java.version", "no-java-version"),
        generateLocation());

/**
 * Builds the location string from the "user.timezone" and "user.language" properties.
 *
 * @return the timezone up to (not including) its first "/" when that "/" is not the
 *         first character, otherwise the whole timezone ("nowhere" if unset),
 *         followed by "/" and the language ("dumb" if unset)
 */
public static String generateLocation() {
    final String timezone = System.getProperty("user.timezone", "nowhere");
    final int slash = timezone.indexOf('/');
    final String region = (slash > 0) ? timezone.substring(0, slash) : timezone;
    return region + "/" + System.getProperty("user.language", "dumb");
}

/**
 * @return the identification string computed at class initialization
 */
public static String getSystemOST() {
    return systemOST;
}
/**
 * Gets a page (as raw bytes) with an empty request header and a timeout of 10000 ms.
 *
 * @param uri the address to load
 * @return the response body, or null if the request failed with an IOException
 */
public static byte[] wget(final String uri) {
    return wget(uri, new httpRequestHeader(), 10000, null);
}

/**
 * Gets a page (as raw bytes) with the specified header and timeout.
 *
 * @param uri the address to load
 * @param header request header, may be null
 * @param timeout in milliseconds
 * @return the response body, or null if the request failed with an IOException
 */
public static byte[] wget(final String uri, final httpRequestHeader header, final int timeout) {
    return wget(uri, header, timeout, null);
}

/**
 * Gets a page (as raw bytes) addressing vhost at host in uri with specified header and timeout
 *
 * @param uri the address to load
 * @param header request header, may be null; if vhost is given, the Host entry is added to it
 * @param timeout in milliseconds
 * @param vhost value for the Host header, or null to send none explicitly
 * @return the response body, or null if the request failed with an IOException
 */
public static byte[] wget(final String uri, final httpRequestHeader header, final int timeout, final String vhost) {
    assert uri != null : "precondition violated: uri != null";
    // use the returned header: the helper may have to create one, and a
    // reassigned parameter would not be visible here
    final httpRequestHeader requestHeader = addHostHeader(header, vhost);
    final httpClient client = new httpClient(timeout, requestHeader);

    // do the request
    try {
        final httpResponse response = client.GET(uri);
        return response.getData();
    } catch (final IOException e) {
        Log.logWarning("HTTPC", "wget(" + uri + ") failed: " + e.getMessage());
    }
    return null;
}

/**
 * adds a Host-header to the header if vhost is not null
 *
 * @param header the header to extend, may be null
 * @param vhost the virtual host name, may be null
 * @return the given header (unchanged if vhost is null), or a newly created header
 *         carrying the Host entry if header was null
 */
private static httpRequestHeader addHostHeader(httpRequestHeader header, final String vhost) {
    if (vhost != null) {
        // create a header only when the caller supplied none
        if (header == null) {
            header = new httpRequestHeader();
        }
        // set host-header
        header.add(httpRequestHeader.HOST, vhost);
    }
    return header;
}
/**
 * Requests only the header of a page, without a custom request header.
 *
 * @param uri the address to query
 * @return the response header, or null on error
 */
public static httpResponseHeader whead(final String uri) {
    return whead(uri, null);
}

/**
 * Requests only the header of a page via HEAD, using a timeout of 10000 ms.
 * An IOException is logged as a warning and reported to the caller as null.
 *
 * @param uri the address to query
 * @param header request header
 * @return the response header, or null on error
 */
public static httpResponseHeader whead(final String uri, final httpRequestHeader header) {
    final httpClient headClient = new httpClient(10000, header);
    httpResponse headResponse = null;
    httpResponseHeader result = null;
    try {
        headResponse = headClient.HEAD(uri);
        result = headResponse.getResponseHeader();
    } catch (final IOException ioe) {
        Log.logWarning("HTTPC", "whead(" + uri + ") failed: " + ioe.getMessage());
    } finally {
        // release the stream whether or not the header could be read
        if (headResponse != null) {
            headResponse.closeStream();
        }
    }
    return result;
}
}

@ -41,7 +41,7 @@ import de.anomic.kelondro.util.FileUtils;
* @author daniel
* @since 21.03.2008
*/
public class JakartaCommonsHttpResponse {
public class httpResponse {
private final HttpMethod method;
private String incomingAccountingName = null;
@ -56,7 +56,7 @@ public class JakartaCommonsHttpResponse {
* @param method
* @throws IOException
*/
public JakartaCommonsHttpResponse(final HttpMethod method) {
public httpResponse(final HttpMethod method) {
super();
this.method = method;

@ -438,7 +438,7 @@ public final class httpdProxyHandler {
final GZIPOutputStream gzippedOut = null;
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
try {
final int reqID = requestHeader.hashCode();
@ -473,7 +473,7 @@ public final class httpdProxyHandler {
final String connectHost = hostPart(host, port, yAddress);
final String getUrl = "http://"+ connectHost + remotePath;
final JakartaCommonsHttpClient client = setupHttpClient(requestHeader, connectHost);
final httpClient client = setupHttpClient(requestHeader, connectHost);
// send request
try {
@ -724,7 +724,7 @@ public final class httpdProxyHandler {
public static void doHead(final Properties conProp, final httpRequestHeader requestHeader, OutputStream respond) {
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
yacyURL url = null;
try {
final int reqID = requestHeader.hashCode();
@ -793,7 +793,7 @@ public final class httpdProxyHandler {
final String getUrl = "http://"+ connectHost + remotePath;
if (theLogger.isFinest()) theLogger.logFinest(reqID +" using url: "+ getUrl);
final JakartaCommonsHttpClient client = setupHttpClient(requestHeader, connectHost);
final httpClient client = setupHttpClient(requestHeader, connectHost);
// send request
try {
@ -884,7 +884,7 @@ public final class httpdProxyHandler {
final String getUrl = "http://"+ connectHost + remotePath;
if (theLogger.isFinest()) theLogger.logFinest(reqID +" using url: "+ getUrl);
final JakartaCommonsHttpClient client = setupHttpClient(requestHeader, connectHost);
final httpClient client = setupHttpClient(requestHeader, connectHost);
// check input
if(body == null) {
@ -910,7 +910,7 @@ public final class httpdProxyHandler {
}
body = new ByteArrayInputStream(bodyData);
}
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
try {
// sending the request
res = client.POST(getUrl, body);
@ -1050,9 +1050,9 @@ public final class httpdProxyHandler {
* @param connectHost may be 'host:port' or 'host:port/path'
* @return
*/
private static JakartaCommonsHttpClient setupHttpClient(final httpRequestHeader requestHeader, final String connectHost) {
private static httpClient setupHttpClient(final httpRequestHeader requestHeader, final String connectHost) {
// setup HTTP-client
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, requestHeader);
final httpClient client = new httpClient(timeout, requestHeader);
client.setFollowRedirects(false);
// cookies are handled by the user's browser
client.setIgnoreCookies(true);
@ -1232,10 +1232,10 @@ public final class httpdProxyHandler {
(proxyConfig.useProxy()) &&
(proxyConfig.useProxy4SSL())
) {
final JakartaCommonsHttpClient remoteProxy = new JakartaCommonsHttpClient(timeout, requestHeader, proxyConfig);
final httpClient remoteProxy = new httpClient(timeout, requestHeader, proxyConfig);
remoteProxy.setFollowRedirects(false); // should not be needed, but safe is safe
JakartaCommonsHttpResponse response = null;
httpResponse response = null;
try {
response = remoteProxy.CONNECT(host, port);
// outputs a logline to the serverlog with the current status

@ -40,8 +40,8 @@ import java.util.Map;
import java.util.TreeSet;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.kelondro.blob.Cache;
import de.anomic.kelondro.index.Row;
@ -243,9 +243,9 @@ public final class indexRepositoryReference {
final yacyURL newUrl = new yacyURL(newUrlStr, null);
// doing a http head request to test if the url is correct
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(10000);
final httpClient client = new httpClient(10000);
client.setProxy(proxyConfig);
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
try {
res = client.HEAD(newUrl.toString());
} finally {

@ -29,7 +29,7 @@ import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverDomains;
@ -46,7 +46,7 @@ public class natLib {
rm status.htm
*/
try {
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000), "UTF-8");
ArrayList<String> x = FileUtils.strings(httpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000), "UTF-8");
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
final String line = nxTools.tail1(x);
@ -59,7 +59,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
ArrayList<String> x = FileUtils.strings(
HttpClient.wget("http://www.whatismyip.com/", null, 10000), "UTF-8");
httpClient.wget("http://www.whatismyip.com/", null, 10000), "UTF-8");
x = nxTools.grep(x, 0, "Your IP is");
final String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@ -71,7 +71,7 @@ public class natLib {
private static String getStanford() {
try {
ArrayList<String> x = FileUtils.strings(
HttpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000),
httpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000),
"UTF-8");
x = nxTools.grep(x, 0, "firewall protecting your browser");
final String line = nxTools.tail1(x);
@ -83,7 +83,7 @@ public class natLib {
private static String getIPID() {
try {
ArrayList<String> x = FileUtils.strings(HttpClient.wget("http://ipid.shat.net/", null, 10000), "UTF-8");
ArrayList<String> x = FileUtils.strings(httpClient.wget("http://ipid.shat.net/", null, 10000), "UTF-8");
x = nxTools.grep(x, 2, "Your IP address");
final String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);

@ -43,7 +43,7 @@ import com.catcode.odf.OpenDocumentMetadata;
import com.catcode.odf.OpenDocumentTextInputStream;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.Log;
import de.anomic.kelondro.util.FileUtils;
@ -246,7 +246,7 @@ public class odtParser extends AbstractParser implements Parser {
// downloading the document content
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
final byte[] content = httpClient.wget(contentUrl.toString(), reqHeader, 10000);
final ByteArrayInputStream input = new ByteArrayInputStream(content);
// parsing the document

@ -34,7 +34,7 @@ import com.jguild.jrpm.io.RPMFile;
import com.jguild.jrpm.io.datatype.DataTypeIf;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaParserDocument;
@ -166,7 +166,7 @@ public class rpmParser extends AbstractParser implements Parser {
final rpmParser testParser = new rpmParser();
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
final byte[] content = httpClient.wget(contentUrl.toString(), reqHeader, 10000);
final ByteArrayInputStream input = new ByteArrayInputStream(content);
testParser.parse(contentUrl, "application/x-rpm", null, input);
} catch (final Exception e) {

@ -35,7 +35,7 @@ import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.order.Base64Order;
import de.anomic.plasma.plasmaParserDocument;
@ -277,7 +277,7 @@ public class vcfParser extends AbstractParser implements Parser {
final vcfParser testParser = new vcfParser();
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
final byte[] content = httpClient.wget(contentUrl.toString(), reqHeader, 10000);
final ByteArrayInputStream input = new ByteArrayInputStream(content);
testParser.parse(contentUrl, "text/x-vcard", "UTF-8",input);
} catch (final Exception e) {

@ -56,8 +56,8 @@ import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.htmlFilter.htmlFilterInputStream;
import de.anomic.htmlFilter.htmlFilterWriter;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.kelondro.util.Log;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.parser.Parser;
@ -880,7 +880,7 @@ public final class plasmaParser {
}
final String mode = args[0];
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
plasmaParserDocument document = null;
try { // close InputStream when done
if (mode.equalsIgnoreCase("-f")) {
@ -890,7 +890,7 @@ public final class plasmaParser {
contentURL = new yacyURL(args[1], null);
// downloading the document content
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(5000);
final httpClient client = new httpClient(5000);
res = client.GET(args[1]);
if (res.getStatusCode() != 200) {

@ -41,7 +41,7 @@ import java.util.regex.Pattern;
import de.anomic.htmlFilter.htmlFilterCharacterCoding;
import de.anomic.htmlFilter.htmlFilterImageEntry;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponseHeader;
import de.anomic.index.indexDocumentMetadata;
import de.anomic.index.indexURLReference;
@ -852,7 +852,7 @@ public class plasmaSnippetCache {
// getting URL mimeType
try {
responseHeader = HttpClient.whead(url.toString());
responseHeader = httpClient.whead(url.toString());
} catch (final Exception e) {
// ignore this. http header download failed
}

@ -131,8 +131,7 @@ import de.anomic.data.messageBoard;
import de.anomic.data.userDB;
import de.anomic.data.wikiBoard;
import de.anomic.data.wiki.wikiParser;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
@ -1089,7 +1088,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
indexingStorageProcessor.awaitShutdown(12000);
crawlStacker.close();
this.dbImportManager.close();
JakartaCommonsHttpClient.closeAllConnections();
httpClient.closeAllConnections();
wikiDB.close();
blogDB.close();
blogCommentDB.close();
@ -1306,10 +1305,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
} catch (final IOException e) {};
// close unused connections
JakartaCommonsHttpClient.cleanup();
// clean up too old connection information
super.cleanupAccessTracker(1000 * 60 * 60);
httpClient.cleanup();
// do transmission of CR-files
checkInterruption();
@ -2046,7 +2042,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
url = new yacyURL(seedListFileURL, null);
final long start = System.currentTimeMillis();
header = HttpClient.whead(url.toString(), reqHeader);
header = httpClient.whead(url.toString(), reqHeader);
final long loadtime = System.currentTimeMillis() - start;
if (header == null) {
if (loadtime > getConfigLong("bootstrapLoadTimeout", 6000)) {
@ -2060,7 +2056,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
final byte[] content = HttpClient.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000));
final byte[] content = httpClient.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000));
seedList = FileUtils.strings(content, "UTF-8");
enu = seedList.iterator();
lc = 0;
@ -2128,7 +2124,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// sending request
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
final HashMap<String, String> result = FileUtils.table(HttpClient.wget(url.toString(), reqHeader, 10000), "UTF-8");
final HashMap<String, String> result = FileUtils.table(httpClient.wget(url.toString(), reqHeader, 10000), "UTF-8");
if (result == null) return new HashMap<String, String>();
return result;
} catch (final Exception e) {

@ -29,7 +29,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import de.anomic.kelondro.util.Log;
@ -37,8 +36,6 @@ import de.anomic.kelondro.util.FileUtils;
public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
private static final long maxTrackingTimeDefault = 1000 * 60 * 60; // store only access data from the last hour to save ram space
// configuration management
private final File configFile;
private final String configComment;
@ -46,14 +43,13 @@ public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
protected boolean firstInit;
protected Log log;
protected int serverJobs;
private long maxTrackingTime;
private Map<String, String> configProps;
private final Map<String, String> configRemoved;
private final HashMap<InetAddress, String> authorization;
private final TreeMap<String, serverBusyThread> workerThreads;
private final TreeMap<String, serverSwitchAction> switchActions;
private final LinkedBlockingQueue<E> cacheStack;
private final ConcurrentHashMap<String, SortedMap<Long, String>> accessTracker; // mappings from requesting host to an ArrayList of serverTrack-entries
private final serverAccessTracker accessTracker;
public serverAbstractSwitch(final File rootPath, final String initPath, final String configPath, final boolean applyPro) {
// we initialize the switchboard with a property file,
@ -133,7 +129,6 @@ public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
// other settings
authorization = new HashMap<InetAddress, String>();
accessTracker = new ConcurrentHashMap<String, SortedMap<Long, String>>();
// init thread control
workerThreads = new TreeMap<String, serverBusyThread>();
@ -145,7 +140,11 @@ public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
serverJobs = 0;
// init server tracking
maxTrackingTime = getConfigLong("maxTrackingTime", maxTrackingTimeDefault);
this.accessTracker = new serverAccessTracker(
getConfigLong("server.maxTrackingTime", 60 * 60 * 1000),
(int) getConfigLong("server.maxTrackingCount", 1000),
(int) getConfigLong("server.maxTrackingHostCount", 100)
);
}
// a logger for this switchboard
@ -156,59 +155,6 @@ public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
public Log getLog() {
return log;
}
/*
* remove all entries from the access tracker where the age of the last access is greater than the given timeout
*/
public void cleanupAccessTracker(final long timeout) {
final Iterator<Map.Entry<String, SortedMap<Long, String>>> i = accessTracker.entrySet().iterator();
while (i.hasNext()) {
if (i.next().getValue().tailMap(Long.valueOf(System.currentTimeMillis() - timeout)).size() == 0) i.remove();
}
}
public void track(final String host, String accessPath) {
// learn that a specific host has accessed a specific path
if (accessPath == null) accessPath="NULL";
SortedMap<Long, String> access = accessTracker.get(host);
if (access == null) access = new TreeMap<Long, String>();
synchronized (access) {
access.put(Long.valueOf(System.currentTimeMillis()), accessPath);
// write back to tracker
accessTracker.put(host, clearTooOldAccess(access));
}
}
public SortedMap<Long, String> accessTrack(final String host) {
// returns mapping from Long(accesstime) to path
SortedMap<Long, String> access = accessTracker.get(host);
if (access == null) return null;
// clear too old entries
synchronized (access) {
if ((access = clearTooOldAccess(access)).size() != access.size()) {
// write back to tracker
if (access.size() == 0) {
accessTracker.remove(host);
} else {
accessTracker.put(host, access);
}
}
}
return access;
}
private SortedMap<Long, String> clearTooOldAccess(final SortedMap<Long, String> access) {
return access.tailMap(Long.valueOf(System.currentTimeMillis() - maxTrackingTime));
}
public Iterator<String> accessHosts() {
// returns an iterator of hosts in tracker (String)
final HashMap<String, SortedMap<Long, String>> accessTrackerClone = new HashMap<String, SortedMap<Long, String>>();
accessTrackerClone.putAll(accessTracker);
return accessTrackerClone.keySet().iterator();
}
public void setConfig(final Map<String, String> otherConfigs) {
final Iterator<Map.Entry<String, String>> i = otherConfigs.entrySet().iterator();
@ -547,4 +493,17 @@ public abstract class serverAbstractSwitch<E> implements serverSwitch<E> {
/**
 * Stores the given job count in {@code serverJobs}.
 *
 * @param jobs the job count to record
 */
public void handleBusyState(final int jobs) {
serverJobs = jobs;
}
/**
 * Records that a host accessed a path; delegates to the access tracker of this switch.
 *
 * @param host       the requesting host
 * @param accessPath the path that was accessed
 */
public void track(String host, String accessPath) {
this.accessTracker.track(host, accessPath);
}
/**
 * Returns the access track of a host; delegates to the access tracker of this switch.
 *
 * @param host the requesting host
 * @return a mapping from access time to accessed path,
 *         or null if the tracker holds no track for that host
 */
public SortedMap<Long, String> accessTrack(String host) {
return this.accessTracker.accessTrack(host);
}
/**
 * Returns an iterator over the hosts known to the access tracker of this switch.
 *
 * @return iterator of host names as provided by the access tracker
 */
public Iterator<String> accessHosts() {
return this.accessTracker.accessHosts();
}
}

@ -0,0 +1,128 @@
// serverAccessTracker.java
// -------------------------------------
// (C) 2009 by Michael Peter Christen; mc@yacy.net
// first published on http://yacy.net
// Frankfurt, Germany, 20.02.2009
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.server;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Bounded in-memory record of which host accessed which path at which time.
 *
 * <p>For every host a sorted map from access time (milliseconds) to the accessed
 * path is kept. Three limits protect against unbounded memory growth:
 * <ul>
 * <li>entries older than {@code maxTrackingTime} are physically removed,</li>
 * <li>at most {@code maxTrackingCount} entries are kept per host (oldest dropped first),</li>
 * <li>the number of tracked hosts is cut back to {@code maxTrackingHostCount}
 *     during the periodic cleanup, which runs at most once per cleanup cycle.</li>
 * </ul>
 *
 * <p>Accesses are keyed by their millisecond time stamp, so two accesses of the
 * same host within the same millisecond share one entry (the later path wins).
 *
 * <p>Thread-safety: all public methods are synchronized on the tracker instance;
 * the per-host maps are never handed out, callers only receive copies.
 */
public class serverAccessTracker {

    /** minimum time between two full cleanup passes in milliseconds (1 minute) */
    private static final long cleanupCycle = 60000;

    private final long maxTrackingTime; // maximum age of an entry in milliseconds
    private final int maxTrackingCount; // maximum number of entries per host
    private final int maxHostCount;     // maximum number of hosts left after a cleanup
    private final ConcurrentHashMap<String, SortedMap<Long, String>> accessTracker; // mappings from requesting host to a map (access time -> access path)
    private long lastCleanup;           // time of the last full cleanup pass; guarded by this

    /**
     * @param maxTrackingTime      maximum age of a tracked access in milliseconds
     * @param maxTrackingCount     maximum number of tracked accesses per host
     * @param maxTrackingHostCount maximum number of hosts that survive a cleanup pass
     */
    public serverAccessTracker(long maxTrackingTime, int maxTrackingCount, int maxTrackingHostCount) {
        this.maxTrackingTime = maxTrackingTime;
        this.maxTrackingCount = maxTrackingCount;
        this.maxHostCount = maxTrackingHostCount;
        this.accessTracker = new ConcurrentHashMap<String, SortedMap<Long, String>>();
    }

    /**
     * Time source of the tracker. Can be overridden to control the clock, e.g. in tests.
     *
     * @return the current time in milliseconds
     */
    protected long now() {
        return System.currentTimeMillis();
    }

    /*
     * Full cleanup pass, executed at most once per cleanupCycle:
     * shrinks every track, drops hosts whose track became empty and finally
     * enforces the limit on the number of hosts. Caller must hold the lock on this.
     */
    private void cleanupAccessTracker(final long now) {
        if (now - this.lastCleanup < cleanupCycle) return;
        this.lastCleanup = now;

        // shrink all tracks; remove hosts that had no access within maxTrackingTime
        final Iterator<Map.Entry<String, SortedMap<Long, String>>> i = accessTracker.entrySet().iterator();
        while (i.hasNext()) {
            final SortedMap<Long, String> track = i.next().getValue();
            shrink(track, now);
            if (track.isEmpty()) i.remove();
        }

        // if there are more hosts left than maxHostCount, delete just any
        final Iterator<String> hosts = accessTracker.keySet().iterator();
        while (accessTracker.size() > this.maxHostCount && hosts.hasNext()) {
            hosts.next();
            hosts.remove();
        }
    }

    /*
     * Physically removes all entries older than maxTrackingTime and then the oldest
     * entries until at most maxTrackingCount are left. Clearing the headMap view
     * deletes from the map itself; a tailMap view alone would only hide the old
     * entries while they stay in memory.
     */
    private void shrink(final SortedMap<Long, String> track, final long now) {
        track.headMap(Long.valueOf(now - maxTrackingTime)).clear();
        while (!track.isEmpty() && track.size() > this.maxTrackingCount) {
            track.remove(track.firstKey());
        }
    }

    /**
     * Learns that a specific host has accessed a specific path.
     *
     * @param host       the requesting host
     * @param accessPath the accessed path; null is recorded as the string "NULL"
     */
    public synchronized void track(final String host, String accessPath) {
        final long now = now();

        // check storage size (does nothing if the last cleanup is less than one cycle ago)
        cleanupAccessTracker(now);

        if (accessPath == null) accessPath = "NULL";
        SortedMap<Long, String> track = accessTracker.get(host);
        if (track == null) {
            track = new TreeMap<Long, String>();
            accessTracker.put(host, track);
        }
        track.put(Long.valueOf(now), accessPath);

        // enforce the age and count limits right away, not only once per cleanup cycle
        shrink(track, now);
        if (track.isEmpty()) accessTracker.remove(host);
    }

    /**
     * Returns the accesses of a host that are not older than maxTrackingTime.
     * If all accesses of the host have expired, the host is removed from the tracker.
     *
     * @param host the requesting host
     * @return a copy of the mapping from access time to path (possibly empty),
     *         or null if the host is not tracked
     */
    public synchronized SortedMap<Long, String> accessTrack(final String host) {
        final SortedMap<Long, String> track = accessTracker.get(host);
        if (track == null) return null;

        // clear too old entries
        shrink(track, now());
        if (track.isEmpty()) accessTracker.remove(host);

        // hand out a copy, so callers can iterate without interfering with the tracker
        return new TreeMap<Long, String>(track);
    }

    /**
     * @return an iterator over a snapshot of the hosts that are currently tracked
     */
    public synchronized Iterator<String> accessHosts() {
        final HashMap<String, SortedMap<Long, String>> accessTrackerClone = new HashMap<String, SortedMap<Long, String>>(accessTracker);
        return accessTrackerClone.keySet().iterator();
    }
}

@ -25,7 +25,7 @@ import java.util.ArrayList;
import java.util.Hashtable;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.FileUtils;
@ -125,7 +125,7 @@ public class loaderThreads {
try {
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
page = HttpClient.wget(url.toString(), reqHeader, timeout);
page = httpClient.wget(url.toString(), reqHeader, timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {

@ -10,7 +10,7 @@ import java.util.Date;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.userDB;
import de.anomic.http.HttpClient;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponseHeader;
import de.anomic.kelondro.util.Log;
import de.anomic.plasma.plasmaParser;
@ -182,7 +182,7 @@ public class urlRedirectord implements serverHandler, Cloneable {
final yacyURL reqURL = new yacyURL(this.nextURL, null);
// getting URL mimeType
final httpResponseHeader header = HttpClient.whead(reqURL.toString());
final httpResponseHeader header = httpClient.whead(reqURL.toString());
if (plasmaParser.supportedContent(
plasmaParser.PARSER_MODE_URLREDIRECTOR,

@ -62,9 +62,8 @@ import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.ResultURLs;
import de.anomic.http.DefaultCharsetFilePart;
import de.anomic.http.DefaultCharsetStringPart;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpRequestHeader;
import de.anomic.index.indexContainer;
@ -272,10 +271,10 @@ public final class yacyClient {
final httpRequestHeader header = new httpRequestHeader();
header.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
header.put(httpRequestHeader.HOST, vhost);
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, header);
final httpClient client = new httpClient(timeout, header);
client.setProxy(proxyConfig());
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
byte[] content = null;
try {
// send request/data
@ -1080,7 +1079,7 @@ public final class yacyClient {
final httpRequestHeader reqHeader = new httpRequestHeader();
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] content = HttpClient.wget(
final byte[] content = httpClient.wget(
"http://" + target.getPublicAddress() + "/yacy/search.html" +
"?myseed=" + sb.webIndex.seedDB.mySeed().genSeedStr(null) +
"&youare=" + target.hash + "&key=" +

@ -41,8 +41,8 @@ import java.util.Map;
import java.util.TreeMap;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpd;
import de.anomic.http.httpdAlternativeDomainNames;
@ -823,9 +823,9 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
reqHeader.put(httpRequestHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
// init http-client
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(10000, reqHeader);
final httpClient client = new httpClient(10000, reqHeader);
byte[] content = null;
JakartaCommonsHttpResponse res = null;
httpResponse res = null;
try {
// send request
res = client.GET(seedURL.toString());

@ -44,8 +44,8 @@ import java.util.regex.Pattern;
import de.anomic.crawler.HTTPLoader;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpResponseHeader;
import de.anomic.http.httpRequestHeader;
import de.anomic.kelondro.util.Log;
@ -346,8 +346,8 @@ public final class yacyVersion implements Comparator<yacyVersion>, Comparable<ya
File download = null;
final httpRequestHeader header = new httpRequestHeader();
header.put(httpResponseHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(120000, header);
JakartaCommonsHttpResponse res = null;
final httpClient client = new httpClient(120000, header);
httpResponse res = null;
final String name = release.url.getFileName();
try {
res = client.GET(release.url.toString());

@ -48,9 +48,8 @@ import java.util.zip.ZipOutputStream;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import de.anomic.data.translator;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponse;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpd;
import de.anomic.index.indexContainer;
@ -317,8 +316,8 @@ public final class yacy {
// set user-agent
final String userAgent = "yacy/" + Double.toString(version) + " (www.yacy.net; "
+ de.anomic.http.HttpClient.getSystemOST() + ")";
JakartaCommonsHttpClient.setUserAgent(userAgent);
+ httpClient.getSystemOST() + ")";
httpClient.setUserAgent(userAgent);
// start main threads
final String port = sb.getConfig("port", "8080");
@ -430,12 +429,12 @@ public final class yacy {
if (server.isAlive()) try {
// TODO only send request, don't read response (cause server is already down resulting in error)
final yacyURL u = new yacyURL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null);
HttpClient.wget(u.toString(), null, 10000); // kick server
httpClient.wget(u.toString(), null, 10000); // kick server
Log.logConfig("SHUTDOWN", "sent termination signal to server socket");
} catch (final IOException ee) {
Log.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");
}
JakartaCommonsHttpClient.closeAllConnections();
httpClient.closeAllConnections();
MultiThreadedHttpConnectionManager.shutdownAll();
// idle until the processes are down
@ -568,8 +567,8 @@ public final class yacy {
// send 'wget' to web interface
final httpRequestHeader requestHeader = new httpRequestHeader();
requestHeader.put(httpRequestHeader.AUTHORIZATION, "realm=" + encodedPassword); // for http-authentify
final JakartaCommonsHttpClient con = new JakartaCommonsHttpClient(10000, requestHeader);
JakartaCommonsHttpResponse res = null;
final httpClient con = new httpClient(10000, requestHeader);
httpResponse res = null;
try {
res = con.GET("http://localhost:"+ port +"/Steering.html?shutdown=");

Loading…
Cancel
Save