better abstraction of http client identification

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7675 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent a3e707283d
commit d8e934c085

@ -105,6 +105,11 @@ network.unit.definition = defaults/yacy.network.freeworld.unit
# This option is only valid if the network.unit.domain property is set to 'any'
network.unit.domain.nocheck = false
# In non-DHT networks a client may additionally define its own agent name.
# This option is only used if the value is non-empty and network.unit.dht = false;
# that means it is not usable in YaCy p2p configurations, only in private portal configurations.
network.unit.tenant.agent =
# Update process properties
# The update server location is given in the network.unit.definition,
# but the settings for update processing and cycles are individual.

@ -38,7 +38,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
@ -99,7 +99,7 @@ public class ConfigAppearance_p {
final Iterator<String> it;
try {
final DigestURI u = new DigestURI(url);
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
it = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

@ -37,7 +37,7 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
@ -103,7 +103,7 @@ public class ConfigLanguage_p {
Iterator<String> it;
try {
final DigestURI u = new DigestURI(url);
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
it = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
} catch(final IOException e) {
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

@ -36,7 +36,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.util.MapTools;
@ -146,7 +146,7 @@ public class Network {
prop.put("table_my-url", seed.get(yacySeed.SEEDLISTURL, ""));
// generating the location string
prop.putHTML("table_my-location", MultiProtocolURI.generateLocation());
prop.putHTML("table_my-location", ClientIdentification.generateLocation());
}
// overall results: Network statistics
@ -366,11 +366,11 @@ public class Network {
prop.putHTML(STR_TABLE_LIST + conCount + "_fullname", seed.get(yacySeed.NAME, "deadlink"));
userAgent = null;
if (seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) {
userAgent = MultiProtocolURI.yacybotUserAgent;
location = MultiProtocolURI.generateLocation();
userAgent = ClientIdentification.getUserAgent();
location = ClientIdentification.generateLocation();
} else {
userAgent = sb.peers.peerActions.getUserAgent(seed.getIP());
location = MultiProtocolURI.parseLocationInUserAgent(userAgent);
location = ClientIdentification.parseLocationInUserAgent(userAgent);
}
prop.put(STR_TABLE_LIST + conCount + "_location", location);
if (complete) {

@ -48,8 +48,8 @@ import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacySeed;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
@ -145,7 +145,7 @@ public class sharedBlacklist_p {
// get List
DigestURI u = new DigestURI(downloadURLOld);
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
} catch (final Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.putHTML("status_name", hash);
@ -162,7 +162,7 @@ public class sharedBlacklist_p {
try {
final DigestURI u = new DigestURI(downloadURL);
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000));
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

@ -38,6 +38,7 @@ import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -283,7 +284,7 @@ public class RobotsTxt {
RequestHeader reqHeaders = new RequestHeader();
// add yacybot user agent
reqHeaders.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
// adding referer
reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));

@ -28,6 +28,7 @@ import java.io.IOException;
import java.util.Date;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -111,7 +112,7 @@ public final class HTTPLoader {
// create a request header
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
@ -233,7 +234,7 @@ public final class HTTPLoader {
// create a request header
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE);
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);

@ -70,8 +70,8 @@ import java.util.logging.Level;
import java.util.logging.LogManager;
import java.util.logging.Logger;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -99,7 +99,7 @@ import de.anomic.server.serverObjects;
public final class HTTPDProxyHandler {
public static final String yacyUserAgent = "yacyproxy (" + MultiProtocolURI.systemOST +") http://yacy.net/bot.html";
private static final String yacyProxyUserAgent = "yacyproxy (" + ClientIdentification.yacySystem +") http://yacy.net/bot.html";
// static variables
// can only be instantiated upon first instantiation of this class object
@ -1529,7 +1529,7 @@ public final class HTTPDProxyHandler {
private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) {
userAgentStr.setLength(0);
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyUserAgent);
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyProxyUserAgent);
final int pos = browserUserAgent.lastIndexOf(')');
if (pos >= 0) {
userAgentStr

@ -80,6 +80,7 @@ import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ConnectionInfo;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
@ -831,8 +832,10 @@ public final class Switchboard extends serverSwitch {
setConfig(plasmaSwitchboardConstants.INDEX_RECEIVE_ALLOW, true);
}
*/
MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global"));
// write the YaCy network identification inside the yacybot client user agent to distinguish networks
String newagent = ClientIdentification.generateYaCyBot(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global"));
if (!this.getConfigBool("network.unit.dht", false) && this.getConfig("network.unit.tenant.agent", "").length() > 0) newagent = this.getConfig("network.unit.tenant.agent", "");
ClientIdentification.setUserAgent(newagent);
}
public void switchNetwork(final String networkDefinition) throws FileNotFoundException, IOException {
@ -2598,7 +2601,7 @@ public final class Switchboard extends serverSwitch {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));
@ -2760,7 +2763,7 @@ public final class Switchboard extends serverSwitch {
*/
public static Map<String, String> loadFileAsMap(final DigestURI url) {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
try {

@ -42,7 +42,7 @@ import java.util.TreeMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -570,7 +570,7 @@ public class serverSwitch {
netdef = netdef.trim();
try {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
byte[] data = client.GETbytes(uri);

@ -26,7 +26,7 @@ package de.anomic.tools;
import java.util.Hashtable;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.kelondro.data.meta.DigestURI;
@ -118,7 +118,7 @@ public class loaderThreads {
public void run() {
try {
page = url.get(MultiProtocolURI.yacybotUserAgent, timeout);
page = url.get(ClientIdentification.getUserAgent(), timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {

@ -62,6 +62,7 @@ import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.services.federated.opensearch.SRURSSConnector;
import net.yacy.kelondro.data.meta.URIMetadataRow;
@ -99,12 +100,12 @@ public final class yacyClient {
private static byte[] postToFile(final yacySeed target, final String filename, final Map<String,ContentBody> parts, final int timeout) throws IOException {
// return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
return httpClient.POSTbytes(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), target.getHexHash() + ".yacyh", parts, false);
}
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final Map<String,ContentBody> parts, final int timeout) throws IOException {
// return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
return httpClient.POSTbytes(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts, false);
}
@ -142,7 +143,7 @@ public final class yacyClient {
// send request
final long start = System.currentTimeMillis();
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 30000);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 30000);
final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts, false);
yacyCore.log.logInfo("yacyClient.hello thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
result = FileUtils.table(content);
@ -348,7 +349,7 @@ public final class yacyClient {
parts.put("count", UTF8.StringBody(Integer.toString(maxCount)));
parts.put("time", UTF8.StringBody(Long.toString(maxTime)));
// final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, (int) maxTime);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), (int) maxTime);
final byte[] result = httpClient.POSTbytes(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {
@ -655,7 +656,7 @@ public final class yacyClient {
// resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 60000);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 60000);
resultMap = FileUtils.table(httpClient.POSTbytes(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), hostname, parts, false));
// evaluate request result
@ -803,7 +804,7 @@ public final class yacyClient {
parts.put("lurlEntry", UTF8.StringBody(((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
// send request
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 10000);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 10000);
final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), target.getHexHash() + ".yacyh", parts, false);
return FileUtils.table(content);
} catch (final Exception e) {
@ -944,7 +945,7 @@ public final class yacyClient {
parts.put("entryc", UTF8.StringBody(Integer.toString(indexcount)));
parts.put("indexes", UTF8.StringBody(entrypost.toString()));
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
final Iterator<String> v = FileUtils.strings(content);
// this should return a list of urlhashes that are unknown
@ -990,7 +991,7 @@ public final class yacyClient {
try {
parts.put("urlc", UTF8.StringBody(Integer.toString(urlc)));
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), targetSeed.getHexHash() + ".yacyh", parts, gzipBody);
final Iterator<String> v = FileUtils.strings(content);
@ -1014,7 +1015,7 @@ public final class yacyClient {
try {
final Map<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
// final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
final HTTPClient httpclient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 5000);
final HTTPClient httpclient = new HTTPClient(ClientIdentification.getUserAgent(), 5000);
final byte[] content = httpclient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), targetSeed.getHexHash() + ".yacyh", parts, false);
return FileUtils.table(content);
} catch (final Exception e) {
@ -1104,7 +1105,7 @@ public final class yacyClient {
byte[] res;
try {
// res = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(url, timeout, vhost, newpost, true);
final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout);
final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
res = httpClient.POSTbytes(url, vhost, newpost, true);
System.out.println(UTF8.String(res));
} catch (IOException e1) {

@ -46,6 +46,7 @@ import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -283,7 +284,7 @@ public final class yacyRelease extends yacyVersion {
File download = null;
// setup httpClient
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final String name = this.getUrl().getFileName();
byte[] signatureBytes = null;

@ -40,8 +40,8 @@ import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -885,7 +885,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());

@ -84,69 +84,6 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
}
}
/**
* provide system information for client identification
*/
public static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
"; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();
public static String yacybotUserAgent = "yacybot (" + systemOST + ") http://yacy.net/bot.html";
/**
 * Rebuilds the shared yacybot user agent so that the given extra
 * information is placed in front of the system information.
 *
 * @param addinfo text inserted directly after the opening parenthesis
 */
public static void addBotInfo(String addinfo) {
    final StringBuilder ua = new StringBuilder("yacybot (");
    ua.append(addinfo);
    ua.append("; ");
    ua.append(systemOST);
    ua.append(") http://yacy.net/bot.html");
    yacybotUserAgent = ua.toString();
}
/**
 * Extracts the location token from a user agent string.
 *
 * The location is expected after the last ';' and before the first ')'.
 * If the last ';' is not in front of the first ')', the text following the
 * first '(' (up to the first ')' if one follows) is used instead.
 *
 * @param userAgent in form "useragentinfo (some params; _location_) additional info";
 *        may be null
 * @return the trimmed location, or an empty string if userAgent is null or
 *         no location can be located because there is no '(' to fall back on
 */
public static String parseLocationInUserAgent(final String userAgent) {
    if (userAgent == null) {
        // an unknown agent has no location; avoid a NullPointerException
        return "";
    }
    final int firstOpenParenthesis = userAgent.indexOf('(');
    final int lastSemicolon = userAgent.lastIndexOf(';');
    final int firstClosedParenthesis = userAgent.indexOf(')');

    if (lastSemicolon < firstClosedParenthesis) {
        // ; Location )
        if (firstClosedParenthesis > 0) {
            return userAgent.substring(lastSemicolon + 1, firstClosedParenthesis).trim();
        }
        return userAgent.substring(lastSemicolon + 1).trim();
    }
    if (firstOpenParenthesis < 0) {
        // no parenthesised section at all: nothing that could be a location
        // (the former check against userAgent.length() was always true)
        return "";
    }
    if (firstClosedParenthesis > firstOpenParenthesis) {
        // ( Location )
        return userAgent.substring(firstOpenParenthesis + 1, firstClosedParenthesis).trim();
    }
    // ( Location <end>
    return userAgent.substring(firstOpenParenthesis + 1).trim();
}
/**
 * Builds the location string "region/language" from JVM system properties.
 *
 * The region is the part of the "user.timezone" property in front of its
 * first '/' (the whole value if there is no '/' after the first character,
 * "nowhere" if the property is not set); the language is the
 * "user.language" property ("dumb" if it is not set).
 *
 * @return the location string
 */
public static String generateLocation() {
    final String zone = System.getProperty("user.timezone", "nowhere");
    final int slash = zone.indexOf('/');
    final String region = (slash > 0) ? zone.substring(0, slash) : zone;
    return region + "/" + System.getProperty("user.language", "dumb");
}
// class variables
protected final String protocol, userInfo;
protected String host, path, quest, ref;

@ -0,0 +1,117 @@
/**
* ClientIdentification
* Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
* First released 26.04.2011 at http://yacy.net
*
* $LastChangedDate: 2011-04-21 23:59:56 +0200 (Do, 21 Apr 2011) $
* $LastChangedRevision: 7673 $
* $LastChangedBy: orbiter $
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.protocol;
public class ClientIdentification {

    /**
     * System information (this is part of the YaCy protocol), built as
     * "os.arch os.name os.version; java java.version; location" from the JVM
     * system properties and {@link #generateLocation()}.
     * The value is computed once when this class is initialised.
     */
    public static final String yacySystem = System.getProperty("os.arch", "no-os-arch") + " " +
            System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
            "; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();

    /**
     * The user agent handed out by {@link #getUserAgent()}.
     * The default is a YaCy bot agent carrying the additional info "new".
     * Declared volatile so that a value stored with {@link #setUserAgent(String)}
     * is visible to every thread that reads the agent afterwards.
     */
    private static volatile String agent = generateYaCyBot("new");

    /**
     * Produce a YaCy user agent string.
     *
     * @param addinfo additional information that is placed in front of the
     *        system information inside the parentheses
     * @return "yacybot (addinfo; yacySystem) http://yacy.net/bot.html"
     */
    public static String generateYaCyBot(String addinfo) {
        return "yacybot (" + addinfo + "; " + yacySystem + ") http://yacy.net/bot.html";
    }

    /**
     * Set the user agent that is returned by {@link #getUserAgent()}.
     *
     * @param newagent the complete user agent string; stored as given
     */
    public static void setUserAgent(String newagent) {
        agent = newagent;
    }

    /**
     * Get the user agent string for this cora client.
     *
     * @return the value last stored with {@link #setUserAgent(String)}, or the
     *         default YaCy bot agent if none was set
     */
    public static String getUserAgent() {
        return agent;
    }

    /**
     * Generate the location string "region/language" from JVM system properties.
     *
     * The region is the part of the "user.timezone" property in front of its
     * first '/' (the whole value if there is no '/' after the first character,
     * "nowhere" if the property is not set); the language is the
     * "user.language" property ("dumb" if it is not set).
     *
     * @return the location string
     */
    public static String generateLocation() {
        String loc = System.getProperty("user.timezone", "nowhere");
        final int p = loc.indexOf('/');
        if (p > 0) {
            loc = loc.substring(0, p);
        }
        loc = loc + "/" + System.getProperty("user.language", "dumb");
        return loc;
    }

    /**
     * Get the location out of a user agent string.
     *
     * The location is expected after the last ';' and before the first ')'.
     * If the last ';' is not in front of the first ')', the text following the
     * first '(' (up to the first ')' if one follows) is used instead.
     *
     * @param userAgent in form "useragentinfo (some params; _location_) additional info";
     *        may be null
     * @return the trimmed location, or an empty string if userAgent is null or
     *         no location can be located because there is no '(' to fall back on
     */
    public static String parseLocationInUserAgent(final String userAgent) {
        if (userAgent == null) {
            // an unknown agent has no location; avoid a NullPointerException
            return "";
        }
        final int firstOpenParenthesis = userAgent.indexOf('(');
        final int lastSemicolon = userAgent.lastIndexOf(';');
        final int firstClosedParenthesis = userAgent.indexOf(')');

        if (lastSemicolon < firstClosedParenthesis) {
            // ; Location )
            if (firstClosedParenthesis > 0) {
                return userAgent.substring(lastSemicolon + 1, firstClosedParenthesis).trim();
            }
            return userAgent.substring(lastSemicolon + 1).trim();
        }
        if (firstOpenParenthesis < 0) {
            // no parenthesised section at all: nothing that could be a location
            // (the former check against userAgent.length() was always true)
            return "";
        }
        if (firstClosedParenthesis > firstOpenParenthesis) {
            // ( Location )
            return userAgent.substring(firstOpenParenthesis + 1, firstClosedParenthesis).trim();
        }
        // ( Location <end>
        return userAgent.substring(firstOpenParenthesis + 1).trim();
    }
}

@ -45,6 +45,7 @@ import javax.net.ssl.X509TrustManager;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ConnectionInfo;
import org.apache.http.Header;
@ -150,7 +151,7 @@ public class HTTPClient {
*/
HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
// UserAgent
HttpProtocolParams.setUserAgent(httpParams, MultiProtocolURI.yacybotUserAgent);
HttpProtocolParams.setUserAgent(httpParams, ClientIdentification.getUserAgent());
HttpProtocolParams.setUseExpectContinue(httpParams, false); // IMPORTANT - if not set to 'false' then servers do not process the request until a time-out of 2 seconds
/**
* HTTP connection settings

@ -43,6 +43,7 @@ import net.yacy.cora.document.RSSFeed;
import net.yacy.cora.document.RSSMessage;
import net.yacy.cora.document.RSSReader;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.services.federated.SearchAccumulator;
import net.yacy.cora.services.federated.SearchHub;
@ -203,7 +204,7 @@ public class SRURSSConnector extends Thread implements SearchAccumulator {
parts.put("resource", UTF8.StringBody(global ? "global" : "local"));
parts.put("nav", UTF8.StringBody("none"));
// result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
final HTTPClient httpClient = new HTTPClient(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent, (int) timeout);
final HTTPClient httpClient = new HTTPClient(userAgent == null ? ClientIdentification.getUserAgent() : userAgent, (int) timeout);
result = httpClient.POSTbytes(new MultiProtocolURI(rssSearchServiceURL), uri.getHost(), parts, false);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);

@ -37,6 +37,7 @@ import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
@ -281,7 +282,7 @@ public class htmlParser extends AbstractParser implements Parser {
MultiProtocolURI url;
try {
url = new MultiProtocolURI(args[0]);
byte[] content = url.get(MultiProtocolURI.yacybotUserAgent, 3000);
byte[] content = url.get(ClientIdentification.getUserAgent(), 3000);
Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
String title = document[0].dc_title();
System.out.println(title);

@ -47,6 +47,7 @@ import org.xml.sax.SAXParseException;
import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -114,7 +115,7 @@ public class sitemapParser extends AbstractParser implements Parser {
public static SitemapReader parse(final DigestURI sitemapURL) throws IOException {
// download document
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient();
client.setTimout(5000);
client.setHeader(requestHeader.entrySet());

@ -42,6 +42,7 @@ import java.util.concurrent.TimeUnit;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -199,7 +200,7 @@ public final class LoaderDispatcher {
// create request header values and a response object because we need that
// in case that we want to return the cached content in the next step
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));

@ -47,8 +47,8 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.OrderedScoreMap;
@ -286,9 +286,7 @@ public final class yacy {
yacyRelease.deleteOldDownloads(sb.releasePath, deleteOldDownloadsAfterDays );
// set user-agent
final String userAgent = "yacy/" + Float.toString(version) + " (www.yacy.net; "
+ MultiProtocolURI.systemOST + ")";
HTTPClient.setDefaultUserAgent(userAgent);
HTTPClient.setDefaultUserAgent(ClientIdentification.getUserAgent());
// start main threads
final String port = sb.getConfig("port", "8090");

Loading…
Cancel
Save