diff --git a/defaults/yacy.init b/defaults/yacy.init index 6dcfb433f..2350ccb65 100644 --- a/defaults/yacy.init +++ b/defaults/yacy.init @@ -105,6 +105,11 @@ network.unit.definition = defaults/yacy.network.freeworld.unit # This option is only valid if the network.unit.domain property is set to 'any' network.unit.domain.nocheck = false +# in addition to non-dht networks a client may have its own agent name +# this option is only used if the value is non-empty and network.unit.dht = false +# that means it is not usable in YaCy p2p-configurations, only in private portal configurations +network.unit.tenant.agent = + # Update process properties # The update server location is given in the network.unit.definition, # but the settings for update processing and cycles are individual. diff --git a/htroot/ConfigAppearance_p.java b/htroot/ConfigAppearance_p.java index baf9ff433..68a939952 100644 --- a/htroot/ConfigAppearance_p.java +++ b/htroot/ConfigAppearance_p.java @@ -38,7 +38,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.util.FileUtils; @@ -99,7 +99,7 @@ public class ConfigAppearance_p { final Iterator it; try { final DigestURI u = new DigestURI(url); - it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000)); + it = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000)); } catch (final IOException e) { prop.put("status", "1");// unable to get URL prop.put("status_url", url); diff --git a/htroot/ConfigLanguage_p.java b/htroot/ConfigLanguage_p.java index 3c91ac5ef..01e8143c6 100644 --- a/htroot/ConfigLanguage_p.java +++ b/htroot/ConfigLanguage_p.java @@ -37,7 +37,7 @@ import java.io.PrintWriter; import java.util.Iterator; import java.util.List; -import net.yacy.cora.document.MultiProtocolURI; +import 
net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.util.FileUtils; @@ -103,7 +103,7 @@ public class ConfigLanguage_p { Iterator it; try { final DigestURI u = new DigestURI(url); - it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000)); + it = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000)); } catch(final IOException e) { prop.put("status", "1");//unable to get url prop.put("status_url", url); diff --git a/htroot/Network.java b/htroot/Network.java index 6d266194f..25ac5dd53 100644 --- a/htroot/Network.java +++ b/htroot/Network.java @@ -36,7 +36,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; -import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.kelondro.util.MapTools; @@ -146,7 +146,7 @@ public class Network { prop.put("table_my-url", seed.get(yacySeed.SEEDLISTURL, "")); // generating the location string - prop.putHTML("table_my-location", MultiProtocolURI.generateLocation()); + prop.putHTML("table_my-location", ClientIdentification.generateLocation()); } // overall results: Network statistics @@ -366,11 +366,11 @@ public class Network { prop.putHTML(STR_TABLE_LIST + conCount + "_fullname", seed.get(yacySeed.NAME, "deadlink")); userAgent = null; if (seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) { - userAgent = MultiProtocolURI.yacybotUserAgent; - location = MultiProtocolURI.generateLocation(); + userAgent = ClientIdentification.getUserAgent(); + location = ClientIdentification.generateLocation(); } else { userAgent = sb.peers.peerActions.getUserAgent(seed.getIP()); - location = MultiProtocolURI.parseLocationInUserAgent(userAgent); + location = 
ClientIdentification.parseLocationInUserAgent(userAgent); } prop.put(STR_TABLE_LIST + conCount + "_location", location); if (complete) { diff --git a/htroot/sharedBlacklist_p.java b/htroot/sharedBlacklist_p.java index b34d182a5..1ed89a5ee 100644 --- a/htroot/sharedBlacklist_p.java +++ b/htroot/sharedBlacklist_p.java @@ -48,8 +48,8 @@ import de.anomic.search.Switchboard; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.yacy.yacySeed; -import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.document.parser.html.CharacterCoding; import net.yacy.kelondro.data.meta.DigestURI; @@ -145,7 +145,7 @@ public class sharedBlacklist_p { // get List DigestURI u = new DigestURI(downloadURLOld); - otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000)); + otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000)); } catch (final Exception e) { prop.put("status", STATUS_PEER_UNKNOWN); prop.putHTML("status_name", hash); @@ -162,7 +162,7 @@ public class sharedBlacklist_p { try { final DigestURI u = new DigestURI(downloadURL); - otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000)); + otherBlacklist = FileUtils.strings(u.get(ClientIdentification.getUserAgent(), 10000)); } catch (final Exception e) { prop.put("status", STATUS_URL_PROBLEM); prop.putHTML("status_address",downloadURL); diff --git a/source/de/anomic/crawler/RobotsTxt.java b/source/de/anomic/crawler/RobotsTxt.java index ea7c4d6c8..44efbd7dd 100644 --- a/source/de/anomic/crawler/RobotsTxt.java +++ b/source/de/anomic/crawler/RobotsTxt.java @@ -38,6 +38,7 @@ import java.util.regex.Pattern; import org.apache.log4j.Logger; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import 
net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -283,7 +284,7 @@ public class RobotsTxt { RequestHeader reqHeaders = new RequestHeader(); // add yacybot user agent - reqHeaders.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + reqHeaders.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); // adding referer reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true)); diff --git a/source/de/anomic/crawler/retrieval/HTTPLoader.java b/source/de/anomic/crawler/retrieval/HTTPLoader.java index 4e58ee710..f33b6da4c 100644 --- a/source/de/anomic/crawler/retrieval/HTTPLoader.java +++ b/source/de/anomic/crawler/retrieval/HTTPLoader.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.util.Date; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -111,7 +112,7 @@ public final class HTTPLoader { // create a request header final RequestHeader requestHeader = new RequestHeader(); - requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); DigestURI refererURL = null; if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true)); @@ -233,7 +234,7 @@ public final class HTTPLoader { // create a request header final RequestHeader requestHeader = new RequestHeader(); - requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); 
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE); requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING); diff --git a/source/de/anomic/http/server/HTTPDProxyHandler.java b/source/de/anomic/http/server/HTTPDProxyHandler.java index ca326b3e9..c09e52b4f 100644 --- a/source/de/anomic/http/server/HTTPDProxyHandler.java +++ b/source/de/anomic/http/server/HTTPDProxyHandler.java @@ -70,8 +70,8 @@ import java.util.logging.Level; import java.util.logging.LogManager; import java.util.logging.Logger; -import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; @@ -99,7 +99,7 @@ import de.anomic.server.serverObjects; public final class HTTPDProxyHandler { - public static final String yacyUserAgent = "yacyproxy (" + MultiProtocolURI.systemOST +") http://yacy.net/bot.html"; + private static final String yacyProxyUserAgent = "yacyproxy (" + ClientIdentification.yacySystem +") http://yacy.net/bot.html"; // static variables // can only be instantiated upon first instantiation of this class object @@ -1529,7 +1529,7 @@ public final class HTTPDProxyHandler { private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) { userAgentStr.setLength(0); - final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyUserAgent); + final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyProxyUserAgent); final int pos = browserUserAgent.lastIndexOf(')'); if (pos >= 0) { userAgentStr diff --git a/source/de/anomic/search/Switchboard.java b/source/de/anomic/search/Switchboard.java index 7d88f2151..aff6fc76b 100644 --- a/source/de/anomic/search/Switchboard.java +++ b/source/de/anomic/search/Switchboard.java @@ -80,6 
+80,7 @@ import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSReader; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ConnectionInfo; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; @@ -831,8 +832,10 @@ public final class Switchboard extends serverSwitch { setConfig(plasmaSwitchboardConstants.INDEX_RECEIVE_ALLOW, true); } */ - MultiProtocolURI.addBotInfo(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")); - + // write the YaCy network identification inside the yacybot client user agent to distinguish networks + String newagent = ClientIdentification.generateYaCyBot(getConfig(SwitchboardConstants.NETWORK_NAME, "") + (isRobinsonMode() ? "-" : "/") + getConfig(SwitchboardConstants.NETWORK_DOMAIN, "global")); + if (!this.getConfigBool("network.unit.dht", false) && this.getConfig("network.unit.tenant.agent", "").length() > 0) newagent = this.getConfig("network.unit.tenant.agent", ""); + ClientIdentification.setUserAgent(newagent); } public void switchNetwork(final String networkDefinition) throws FileNotFoundException, IOException { @@ -2598,7 +2601,7 @@ public final class Switchboard extends serverSwitch { final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); - reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000)); @@ -2760,7 +2763,7 @@ public final class Switchboard extends serverSwitch { */ public static Map loadFileAsMap(final DigestURI url) { final 
RequestHeader reqHeader = new RequestHeader(); - reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); try { diff --git a/source/de/anomic/server/serverSwitch.java b/source/de/anomic/server/serverSwitch.java index fd45634ba..7f67d4498 100644 --- a/source/de/anomic/server/serverSwitch.java +++ b/source/de/anomic/server/serverSwitch.java @@ -42,7 +42,7 @@ import java.util.TreeMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; @@ -570,7 +570,7 @@ public class serverSwitch { netdef = netdef.trim(); try { final RequestHeader reqHeader = new RequestHeader(); - reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(); client.setHeader(reqHeader.entrySet()); byte[] data = client.GETbytes(uri); diff --git a/source/de/anomic/tools/loaderThreads.java b/source/de/anomic/tools/loaderThreads.java index 7c7b21ebe..7a061b4e2 100644 --- a/source/de/anomic/tools/loaderThreads.java +++ b/source/de/anomic/tools/loaderThreads.java @@ -26,7 +26,7 @@ package de.anomic.tools; import java.util.Hashtable; -import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.kelondro.data.meta.DigestURI; @@ -118,7 +118,7 @@ public class loaderThreads { public void run() { try { - page = url.get(MultiProtocolURI.yacybotUserAgent, timeout); + page = url.get(ClientIdentification.getUserAgent(), 
timeout); loaded = true; process.feed(page); if (process.status() == loaderCore.STATUS_FAILED) { diff --git a/source/de/anomic/yacy/yacyClient.java b/source/de/anomic/yacy/yacyClient.java index cf411812c..1937e9963 100644 --- a/source/de/anomic/yacy/yacyClient.java +++ b/source/de/anomic/yacy/yacyClient.java @@ -62,6 +62,7 @@ import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSReader; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.services.federated.opensearch.SRURSSConnector; import net.yacy.kelondro.data.meta.URIMetadataRow; @@ -99,12 +100,12 @@ public final class yacyClient { private static byte[] postToFile(final yacySeed target, final String filename, final Map parts, final int timeout) throws IOException { // return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout); return httpClient.POSTbytes(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), target.getHexHash() + ".yacyh", parts, false); } private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final Map parts, final int timeout) throws IOException { // return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout); + final HTTPClient httpClient = new 
HTTPClient(ClientIdentification.getUserAgent(), timeout); return httpClient.POSTbytes(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts, false); } @@ -142,7 +143,7 @@ public final class yacyClient { // send request final long start = System.currentTimeMillis(); // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 30000); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 30000); final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts, false); yacyCore.log.logInfo("yacyClient.hello thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? 
"null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds"); result = FileUtils.table(content); @@ -348,7 +349,7 @@ public final class yacyClient { parts.put("count", UTF8.StringBody(Integer.toString(maxCount))); parts.put("time", UTF8.StringBody(Long.toString(maxTime))); // final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, (int) maxTime); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), (int) maxTime); final byte[] result = httpClient.POSTbytes(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), target.getHexHash() + ".yacyh", parts, false); final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); if (reader == null) { @@ -655,7 +656,7 @@ public final class yacyClient { // resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts)); //resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts)); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 60000); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 60000); resultMap = FileUtils.table(httpClient.POSTbytes(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), hostname, parts, false)); // evaluate request result @@ -803,7 +804,7 @@ public final class yacyClient { parts.put("lurlEntry", UTF8.StringBody(((entry == null) ? 
"" : crypt.simpleEncode(entry.toString(), salt)))); // send request // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 10000); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), 10000); final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), target.getHexHash() + ".yacyh", parts, false); return FileUtils.table(content); } catch (final Exception e) { @@ -944,7 +945,7 @@ public final class yacyClient { parts.put("entryc", UTF8.StringBody(Integer.toString(indexcount))); parts.put("indexes", UTF8.StringBody(entrypost.toString())); // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout); final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), targetSeed.getHexHash() + ".yacyh", parts, gzipBody); final Iterator v = FileUtils.strings(content); // this should return a list of urlhashes that are unknown @@ -990,7 +991,7 @@ public final class yacyClient { try { parts.put("urlc", UTF8.StringBody(Integer.toString(urlc))); // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts, gzipBody); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout); + final 
HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout); final byte[] content = httpClient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), targetSeed.getHexHash() + ".yacyh", parts, gzipBody); final Iterator v = FileUtils.strings(content); @@ -1014,7 +1015,7 @@ public final class yacyClient { try { final Map parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt); // final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts); - final HTTPClient httpclient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, 5000); + final HTTPClient httpclient = new HTTPClient(ClientIdentification.getUserAgent(), 5000); final byte[] content = httpclient.POSTbytes(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), targetSeed.getHexHash() + ".yacyh", parts, false); return FileUtils.table(content); } catch (final Exception e) { @@ -1104,7 +1105,7 @@ public final class yacyClient { byte[] res; try { // res = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(url, timeout, vhost, newpost, true); - final HTTPClient httpClient = new HTTPClient(MultiProtocolURI.yacybotUserAgent, timeout); + final HTTPClient httpClient = new HTTPClient(ClientIdentification.getUserAgent(), timeout); res = httpClient.POSTbytes(url, vhost, newpost, true); System.out.println(UTF8.String(res)); } catch (IOException e1) { diff --git a/source/de/anomic/yacy/yacyRelease.java b/source/de/anomic/yacy/yacyRelease.java index 6644cc9b2..f91ea80af 100644 --- a/source/de/anomic/yacy/yacyRelease.java +++ b/source/de/anomic/yacy/yacyRelease.java @@ -46,6 +46,7 @@ import java.util.concurrent.ConcurrentHashMap; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import 
net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -283,7 +284,7 @@ public final class yacyRelease extends yacyVersion { File download = null; // setup httpClient final RequestHeader reqHeader = new RequestHeader(); - reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final String name = this.getUrl().getFileName(); byte[] signatureBytes = null; diff --git a/source/de/anomic/yacy/yacySeedDB.java b/source/de/anomic/yacy/yacySeedDB.java index 3022eb1f3..7cb1fa8c1 100644 --- a/source/de/anomic/yacy/yacySeedDB.java +++ b/source/de/anomic/yacy/yacySeedDB.java @@ -40,8 +40,8 @@ import java.util.Set; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; -import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; @@ -885,7 +885,7 @@ public final class yacySeedDB implements AlternativeDomainNames { final RequestHeader reqHeader = new RequestHeader(); reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary? 
/**
 * Central place for the identification strings this client sends to other
 * hosts: the system description, the HTTP user agent and the coarse
 * "location" token that is embedded in the YaCy user agent.
 *
 * All members are static. The current user agent is process-wide state that
 * can be replaced at runtime with {@link #setUserAgent(String)}.
 */
public class ClientIdentification {

    /**
     * provide system information (this is part of YaCy protocol)
     *
     * Layout: "os.arch os.name os.version; java java.version; location",
     * where location is the value of {@link #generateLocation()} at class
     * initialization time. Missing system properties are replaced by
     * "no-..." placeholders.
     */
    public static final String yacySystem = System.getProperty("os.arch", "no-os-arch") + " "
            + System.getProperty("os.name", "no-os-name") + " "
            + System.getProperty("os.version", "no-os-version") + "; "
            + "java " + System.getProperty("java.version", "no-java-version") + "; "
            + generateLocation();

    /**
     * the current user agent; defaults to the YaCy bot agent with the
     * additional info "new".
     *
     * Declared volatile because it is replaced through
     * {@link #setUserAgent(String)} and read through {@link #getUserAgent()}
     * from arbitrary threads; volatile guarantees that readers see the most
     * recently written value.
     *
     * Must be declared after {@link #yacySystem}: static initializers run in
     * textual order and generateYaCyBot reads yacySystem.
     */
    private static volatile String agent = generateYaCyBot("new");

    /**
     * produce a YaCy user agent string
     *
     * @param addinfo additional information placed first inside the
     *        parentheses of the user agent (the initial agent uses "new")
     * @return "yacybot (addinfo; yacySystem) http://yacy.net/bot.html"
     */
    public static String generateYaCyBot(String addinfo) {
        return "yacybot (" + addinfo + "; " + yacySystem + ") http://yacy.net/bot.html";
    }

    /**
     * set the user agent that {@link #getUserAgent()} returns from now on
     *
     * @param newagent the complete user agent string; stored as given,
     *        without any validation
     */
    public static void setUserAgent(String newagent) {
        agent = newagent;
    }

    /**
     * produce a userAgent String for this cora client
     *
     * @return the value last passed to {@link #setUserAgent(String)}, or the
     *         default YaCy bot agent if it was never called
     */
    public static String getUserAgent() {
        return agent;
    }

    /**
     * generating the location string
     *
     * The string is built from the system properties "user.timezone" (only
     * the part before the first '/', default "nowhere") and "user.language"
     * (default "dumb"), joined by '/'.
     *
     * @return a string of the form "timezone-prefix/language"
     */
    public static String generateLocation() {
        String zone = System.getProperty("user.timezone", "nowhere");
        final int slash = zone.indexOf('/');
        if (slash > 0) {
            // keep only the leading part of a "Region/City" style id
            zone = zone.substring(0, slash);
        }
        return zone + "/" + System.getProperty("user.language", "dumb");
    }

    /**
     * gets the location out of the user agent
     *
     * location must be after last ; and before first )
     *
     * Fallbacks when that pattern is not present:
     * <ul>
     * <li>no ';' between '(' and the first ')': the whole text between '('
     *     and the first ')' is returned</li>
     * <li>'(' without a following ')': everything after '(' is returned</li>
     * <li>no '(' (and no "; location )" tail), or a null argument: the empty
     *     string is returned</li>
     * </ul>
     *
     * @param userAgent in form "useragentinfo (some params; _location_) additional info";
     *        may be null
     * @return the trimmed location token, never null
     */
    public static String parseLocationInUserAgent(final String userAgent) {
        if (userAgent == null) {
            return "";
        }

        final int open = userAgent.indexOf('(');
        final int close = userAgent.indexOf(')');
        final int lastSemicolon = userAgent.lastIndexOf(';');

        // "...; Location )" - the semicolon must really exist (lastIndexOf
        // yields -1 otherwise) and must lie inside the parenthesised part
        if (lastSemicolon > open && lastSemicolon < close) {
            return userAgent.substring(lastSemicolon + 1, close).trim();
        }

        // without an opening parenthesis there is no parameter section at all
        if (open < 0) {
            return "";
        }

        if (close > open) {
            // ( Location )
            return userAgent.substring(open + 1, close).trim();
        }

        // ( Location
        return userAgent.substring(open + 1).trim();
    }
}
HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1); // UserAgent - HttpProtocolParams.setUserAgent(httpParams, MultiProtocolURI.yacybotUserAgent); + HttpProtocolParams.setUserAgent(httpParams, ClientIdentification.getUserAgent()); HttpProtocolParams.setUseExpectContinue(httpParams, false); // IMPORTANT - if not set to 'false' then servers do not process the request until a time-out of 2 seconds /** * HTTP connection settings diff --git a/source/net/yacy/cora/services/federated/opensearch/SRURSSConnector.java b/source/net/yacy/cora/services/federated/opensearch/SRURSSConnector.java index 59a7e6284..c58f7350a 100644 --- a/source/net/yacy/cora/services/federated/opensearch/SRURSSConnector.java +++ b/source/net/yacy/cora/services/federated/opensearch/SRURSSConnector.java @@ -43,6 +43,7 @@ import net.yacy.cora.document.RSSFeed; import net.yacy.cora.document.RSSMessage; import net.yacy.cora.document.RSSReader; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.services.federated.SearchAccumulator; import net.yacy.cora.services.federated.SearchHub; @@ -203,7 +204,7 @@ public class SRURSSConnector extends Thread implements SearchAccumulator { parts.put("resource", UTF8.StringBody(global ? "global" : "local")); parts.put("nav", UTF8.StringBody("none")); // result = HTTPConnector.getConnector(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts); - final HTTPClient httpClient = new HTTPClient(userAgent == null ? MultiProtocolURI.yacybotUserAgent : userAgent, (int) timeout); + final HTTPClient httpClient = new HTTPClient(userAgent == null ? 
ClientIdentification.getUserAgent() : userAgent, (int) timeout); result = httpClient.POSTbytes(new MultiProtocolURI(rssSearchServiceURL), uri.getHost(), parts, false); final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result); diff --git a/source/net/yacy/document/parser/htmlParser.java b/source/net/yacy/document/parser/htmlParser.java index ff7a8a114..b8fdc6b03 100644 --- a/source/net/yacy/document/parser/htmlParser.java +++ b/source/net/yacy/document/parser/htmlParser.java @@ -37,6 +37,7 @@ import java.util.regex.Pattern; import com.ibm.icu.text.CharsetDetector; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.document.AbstractParser; import net.yacy.document.Document; import net.yacy.document.Parser; @@ -281,7 +282,7 @@ public class htmlParser extends AbstractParser implements Parser { MultiProtocolURI url; try { url = new MultiProtocolURI(args[0]); - byte[] content = url.get(MultiProtocolURI.yacybotUserAgent, 3000); + byte[] content = url.get(ClientIdentification.getUserAgent(), 3000); Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content)); String title = document[0].dc_title(); System.out.println(title); diff --git a/source/net/yacy/document/parser/sitemapParser.java b/source/net/yacy/document/parser/sitemapParser.java index ac3c0db23..9ff28f090 100644 --- a/source/net/yacy/document/parser/sitemapParser.java +++ b/source/net/yacy/document/parser/sitemapParser.java @@ -47,6 +47,7 @@ import org.xml.sax.SAXParseException; import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.document.MultiProtocolURI; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -114,7 +115,7 @@ public class sitemapParser extends AbstractParser implements Parser { public static SitemapReader parse(final 
DigestURI sitemapURL) throws IOException { // download document final RequestHeader requestHeader = new RequestHeader(); - requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(); client.setTimout(5000); client.setHeader(requestHeader.entrySet()); diff --git a/source/net/yacy/repository/LoaderDispatcher.java b/source/net/yacy/repository/LoaderDispatcher.java index 37cad0a4b..63496e168 100644 --- a/source/net/yacy/repository/LoaderDispatcher.java +++ b/source/net/yacy/repository/LoaderDispatcher.java @@ -42,6 +42,7 @@ import java.util.concurrent.TimeUnit; import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; @@ -199,7 +200,7 @@ public final class LoaderDispatcher { // create request header values and a response object because we need that // in case that we want to return the cached content in the next step final RequestHeader requestHeader = new RequestHeader(); - requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent); + requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); DigestURI refererURL = null; if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash()); if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true)); diff --git a/source/net/yacy/yacy.java b/source/net/yacy/yacy.java index 0b4f37660..0f5980ce1 100644 --- a/source/net/yacy/yacy.java +++ b/source/net/yacy/yacy.java @@ -47,8 +47,8 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import net.yacy.cora.date.GenericFormatter; -import 
net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.UTF8; +import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.storage.OrderedScoreMap; @@ -286,9 +286,7 @@ public final class yacy { yacyRelease.deleteOldDownloads(sb.releasePath, deleteOldDownloadsAfterDays ); // set user-agent - final String userAgent = "yacy/" + Float.toString(version) + " (www.yacy.net; " - + MultiProtocolURI.systemOST + ")"; - HTTPClient.setDefaultUserAgent(userAgent); + HTTPClient.setDefaultUserAgent(ClientIdentification.getUserAgent()); // start main threads final String port = sb.getConfig("port", "8090");