- moved yacybot user agent string definition to MultiProtocolURI since there are basic access mechanisms where the bot string is needed

- migrated the 'yacy' user agent to 'yacybot' in many client methods since the 'yacy' user agent is only used for the proxy

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7199 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 14 years ago
parent fc3ee9d8a1
commit d2fd93135c

@ -37,11 +37,11 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -98,7 +98,7 @@ public class ConfigAppearance_p {
Iterator<String> it;
try {
final DigestURI u = new DigestURI(url, null);
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

@ -37,11 +37,11 @@ import java.io.PrintWriter;
import java.util.Iterator;
import java.util.List;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.util.FileUtils;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.WorkTables;
import de.anomic.data.translator;
import de.anomic.search.Switchboard;
@ -103,7 +103,7 @@ public class ConfigLanguage_p {
Iterator<String> it;
try{
final DigestURI u = new DigestURI(url, null);
it = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
it = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
}catch(final IOException e){
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

@ -36,13 +36,12 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.kelondro.util.DateFormatter;
import net.yacy.kelondro.util.MapTools;
import de.anomic.crawler.retrieval.HTTPLoader;
//import de.anomic.http.client.Client;
import de.anomic.search.Switchboard;
import de.anomic.search.SwitchboardConstants;
@ -149,7 +148,7 @@ public class Network {
prop.put("table_my-url", seed.get(yacySeed.SEEDLIST, ""));
// generating the location string
prop.putHTML("table_my-location", HTTPClient.generateLocation());
prop.putHTML("table_my-location", MultiProtocolURI.generateLocation());
}
// overall results: Network statistics
@ -363,8 +362,8 @@ public class Network {
prop.putHTML(STR_TABLE_LIST + conCount + "_fullname", seed.get(yacySeed.NAME, "deadlink"));
userAgent = null;
if (seed.hash != null && seed.hash.equals(sb.peers.mySeed().hash)) {
userAgent = HTTPLoader.yacyUserAgent;
location = HTTPClient.generateLocation();
userAgent = MultiProtocolURI.yacybotUserAgent;
location = MultiProtocolURI.generateLocation();
} else {
userAgent = sb.peers.peerActions.getUserAgent(seed.getIP());
location = parseLocationInUserAgent(userAgent);

@ -39,7 +39,6 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.data.list.ListAccumulator;
import de.anomic.data.list.XMLBlacklistImporter;
@ -48,6 +47,7 @@ import de.anomic.search.Switchboard;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacySeed;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.DigestURI;
@ -143,7 +143,7 @@ public class sharedBlacklist_p {
// get List
DigestURI u = new DigestURI(downloadURLOld, null);
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.putHTML("status_name", Hash);
@ -160,7 +160,7 @@ public class sharedBlacklist_p {
try {
final DigestURI u = new DigestURI(downloadURL, null);
otherBlacklist = FileUtils.strings(u.get(HTTPLoader.yacyUserAgent, 10000));
otherBlacklist = FileUtils.strings(u.get(MultiProtocolURI.yacybotUserAgent, 10000));
} catch (final Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

@ -45,8 +45,6 @@ import net.yacy.kelondro.blob.BEncodedHeap;
import net.yacy.kelondro.index.RowSpaceExceededException;
import net.yacy.kelondro.io.ByteCount;
import de.anomic.crawler.retrieval.HTTPLoader;
public class RobotsTxt {
private static Logger log = Logger.getLogger(RobotsTxt.class);
@ -325,7 +323,7 @@ public class RobotsTxt {
RequestHeader reqHeaders = new RequestHeader();
// add yacybot user agent
reqHeaders.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
reqHeaders.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
// adding referer
reqHeaders.put(RequestHeader.REFERER, (MultiProtocolURI.newURL(robotsURL,"/")).toNormalform(true, true));

@ -30,6 +30,7 @@ import java.net.MalformedURLException;
import java.util.Date;
import java.util.zip.GZIPInputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader;
@ -39,7 +40,6 @@ import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.data.meta.URIMetadataRow;
import net.yacy.kelondro.io.ByteCountInputStream;
import net.yacy.kelondro.logging.Log;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.crawler.retrieval.Request;
import de.anomic.search.Segments;
import de.anomic.search.Switchboard;
@ -62,7 +62,7 @@ public class SitemapImporter extends Thread {
public void run() {
// download document
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
final HTTPClient client = new HTTPClient();
client.setTimout(5000);
client.setHeader(requestHeader.entrySet());

@ -50,8 +50,6 @@ public final class HTTPLoader {
private static final String DEFAULT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7";
public static final long DEFAULT_MAXFILESIZE = 1024 * 1024 * 10;
public static final int DEFAULT_CRAWLING_RETRY_COUNT = 5;
public static final String crawlerUserAgent = "yacybot (" + HTTPClient.getSystemOST() +") http://yacy.net/bot.html";
public static final String yacyUserAgent = "yacy (" + HTTPClient.getSystemOST() +") yacy.net";
/**
* The socket timeout that should be used
@ -101,7 +99,7 @@ public final class HTTPLoader {
// create a request header
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, crawlerUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));
@ -216,7 +214,7 @@ public final class HTTPLoader {
// create a request header
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, crawlerUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
requestHeader.put(HeaderFramework.ACCEPT_LANGUAGE, DEFAULT_LANGUAGE);
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);

@ -71,6 +71,7 @@ import java.util.logging.LogManager;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -86,7 +87,6 @@ import net.yacy.kelondro.logging.Log;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.repository.Blacklist;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
//import de.anomic.http.client.Client;
@ -98,6 +98,9 @@ import de.anomic.server.serverObjects;
public final class HTTPDProxyHandler {
public static final String yacyUserAgent = "yacy (" + MultiProtocolURI.systemOST +") yacy.net";
// static variables
// can only be instantiated upon first instantiation of this class object
private static Switchboard sb = null;
@ -1539,7 +1542,7 @@ public final class HTTPDProxyHandler {
private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) {
userAgentStr.setLength(0);
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyUserAgent);
final int pos = browserUserAgent.lastIndexOf(')');
if (pos >= 0) {
userAgentStr

@ -124,7 +124,6 @@ import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.RobotsTxt;
import de.anomic.crawler.CrawlProfile.CacheStrategy;
import de.anomic.crawler.retrieval.EventOrigin;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.crawler.retrieval.Request;
import de.anomic.crawler.retrieval.Response;
import de.anomic.data.LibraryProvider;
@ -2406,7 +2405,7 @@ public final class Switchboard extends serverSwitch {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
client.setTimout((int) getConfigLong("bootstrapLoadTimeout", 20000));
@ -2557,7 +2556,7 @@ public final class Switchboard extends serverSwitch {
*/
public static Map<String, String> loadFileAsMap(final DigestURI url) {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet());
try {

@ -23,11 +23,10 @@ package de.anomic.tools;
import java.util.Hashtable;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.http.ProxySettings;
import net.yacy.kelondro.data.meta.DigestURI;
import de.anomic.crawler.retrieval.HTTPLoader;
public class loaderThreads {
// global values for loader threads
@ -116,7 +115,7 @@ public class loaderThreads {
public void run() {
try {
page = url.get(HTTPLoader.crawlerUserAgent, timeout);
page = url.get(MultiProtocolURI.yacybotUserAgent, timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {

@ -83,7 +83,6 @@ import org.apache.http.entity.mime.content.StringBody;
import de.anomic.crawler.ResultURLs;
import de.anomic.crawler.retrieval.EventOrigin;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.search.ContentDomain;
import de.anomic.search.QueryParams;
import de.anomic.search.RankingProfile;
@ -99,10 +98,10 @@ public final class yacyClient {
private static byte[] postToFile(final yacySeed target, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/" + filename), timeout, target.getHexHash() + ".yacyh", parts);
}
private static byte[] postToFile(final yacySeedDB seedDB, final String targetHash, final String filename, final LinkedHashMap<String,ContentBody> parts, final int timeout) throws IOException {
return HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
return HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress(seedDB, targetHash) + "/yacy/" + filename), timeout, yacySeed.b64Hash2hexHash(targetHash)+ ".yacyh", parts);
}
/**
@ -138,7 +137,7 @@ public final class yacyClient {
parts.put("seed", new StringBody(mySeed.genSeedStr(salt)));
// send request
final long start = System.currentTimeMillis();
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/hello.html"), 30000, yacySeed.b64Hash2hexHash(otherHash) + ".yacyh", parts);
yacyCore.log.logInfo("yacyClient.publishMySeed thread '" + Thread.currentThread().getName() + "' contacted peer at " + address + ", received " + ((content == null) ? "null" : content.length) + " bytes, time = " + (System.currentTimeMillis() - start) + " milliseconds");
result = FileUtils.table(content);
} catch (final Exception e) {
@ -331,7 +330,7 @@ public final class yacyClient {
parts.put("call", new StringBody("remotecrawl"));
parts.put("count", new StringBody(Integer.toString(maxCount)));
parts.put("time", new StringBody(Long.toString(maxTime)));
final byte[] result = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/urls.xml"), (int) maxTime, target.getHexHash() + ".yacyh", parts);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {
yacyCore.log.logWarning("yacyClient.queryRemoteCrawlURLs failed asking peer '" + target.getName() + "': probably bad response from remote peer (1), reader == null");
@ -629,8 +628,8 @@ public final class yacyClient {
parts.put("profile", new StringBody(crypt.simpleEncode(rankingProfile.toExternalString())));
parts.put("constraint", new StringBody((constraint == null) ? "" : constraint.exportB64()));
if (secondarySearchSuperviser != null) parts.put("abstracts", new StringBody("auto"));
resultMap = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
//resultMap = FileUtils.table(HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + hostaddress + "/yacy/search.html"), 60000, hostname, parts));
//resultMap = FileUtils.table(HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + target.getClusterAddress() + "/yacy/search.html"), 60000, target.getHexHash() + ".yacyh", parts));
// evaluate request result
if (resultMap == null || resultMap.isEmpty()) throw new IOException("resultMap is NULL");
@ -750,7 +749,7 @@ public final class yacyClient {
parts.put("filename", new StringBody(filename));
parts.put("filesize", new StringBody(Long.toString(filesize)));
parts.put("can-send-protocol", new StringBody("http"));
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 10000, targetAddress, parts);
final Map<String, String> result = FileUtils.table(content);
return result;
} catch (final Exception e) {
@ -774,7 +773,7 @@ public final class yacyClient {
parts.put("md5", new StringBody(Digest.encodeMD5Hex(file)));
parts.put("access", new StringBody(access));
parts.put("filename", new ByteArrayBody(file, filename));
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + targetAddress + "/yacy/transfer.html"), 20000, targetAddress, parts);
final Map<String, String> result = FileUtils.table(content);
return result;
} catch (final Exception e) {
@ -851,7 +850,7 @@ public final class yacyClient {
parts.put("wordh", new StringBody(wordhashes));
parts.put("lurlEntry", new StringBody(((entry == null) ? "" : crypt.simpleEncode(entry.toString(), salt))));
// send request
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/crawlReceipt.html"), 10000, target.getHexHash() + ".yacyh", parts);
return FileUtils.table(content);
} catch (final Exception e) {
// most probably a network time-out exception
@ -1006,7 +1005,7 @@ public final class yacyClient {
parts.put("wordc", new StringBody(Integer.toString(indexes.size())));
parts.put("entryc", new StringBody(Integer.toString(indexcount)));
parts.put("indexes", new StringBody(entrypost.toString()));
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferRWI.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final Iterator<String> v = FileUtils.strings(content);
// this should return a list of urlhashes that are unknown
@ -1050,7 +1049,7 @@ public final class yacyClient {
}
try {
parts.put("urlc", new StringBody(Integer.toString(urlc)));
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/transferURL.html"), timeout, targetSeed.getHexHash() + ".yacyh", parts);
final Iterator<String> v = FileUtils.strings(content);
final Map<String, String> result = FileUtils.table(v);
@ -1072,7 +1071,7 @@ public final class yacyClient {
if (address == null) { address = "localhost:8080"; }
try {
final LinkedHashMap<String,ContentBody> parts = yacyNetwork.basicRequestParts(Switchboard.getSwitchboard(), targetSeed.hash, salt);
final byte[] content = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
final byte[] content = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI("http://" + address + "/yacy/profile.html"), 5000, targetSeed.getHexHash() + ".yacyh", parts);
return FileUtils.table(content);
} catch (final Exception e) {
yacyCore.log.logSevere("yacyClient.getProfile error:" + e.getMessage());
@ -1162,7 +1161,7 @@ public final class yacyClient {
}
byte[] res;
try {
res = HTTPConnector.getConnector(HTTPLoader.crawlerUserAgent).post(url, timeout, vhost, newpost);
res = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(url, timeout, vhost, newpost);
System.out.println(new String(res));
} catch (IOException e1) {
Log.logException(e1);

@ -57,7 +57,6 @@ import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.OS;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.retrieval.HTTPLoader;
import de.anomic.search.Switchboard;
import de.anomic.server.serverCore;
import de.anomic.tools.CryptoLib;
@ -283,7 +282,7 @@ public final class yacyRelease extends yacyVersion {
File download = null;
// setup httpClient
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
final String name = this.getUrl().getFileName();
byte[] signatureBytes = null;

@ -38,6 +38,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
@ -52,7 +53,6 @@ import net.yacy.kelondro.order.Base64Order;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
import de.anomic.crawler.retrieval.HTTPLoader;
//import de.anomic.http.client.Client;
import de.anomic.http.server.HTTPDemon;
import de.anomic.http.server.AlternativeDomainNames;
@ -846,7 +846,7 @@ public final class yacySeedDB implements AlternativeDomainNames {
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.yacyUserAgent);
reqHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
// init http-client
// final Client client = new Client(10000, reqHeader);

@ -49,7 +49,8 @@ import net.yacy.cora.protocol.http.HTTPClient;
* MultiProtocolURI provides a URL object for multiple protocols like http, https, ftp, smb and file
*
*/
public class MultiProtocolURI implements Serializable {
public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolURI> {
private static final long serialVersionUID = -1173233022912141884L;
public static final int TLD_any_zone_filter = 255; // from TLD zones can be filtered during search; this is the catch-all filter
@ -78,10 +79,47 @@ public class MultiProtocolURI implements Serializable {
}
}
/**
* provide system information for client identification
*/
public static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
"; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();
public static final String yacybotUserAgent = "yacybot (" + systemOST +") http://yacy.net/bot.html";
/**
 * Builds a coarse location tag of the form {@code <region>/<language>}.
 * <p>
 * The region is the part of the {@code user.timezone} system property in front of its
 * first slash; the complete value is used when it contains no slash or starts with one,
 * and "nowhere" when the property is not set. The language is the {@code user.language}
 * system property, or "dumb" when that property is not set.
 *
 * @return the location tag, e.g. "Europe/de" for timezone "Europe/Berlin" and language "de"
 */
public static String generateLocation() {
    final String timezone = System.getProperty("user.timezone", "nowhere");
    final int slash = timezone.indexOf('/');
    // only cut when there is a non-empty prefix in front of the slash
    final String region = (slash > 0) ? timezone.substring(0, slash) : timezone;
    final String language = System.getProperty("user.language", "dumb");
    return region + "/" + language;
}
// class variables
protected String protocol, host, userInfo, path, quest, ref;
protected int port;
/**
 * Creates an empty MultiProtocolURI: every textual component is {@code null} and the
 * port is -1. Such an instance is meant to be used as a poison pill for concurrent
 * blocking queues.
 */
public MultiProtocolURI() {
    // the port gets the "not set" marker value
    port = -1;
    // none of the textual URL components is set
    protocol = null;
    userInfo = null;
    host = null;
    path = null;
    quest = null;
    ref = null;
}
/**
 * Creates a MultiProtocolURI for a local file by delegating to the four-argument
 * constructor with "file", an empty string, -1 and the absolute path of the file.
 *
 * @param file the file to address; its absolute path is used
 * @throws MalformedURLException declared by the constructor this one delegates to
 */
public MultiProtocolURI(final File file) throws MalformedURLException {
this("file", "", -1, file.getAbsolutePath());
}
@ -762,9 +800,8 @@ public class MultiProtocolURI implements Serializable {
return this.toString().equals(other.toString());
}
public int compareTo(final Object h) {
assert (h instanceof MultiProtocolURI);
return this.toString().compareTo(((MultiProtocolURI) h).toString());
/**
 * Orders URIs by comparing their {@code toString()} representations with
 * {@link String#compareTo(String)}.
 *
 * @param h the URI to compare this one with
 * @return the result of comparing the two string representations
 */
public int compareTo(MultiProtocolURI h) {
return this.toString().compareTo(h.toString());
}
public boolean isPOST() {
@ -1112,4 +1149,5 @@ public class MultiProtocolURI implements Serializable {
}
}
}
}

@ -609,6 +609,28 @@ public class Domains {
return localHostAddresses[0];
}
/**
 * Generates a list of intranet InetAddresses without the loopback address 127.0.0.1.
 * <p>
 * If fewer than two local host addresses are known when the method is entered, it first
 * sleeps for up to one second. When no local host addresses are known at all, the result
 * holds at most the single {@code localHostAddress}, and only if that address is not
 * {@code null} and is recognized by {@code isLocal}. Otherwise the result holds every
 * known address whose first byte is not 127 and whose textual form matches
 * {@code localhostPatterns}.
 * <p>
 * If the calling thread is interrupted during the sleep, its interrupt status is
 * restored before the method continues.
 *
 * @return list of all intranet addresses; possibly empty, never {@code null}
 */
public static List<InetAddress> myIntranetIPs() {
    // NOTE(review): presumably the address list is filled asynchronously elsewhere and
    // this pause gives that a chance to finish -- confirm against the initializer
    if (localHostAddresses.length < 2) {
        try {
            Thread.sleep(1000);
        } catch (final InterruptedException e) {
            // do not swallow the interruption: keep the flag visible to the caller
            Thread.currentThread().interrupt();
        }
    }

    final List<InetAddress> list = new ArrayList<InetAddress>(localHostAddresses.length);
    if (localHostAddresses.length == 0) {
        // fall back to the single known local address, if it qualifies as local
        if (localHostAddress != null && isLocal(localHostAddress.getHostAddress())) {
            list.add(localHostAddress);
        }
        return list;
    }

    for (final InetAddress address : localHostAddresses) {
        // skip loopback addresses (first byte 127)
        if ((0xff & address.getAddress()[0]) == 127) continue;
        // keep only addresses that match the local-host patterns
        if (!matchesList(address.getHostAddress(), localhostPatterns)) continue;
        list.add(address);
    }
    return list;
}
public static int getDomainID(final String host) {
if (host == null || host.length() == 0) return TLD_Local_ID;
if (isLocal(host)) return TLD_Local_ID;

@ -44,6 +44,7 @@ import java.io.RandomAccessFile;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
@ -85,7 +86,7 @@ public class FTPClient {
private Socket ControlSocket = null;
// socket timeout
private static final int ControlSocketTimeout = 10000;
private static final int ControlSocketTimeout = 1000;
// data socket timeout
private int DataSocketTimeout = 0; // in seconds (default infinite)
@ -1515,13 +1516,14 @@ public class FTPClient {
}
try {
ControlSocket = new Socket(host, port);
ControlSocket = new Socket();
ControlSocket.setSoTimeout(getTimeout());
ControlSocket.setKeepAlive(true);
ControlSocket.setTcpNoDelay(true); // no accumulation until buffer is full
ControlSocket.setSoLinger(false, getTimeout()); // !wait for all data being written on close()
ControlSocket.setSendBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
ControlSocket.setReceiveBufferSize(1440); // read http://www.cisco.com/warp/public/105/38.shtml
ControlSocket.connect(new InetSocketAddress(host, port), 1000);
clientInput = new BufferedReader(new InputStreamReader(ControlSocket.getInputStream()));
clientOutput = new DataOutputStream(new BufferedOutputStream(ControlSocket.getOutputStream()));

@ -42,6 +42,7 @@ import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ConnectionInfo;
import org.apache.http.Header;
@ -93,7 +94,7 @@ public class HTTPClient {
private final static int maxcon = 20;
private static IdledConnectionEvictor idledConnectionEvictor = null;
private static HttpClient httpClient = null;
private static HttpClient httpClient = initConnectionManager();
private Header[] headers = null;
private HttpResponse httpResponse = null;
private HttpUriRequest currentRequest = null;
@ -106,19 +107,13 @@ public class HTTPClient {
public HTTPClient() {
super();
if (httpClient == null) {
initConnectionManager();
}
}
public static void setDefaultUserAgent(final String defaultAgent) {
if (httpClient == null) {
initConnectionManager();
}
HttpProtocolParams.setUserAgent(httpClient.getParams(), defaultAgent);
}
private static void initConnectionManager() {
private static HttpClient initConnectionManager() {
// Create and initialize HTTP parameters
final HttpParams httpParams = new BasicHttpParams();
/**
@ -141,7 +136,7 @@ public class HTTPClient {
*/
HttpProtocolParams.setVersion(httpParams, HttpVersion.HTTP_1_1);
// UserAgent
HttpProtocolParams.setUserAgent(httpParams, "yacy (" + systemOST +") yacy.net");
HttpProtocolParams.setUserAgent(httpParams, MultiProtocolURI.yacybotUserAgent);
HttpProtocolParams.setUseExpectContinue(httpParams, false); // IMPORTANT - if not set to 'false' then servers do not process the request until a time-out of 2 seconds
/**
* HTTP connection settings
@ -175,7 +170,7 @@ public class HTTPClient {
idledConnectionEvictor = new IdledConnectionEvictor(clientConnectionManager);
idledConnectionEvictor.start();
return httpClient;
}
/**
@ -532,35 +527,6 @@ public class HTTPClient {
upbytes));
}
/**
* provide system information for client identification
*/
private static final String systemOST = System.getProperty("os.arch", "no-os-arch") + " " +
System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
"; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation();
/**
* generating the location string
*
* @return
*/
public static String generateLocation() {
String loc = System.getProperty("user.timezone", "nowhere");
final int p = loc.indexOf('/');
if (p > 0) {
loc = loc.substring(0, p);
}
loc = loc + "/" + System.getProperty("user.language", "dumb");
return loc;
}
/**
* @return the systemOST
*/
public static String getSystemOST() {
return systemOST;
}
private static SSLSocketFactory getSSLSocketFactory() {
final TrustManager trustManager = new X509TrustManager() {
public void checkClientTrusted(X509Certificate[] chain, String authType)

@ -41,8 +41,6 @@ import net.yacy.cora.protocol.http.HTTPConnector;
import org.apache.http.entity.mime.content.ContentBody;
import org.apache.http.entity.mime.content.StringBody;
import de.anomic.crawler.retrieval.HTTPLoader;
public class Search {
public static BlockingQueue<RSSMessage> search(String rssSearchServiceURL, String query, boolean verify, boolean global, long timeout, int maximumRecords) {
@ -128,7 +126,7 @@ public class Search {
parts.put("maximumRecords", new StringBody(Long.toString(maximumRecords)));
parts.put("verify", new StringBody(verify ? "true" : "false"));
parts.put("resource", new StringBody(global ? "global" : "local"));
final byte[] result = HTTPConnector.getConnector(HTTPLoader.yacyUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
final byte[] result = HTTPConnector.getConnector(MultiProtocolURI.yacybotUserAgent).post(new MultiProtocolURI(rssSearchServiceURL), (int) timeout, uri.getHost(), parts);
//String debug = new String(result); System.out.println("*** DEBUG: " + debug);
final RSSReader reader = RSSReader.parse(RSSFeed.DEFAULT_MAXSIZE, result);
if (reader == null) {

@ -32,8 +32,6 @@ import java.util.regex.Pattern;
import com.ibm.icu.text.CharsetDetector;
import de.anomic.crawler.retrieval.HTTPLoader;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
@ -268,7 +266,7 @@ public class htmlParser extends AbstractParser implements Parser {
MultiProtocolURI url;
try {
url = new MultiProtocolURI(args[0]);
byte[] content = url.get(HTTPLoader.crawlerUserAgent, 3000);
byte[] content = url.get(MultiProtocolURI.yacybotUserAgent, 3000);
Document[] document = new htmlParser().parse(url, "text/html", null, new ByteArrayInputStream(content));
String title = document[0].dc_title();
System.out.println(title);

@ -201,7 +201,7 @@ public final class LoaderDispatcher {
// create request header values and a response object because we need that
// in case that we want to return the cached content in the next step
final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, HTTPLoader.crawlerUserAgent);
requestHeader.put(HeaderFramework.USER_AGENT, MultiProtocolURI.yacybotUserAgent);
DigestURI refererURL = null;
if (request.referrerhash() != null) refererURL = sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refererURL != null) requestHeader.put(RequestHeader.REFERER, refererURL.toNormalform(true, true));

@ -46,6 +46,7 @@ import java.util.concurrent.Semaphore;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.gui.YaCyApp;
@ -289,7 +290,7 @@ public final class yacy {
// set user-agent
final String userAgent = "yacy/" + Double.toString(version) + " (www.yacy.net; "
+ HTTPClient.getSystemOST() + ")";
+ MultiProtocolURI.systemOST + ")";
HTTPClient.setDefaultUserAgent(userAgent);
// start main threads

Loading…
Cancel
Save