Added many missing user-agent declarations for YaCy HTTP client connections.

The most important fix is the addition of the yacybot user-agent for robots.txt loading,
because webmasters check those accesses to verify that the crawler behaves correctly.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4968 6c8d7289-2bf4-0310-a012-ef5d649a1542
orbiter 17 years ago
parent 474e29ce4a
commit e81be7d4f2

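For illustration, the recurring pattern behind the hunks below: build an httpHeader, declare a user-agent, and pass the header together with an explicit timeout to HttpClient.wget. This is a minimal sketch using the names that appear in the diff; the example URL, the variable names, and the 10-second timeout are assumptions, not code from the commit.

// minimal sketch of the pattern this commit applies (hypothetical call site)
yacyURL u = new yacyURL("http://example.net/robots.txt", null);
httpHeader reqHeader = new httpHeader();
// declare the yacybot agent so webmasters can identify crawler accesses
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] robotsTxt = HttpClient.wget(u.toString(), reqHeader, 10000); // assumed 10s timeout
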
@@ -54,6 +54,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.data.translator;
import de.anomic.http.HttpClient;
@@ -96,7 +97,9 @@ public class ConfigLanguage_p {
ArrayList<String> langVector;
try{
yacyURL u = new yacyURL(url, null);
langVector = nxTools.strings(HttpClient.wget(u.toString()), "UTF-8");
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
langVector = nxTools.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
}catch(IOException e){
prop.put("status", "1");//unable to get url
prop.put("status_url", url);

@@ -33,6 +33,7 @@ import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
@@ -88,7 +89,9 @@ public class ConfigSkins_p {
ArrayList<String> skinVector;
try {
yacyURL u = new yacyURL(url, null);
skinVector = nxTools.strings(HttpClient.wget(u.toString()), "UTF-8");
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
skinVector = nxTools.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8");
} catch (IOException e) {
prop.put("status", "1");// unable to get URL
prop.put("status_url", url);

@@ -24,7 +24,7 @@
<dt><label for="cookie_value">Value:</label></dt>
<dd><input type="text" name="cookie_value" id="cookie_value" /></dd>
</dl>
<input type="submit" name="set_action" value="Dear server, set this coockie for me!" />
<input type="submit" name="set_action" value="Dear server, set this cookie for me!" />
</fieldset>
</form>
<hr />

@@ -54,7 +54,7 @@ import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
public class CookieTest {
public class CookieTest_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch<?> env) {
@@ -69,7 +69,7 @@ public class CookieTest {
final servletProperties prop = new servletProperties();
if(post.containsKey("act")&&post.get("act").equals("clear_cookie")) {
httpHeader outgoingHeader = new httpHeader();
httpHeader outgoingHeader = new httpHeader();
Iterator<Map.Entry<String, String>> it = header.entrySet().iterator();
Map.Entry<String, String> e;
while (it.hasNext()) {

@@ -54,6 +54,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.httpHeader;
@@ -375,8 +376,7 @@ public class Network {
prop.putHTML(STR_TABLE_LIST + conCount + "_fullname", seed.get(yacySeed.NAME, "deadlink"));
userAgent = null;
if (seed.hash.equals(sb.webIndex.seedDB.mySeed().hash)) {
final JakartaCommonsHttpClient httpClient = new JakartaCommonsHttpClient(10000, null, null);
userAgent = httpClient.getUserAgent();
userAgent = HTTPLoader.yacyUserAgent;
location = HttpClient.generateLocation();
} else {
userAgent = sb.webIndex.peerActions.getUserAgent(seed.getIP());

@@ -55,6 +55,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.listManager;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
@@ -128,10 +129,11 @@ public class sharedBlacklist_p {
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.PRAGMA,"no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache");
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
// get List
yacyURL u = new yacyURL(downloadURL, null);
otherBlacklist = nxTools.strings(HttpClient.wget(u.toString(), reqHeader), "UTF-8");
otherBlacklist = nxTools.strings(HttpClient.wget(u.toString(), reqHeader, 1000), "UTF-8");
} catch (Exception e) {
prop.put("status", STATUS_PEER_UNKNOWN);
prop.put("page", "1");
@@ -147,7 +149,9 @@ public class sharedBlacklist_p {
try {
yacyURL u = new yacyURL(downloadURL, null);
otherBlacklist = nxTools.strings(HttpClient.wget(u.toString()), "UTF-8"); //get List
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
otherBlacklist = nxTools.strings(HttpClient.wget(u.toString(), reqHeader, 10000), "UTF-8"); //get List
} catch (Exception e) {
prop.put("status", STATUS_URL_PROBLEM);
prop.putHTML("status_address",downloadURL);

@@ -49,6 +49,7 @@ import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.robotsParser;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.htmlFilter.htmlFilterWriter;
@@ -60,6 +61,7 @@ import de.anomic.server.serverSwitch;
import de.anomic.yacy.yacyURL;
public class getpageinfo_p {
public static serverObjects respond(httpHeader header, serverObjects post, serverSwitch<?> env) {
serverObjects prop = new serverObjects();
prop.put("sitemap", "");
@@ -81,7 +83,9 @@ public class getpageinfo_p {
if (actions.indexOf("title")>=0) {
try {
yacyURL u = new yacyURL(url, null);
byte[] r = HttpClient.wget(u.toString());
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] r = HttpClient.wget(u.toString(), reqHeader, 5000);
if (r == null) return prop;
String contentString=new String(r);

@@ -78,7 +78,8 @@ public final class HTTPLoader {
private static final String DEFAULT_CHARSET = "ISO-8859-1,utf-8;q=0.7,*;q=0.7";
private static final long DEFAULT_MAXFILESIZE = 1024 * 1024 * 10;
public static final int DEFAULT_CRAWLING_RETRY_COUNT = 5;
private static final String crawlerUserAgent = "yacybot (" + HttpClient.getSystemOST() +") http://yacy.net/bot.html";
public static final String crawlerUserAgent = "yacybot (" + HttpClient.getSystemOST() +") http://yacy.net/bot.html";
public static final String yacyUserAgent = "yacy (" + HttpClient.getSystemOST() +") yacy.net";
/**
* The socket timeout that should be used

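This hunk makes crawlerUserAgent public and adds the new yacyUserAgent constant for non-crawl traffic such as UI downloads, seed-list fetches, and peer requests. A hypothetical check, not part of the commit, of what remote access logs will show; the HttpClient.getSystemOST() portion varies per host:

// hypothetical snippet to inspect both agent strings
System.out.println(HTTPLoader.crawlerUserAgent); // e.g. "yacybot (Linux; ...) http://yacy.net/bot.html"
System.out.println(HTTPLoader.yacyUserAgent);    // e.g. "yacy (Linux; ...) yacy.net"
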
@@ -59,9 +59,11 @@ import org.xml.sax.helpers.DefaultHandler;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.ZURL;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpHeader;
import de.anomic.http.httpdByteCountInputStream;
import de.anomic.index.indexURLReference;
import de.anomic.plasma.plasmaSwitchboard;
@@ -169,7 +171,9 @@ public class SitemapParser extends DefaultHandler {
*/
public void parse() {
// download document
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(5000, null, null);
httpHeader header = new httpHeader();
header.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(5000, header, null);
JakartaCommonsHttpResponse res = null;
try {
res = client.GET(siteMapURL.toString());

@@ -56,6 +56,7 @@ import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Date;
import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.RobotsTxt;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
@@ -393,6 +394,9 @@ public final class robotsParser{
// if we previously have downloaded this robots.txt then we can set the if-modified-since header
httpHeader reqHeaders = new httpHeader();
// add yacybot user agent
reqHeaders.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
// adding referer
reqHeaders.put(httpHeader.REFERER, (yacyURL.newURL(robotsURL,"/")).toNormalform(true, true));

@@ -61,8 +61,10 @@ import java.util.Properties;
import javax.swing.event.EventListenerList;
import de.anomic.crawler.HTTPLoader;
import de.anomic.data.htmlTools;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.server.serverCharBuffer;
import de.anomic.server.serverFileUtils;
import de.anomic.yacy.yacyURL;
@@ -507,7 +509,9 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
public static htmlFilterContentScraper parseResource(yacyURL location) throws IOException {
// load page
byte[] page = HttpClient.wget(location.toString());
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] page = HttpClient.wget(location.toString(), reqHeader, 10000);
if (page == null) throw new IOException("no response from url " + location.toString());
// scrape content

@@ -78,62 +78,7 @@ public abstract class HttpClient {
return JakartaCommonsHttpClient.date2String(date);
}
/**
* Gets a page (as raw bytes)
*
* @param uri
* @return
*/
public static byte[] wget(final String uri) {
return wget(uri, null, null);
}
/**
* Gets a page (as raw bytes) addressing vhost at host in uri
*
* @param uri
* @param vhost used if host in uri cannot be resolved (yacy tld)
* @return
*/
public static byte[] wget(final String uri, final String vhost) {
return wget(uri, null, vhost);
}
/**
* Gets a page (as raw bytes) aborting after timeout
*
* @param uri
* @param timeout in milliseconds
* @return
*/
public static byte[] wget(final String uri, final int timeout) {
return wget(uri, null, null, timeout);
}
/**
* Gets a page (as raw bytes) with specified header
*
* @param uri
* @param header
* @return
*/
public static byte[] wget(final String uri, final httpHeader header) {
return wget(uri, header, null);
}
/**
* Gets a page (as raw bytes) addressing vhost at host in uri with specified header
*
* @param uri
* @param header
* @param vhost
* @return
* @require uri != null
*/
public static byte[] wget(final String uri, final httpHeader header, final String vhost) {
return wget(uri, header, vhost, 10000);
}
/**
* Gets a page (as raw bytes) addressing vhost at host in uri with specified header and timeout
*
@@ -143,13 +88,13 @@ public abstract class HttpClient {
* @param timeout in milliseconds
* @return
*/
public static byte[] wget(final String uri, httpHeader header, final String vhost, final int timeout) {
public static byte[] wget(final String uri, final httpHeader header, int timeout) {
return wget(uri, header, timeout, null);
}
public static byte[] wget(final String uri, httpHeader header, final int timeout, final String vhost) {
assert uri != null : "precondition violated: uri != null";
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, null, null);
// set header
header = addHostHeader(header, vhost);
client.setHeader(header);
addHostHeader(header, vhost);
final JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, header, null);
// do the request
try {
@@ -168,7 +113,7 @@
* @param vhost
* @return
*/
private static httpHeader addHostHeader(httpHeader header, final String vhost) {
private static void addHostHeader(httpHeader header, final String vhost) {
if (vhost != null) {
if (header != null) {
header = new httpHeader();
@@ -176,7 +121,6 @@ public abstract class HttpClient {
// set host-header
header.add(httpHeader.HOST, vhost);
}
return header;
}
/**

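The five single-purpose wget overloads removed above collapse into the two signatures kept by the second hunk, so every caller now states its timeout explicitly. A hedged usage sketch of the surviving API; the URLs, the header contents, and the timeout values are placeholders:

// sketch only: the two remaining wget signatures
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
byte[] page = HttpClient.wget("http://example.net/", null, 10000); // uri, header, timeout
byte[] viaVhost = HttpClient.wget("http://example.net/", reqHeader, 10000, "example.yacyh"); // with vhost for yacy-tld hosts
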
@@ -546,7 +546,7 @@ public class JakartaCommonsHttpClient {
// wget
System.out.println("wget " + url);
System.out.println("--------------------------------------");
System.out.println(new String(de.anomic.http.HttpClient.wget(url)));
System.out.println(new String(de.anomic.http.HttpClient.wget(url, null, 10000)));
}
} catch (final IOException e) {
e.printStackTrace();

@@ -76,6 +76,7 @@ import java.util.logging.LogManager;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;
import de.anomic.crawler.HTTPLoader;
import de.anomic.htmlFilter.htmlFilterContentTransformer;
import de.anomic.htmlFilter.htmlFilterTransformer;
import de.anomic.htmlFilter.htmlFilterWriter;
@@ -113,7 +114,6 @@ public final class httpdProxyHandler {
* *The* remote Proxy configuration
*/
private static httpRemoteProxyConfig remoteProxyConfig = null;
private static final String proxyUserAgent = "yacy (" + HttpClient.getSystemOST() +") yacy.net";
private static File htRootPath = null;
//private Properties connectionProperties = null;
@@ -1618,7 +1618,7 @@ public final class httpdProxyHandler {
private static synchronized String generateUserAgent(httpHeader requestHeaders) {
userAgentStr.setLength(0);
String browserUserAgent = (String) requestHeaders.get(httpHeader.USER_AGENT, proxyUserAgent);
String browserUserAgent = (String) requestHeaders.get(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
int pos = browserUserAgent.lastIndexOf(')');
if (pos >= 0) {
userAgentStr

@@ -64,7 +64,7 @@ public class natLib {
rm status.htm
*/
try {
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm"));
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://admin:"+password+"@192.168.0.1:80/status.htm", null, 10000));
x = nxTools.grep(x, 1, "IP Address");
if ((x == null) || (x.size() == 0)) return null;
String line = nxTools.tail1(x);
@@ -76,7 +76,7 @@ public class natLib {
private static String getWhatIsMyIP() {
try {
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://www.whatismyip.com/"));
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://www.whatismyip.com/", null, 10000));
x = nxTools.grep(x, 0, "Your IP is");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 4);
@@ -87,7 +87,7 @@ public class natLib {
private static String getStanford() {
try {
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl"));
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://www.slac.stanford.edu/cgi-bin/nph-traceroute.pl", null, 10000));
x = nxTools.grep(x, 0, "firewall protecting your browser");
String line = nxTools.tail1(x);
return nxTools.awk(line, " ", 7);
@@ -98,7 +98,7 @@ public class natLib {
private static String getIPID() {
try {
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://ipid.shat.net/"), "UTF-8");
ArrayList<String> x = nxTools.strings(HttpClient.wget("http://ipid.shat.net/", null, 10000), "UTF-8");
x = nxTools.grep(x, 2, "Your IP address");
String line = nxTools.tail1(x);
return nxTools.awk(nxTools.awk(nxTools.awk(line, " ", 5), ">", 2), "<", 1);

@@ -58,7 +58,9 @@ import com.catcode.odf.ODFMetaFileAnalyzer;
import com.catcode.odf.OpenDocumentMetadata;
import com.catcode.odf.OpenDocumentTextInputStream;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
@@ -250,7 +252,9 @@ public class odtParser extends AbstractParser implements Parser {
testParser.setLogger(new serverLog("PARSER.ODT"));
// downloading the document content
byte[] content = HttpClient.wget(contentUrl.toString());
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
ByteArrayInputStream input = new ByteArrayInputStream(content);
// parsing the document

@@ -52,7 +52,9 @@ import java.util.Hashtable;
import com.jguild.jrpm.io.RPMFile;
import com.jguild.jrpm.io.datatype.DataTypeIf;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
import de.anomic.plasma.parser.Parser;
@@ -180,7 +182,9 @@ public class rpmParser extends AbstractParser implements Parser {
yacyURL contentUrl = new yacyURL(args[0], null);
rpmParser testParser = new rpmParser();
byte[] content = HttpClient.wget(contentUrl.toString());
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
ByteArrayInputStream input = new ByteArrayInputStream(content);
testParser.parse(contentUrl, "application/x-rpm", null, input);
} catch (Exception e) {

@@ -53,7 +53,9 @@ import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedList;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.kelondro.kelondroBase64Order;
import de.anomic.plasma.plasmaParserDocument;
import de.anomic.plasma.parser.AbstractParser;
@@ -291,7 +293,9 @@ public class vcfParser extends AbstractParser implements Parser {
yacyURL contentUrl = new yacyURL(args[0], null);
vcfParser testParser = new vcfParser();
byte[] content = HttpClient.wget(contentUrl.toString());
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
byte[] content = HttpClient.wget(contentUrl.toString(), reqHeader, 10000);
ByteArrayInputStream input = new ByteArrayInputStream(content);
testParser.parse(contentUrl, "text/x-vcard", "UTF-8",input);
} catch (Exception e) {

@@ -112,6 +112,7 @@ import de.anomic.crawler.CrawlProfile;
import de.anomic.crawler.CrawlQueues;
import de.anomic.crawler.CrawlStacker;
import de.anomic.crawler.ErrorURL;
import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.ImporterManager;
import de.anomic.crawler.IndexingStack;
import de.anomic.crawler.NoticedURL;
@@ -2703,8 +2704,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
// load the seed list
try {
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.PRAGMA,"no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL,"no-cache");
reqHeader.put(httpHeader.PRAGMA, "no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache");
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
url = new yacyURL(seedListFileURL, null);
long start = System.currentTimeMillis();
@@ -2722,7 +2724,7 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
yacyCore.log.logInfo("BOOTSTRAP: seed-list URL " + seedListFileURL + " too old (" + (header.age() / 86400000) + " days)");
} else {
ssc++;
final byte[] content = HttpClient.wget(url.toString(), reqHeader, null, (int) getConfigLong("bootstrapLoadTimeout", 20000));
final byte[] content = HttpClient.wget(url.toString(), reqHeader, (int) getConfigLong("bootstrapLoadTimeout", 20000));
seedList = nxTools.strings(content, "UTF-8");
enu = seedList.iterator();
lc = 0;
@@ -2787,10 +2789,9 @@ public final class plasmaSwitchboard extends serverAbstractSwitch<IndexingStack.
public static Map<String, String> loadHashMap(yacyURL url) {
try {
// sending request
final HashMap<String, String> result = nxTools.table(
HttpClient.wget(url.toString())
, "UTF-8");
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
final HashMap<String, String> result = nxTools.table(HttpClient.wget(url.toString(), reqHeader, 10000), "UTF-8");
if (result == null) return new HashMap<String, String>();
return result;
} catch (Exception e) {

@@ -40,13 +40,13 @@
package de.anomic.tools;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Hashtable;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.HttpClient;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRemoteProxyConfig;
import de.anomic.http.httpdProxyHandler;
import de.anomic.yacy.yacyURL;
public class loaderThreads {
@@ -141,7 +141,9 @@ public class loaderThreads {
public void run() {
try {
page = HttpClient.wget(url.toString(), timeout);
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
page = HttpClient.wget(url.toString(), reqHeader, timeout);
loaded = true;
process.feed(page);
if (process.status() == loaderCore.STATUS_FAILED) {
@@ -223,6 +225,7 @@ public class loaderThreads {
}
}
/*
public static void main(String[] args) {
httpdProxyHandler.setRemoteProxyConfig(httpRemoteProxyConfig.init("192.168.1.122", 3128));
loaderThreads loader = new loaderThreads();
@@ -232,5 +235,5 @@ public class loaderThreads {
}
}
*/
}

@@ -60,6 +60,7 @@ import org.apache.commons.httpclient.methods.multipart.FilePart;
import org.apache.commons.httpclient.methods.multipart.Part;
import org.apache.commons.httpclient.methods.multipart.StringPart;
import de.anomic.crawler.HTTPLoader;
import de.anomic.crawler.ResultURLs;
import de.anomic.http.HttpClient;
import de.anomic.http.JakartaCommonsHttpClient;
@@ -263,13 +264,11 @@ public final class yacyClient {
* @throws IOException
*/
private static byte[] wput(final String url, String vhost, final List<Part> post, final int timeout, boolean gzipBody) throws IOException {
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, null, null);
client.setProxy(proxyConfig());
// address vhost
httpHeader header = new httpHeader();
header.add(httpHeader.HOST, vhost);
client.setHeader(header);
header.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
header.put(httpHeader.HOST, vhost);
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(timeout, header, null);
client.setProxy(proxyConfig());
JakartaCommonsHttpResponse res = null;
byte[] content = null;
@@ -1068,6 +1067,8 @@ public final class yacyClient {
final String wordhashe = indexWord.word2hash("test");
//System.out.println("permission=" + permissionMessage(args[1]));
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.crawlerUserAgent);
final byte[] content = HttpClient.wget(
"http://" + target.getPublicAddress() + "/yacy/search.html" +
"?myseed=" + sb.webIndex.seedDB.mySeed().genSeedStr(null) +
@@ -1077,7 +1078,7 @@
"&resource=global" +
"&query=" + wordhashe +
"&network.unit.name=" + plasmaSwitchboard.getSwitchboard().getConfig("network.unit.name", yacySeed.DFLT_NETWORK_UNIT),
target.getHexHash() + ".yacyh");
reqHeader, 10000, target.getHexHash() + ".yacyh");
final HashMap<String, String> result = nxTools.table(content, "UTF-8");
System.out.println("Result=" + result.toString());
} catch (Exception e) {

@@ -60,6 +60,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import de.anomic.crawler.HTTPLoader;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpHeader;
@@ -849,7 +850,8 @@ public final class yacySeedDB implements httpdAlternativeDomainNames {
// Configure http headers
httpHeader reqHeader = new httpHeader();
reqHeader.put(httpHeader.PRAGMA, "no-cache");
reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(httpHeader.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
// init http-client
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(10000, reqHeader, null);

@@ -42,9 +42,11 @@ import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.anomic.crawler.HTTPLoader;
import de.anomic.htmlFilter.htmlFilterContentScraper;
import de.anomic.http.JakartaCommonsHttpClient;
import de.anomic.http.JakartaCommonsHttpResponse;
import de.anomic.http.httpHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverFileUtils;
@@ -336,7 +338,9 @@ public final class yacyVersion implements Comparator<yacyVersion>, Comparable<ya
File storagePath = plasmaSwitchboard.getSwitchboard().releasePath;
// load file
File download = null;
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(120000, null, null);
httpHeader header = new httpHeader();
header.put(httpHeader.USER_AGENT, HTTPLoader.yacyUserAgent);
JakartaCommonsHttpClient client = new JakartaCommonsHttpClient(120000, header, null);
JakartaCommonsHttpResponse res = null;
String name = release.url.getFileName();
try {

@@ -439,7 +439,7 @@ public final class yacy {
if (server.isAlive()) try {
// TODO only send request, don't read response (cause server is already down resulting in error)
yacyURL u = new yacyURL((server.withSSL()?"https":"http")+"://localhost:" + serverCore.getPortNr(port), null);
HttpClient.wget(u.toString()); // kick server
HttpClient.wget(u.toString(), null, 10000); // kick server
serverLog.logConfig("SHUTDOWN", "sent termination signal to server socket");
} catch (IOException ee) {
serverLog.logConfig("SHUTDOWN", "termination signal to server socket missed (server shutdown, ok)");
