- removed unnecessary synchronization and a deadlock in the crawler

- fixed a problem with the monitor object when waiting in the Balancer (this.wait replaced by Thread.sleep)
- added missing user agent settings
pull/1/head
Michael Peter Christen 13 years ago
parent 8952153ecf
commit a33e2742cb

@ -2042,9 +2042,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
return new ByteArrayInputStream(b); return new ByteArrayInputStream(b);
} }
if (isHTTP() || isHTTPS()) { if (isHTTP() || isHTTPS()) {
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(userAgent, timeout);
client.setTimout(timeout);
client.setUserAgent(userAgent);
client.setHost(getHost()); client.setHost(getHost());
return new ByteArrayInputStream(client.GETbytes(this)); return new ByteArrayInputStream(client.GETbytes(this));
} }
@ -2063,9 +2061,7 @@ public class MultiProtocolURI implements Serializable, Comparable<MultiProtocolU
return b; return b;
} }
if (isHTTP() || isHTTPS()) { if (isHTTP() || isHTTPS()) {
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(userAgent, timeout);
client.setTimout(timeout);
client.setUserAgent(userAgent);
client.setHost(getHost()); client.setHost(getHost());
return client.GETbytes(this); return client.GETbytes(this);
} }

@ -27,6 +27,8 @@ package net.yacy.cora.protocol;
public class ClientIdentification { public class ClientIdentification {
public static final int DEFAULT_TIMEOUT = 10000;
/** /**
* provide system information (this is part of YaCy protocol) * provide system information (this is part of YaCy protocol)
*/ */

@ -118,19 +118,17 @@ public class HTTPClient {
private boolean redirecting = true; private boolean redirecting = true;
private String realm = null; private String realm = null;
public HTTPClient() {
super();
}
public HTTPClient(final String userAgent) { public HTTPClient(final String userAgent) {
super(); super();
this.userAgent = userAgent; this.userAgent = userAgent;
HttpProtocolParams.setUserAgent(httpClient.getParams(), userAgent);
} }
public HTTPClient(final String userAgent, final int timeout) { public HTTPClient(final String userAgent, final int timeout) {
super(); super();
this.userAgent = userAgent; this.userAgent = userAgent;
this.timeout = timeout; this.timeout = timeout;
HttpProtocolParams.setUserAgent(httpClient.getParams(), userAgent);
} }
public static void setDefaultUserAgent(final String defaultAgent) { public static void setDefaultUserAgent(final String defaultAgent) {
@ -759,7 +757,7 @@ public class HTTPClient {
} catch (final UnsupportedEncodingException e) { } catch (final UnsupportedEncodingException e) {
System.out.println(e.getStackTrace()); System.out.println(e.getStackTrace());
} }
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setUserAgent("foobar"); client.setUserAgent("foobar");
client.setRedirecting(false); client.setRedirecting(false);
// Get some // Get some

@ -310,6 +310,15 @@ public class Balancer {
return sleeptime; return sleeptime;
} }
/**
* Load a robots.txt to get the robots time.
* ATTENTION: this method may cause a robots.txt to be loaded from the web, which can considerably delay execution.
* It must therefore not be called from within synchronized code.
* @param robots
* @param profileEntry
* @param crawlURL
* @return
*/
private long getRobotsTime(final RobotsTxt robots, final CrawlProfile profileEntry, final DigestURI crawlURL) { private long getRobotsTime(final RobotsTxt robots, final CrawlProfile profileEntry, final DigestURI crawlURL) {
if (profileEntry == null) return 0; if (profileEntry == null) return 0;
long sleeptime = Latency.waitingRobots(crawlURL, robots, this.myAgentIDs); // this uses the robots.txt database and may cause a loading of robots.txt from the server long sleeptime = Latency.waitingRobots(crawlURL, robots, this.myAgentIDs); // this uses the robots.txt database and may cause a loading of robots.txt from the server
@ -455,10 +464,10 @@ public class Balancer {
rest = rest + 1000 * loops; rest = rest + 1000 * loops;
loops = 0; loops = 0;
} }
if (rest > 0) {try {this.wait(rest); } catch (final InterruptedException e) {}} if (rest > 0) {try {Thread.sleep(rest);} catch (final InterruptedException e) {}}
for (int i = 0; i < loops; i++) { for (int i = 0; i < loops; i++) {
Log.logInfo("BALANCER", "waiting for " + crawlEntry.url().getHost() + ": " + (loops - i) + " seconds remaining..."); Log.logInfo("BALANCER", "waiting for " + crawlEntry.url().getHost() + ": " + (loops - i) + " seconds remaining...");
try {this.wait(1000); } catch (final InterruptedException e) {} try {Thread.sleep(1000); } catch (final InterruptedException e) {}
} }
Latency.updateAfterSelection(crawlEntry.url(), robotsTime); Latency.updateAfterSelection(crawlEntry.url(), robotsTime);
} }
@ -488,6 +497,7 @@ public class Balancer {
byte[] besturlhash = null; byte[] besturlhash = null;
String besthost = null; String besthost = null;
OrderedScoreMap<Map.Entry<String, byte[]>> nextZeroCandidates = new OrderedScoreMap<Map.Entry<String, byte[]>>(null); OrderedScoreMap<Map.Entry<String, byte[]>> nextZeroCandidates = new OrderedScoreMap<Map.Entry<String, byte[]>>(null);
int newCandidatesForward = 10;
while (i.hasNext() && nextZeroCandidates.size() < 1000) { while (i.hasNext() && nextZeroCandidates.size() < 1000) {
entry = i.next(); entry = i.next();
@ -516,7 +526,13 @@ public class Balancer {
} }
if (w <= 0) { if (w <= 0) {
nextZeroCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(entry.getKey(), urlhash), w == Integer.MIN_VALUE ? 1000 /* get new domains a chance */ : entry.getValue().size()); if (w == Integer.MIN_VALUE && newCandidatesForward > 0) {
// give new domains a chance, but not too many at once; otherwise massive downloading of robots.txt from too many domains (dns lock!) is more likely to block crawling
newCandidatesForward--;
nextZeroCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(entry.getKey(), urlhash), 1000);
} else {
nextZeroCandidates.set(new AbstractMap.SimpleEntry<String, byte[]>(entry.getKey(), urlhash), entry.getValue().size());
}
} }
if (w < smallestWaiting || (w == smallestWaiting && this.random.nextBoolean())) { if (w < smallestWaiting || (w == smallestWaiting && this.random.nextBoolean())) {
smallestWaiting = w; smallestWaiting = w;

@ -374,7 +374,7 @@ public final class CrawlStacker {
final DigestURI referrerURL = (entry.referrerhash() == null || entry.referrerhash().length == 0) ? null : this.nextQueue.getURL(entry.referrerhash()); final DigestURI referrerURL = (entry.referrerhash() == null || entry.referrerhash().length == 0) ? null : this.nextQueue.getURL(entry.referrerhash());
// add domain to profile domain list // add domain to profile domain list
if (profile.domMaxPages() != Integer.MAX_VALUE) { if (profile.domMaxPages() != Integer.MAX_VALUE && profile.domMaxPages() > 0) {
profile.domInc(entry.url().getHost(), (referrerURL == null) ? null : referrerURL.getHost().toLowerCase(), entry.depth()); profile.domInc(entry.url().getHost(), (referrerURL == null) ? null : referrerURL.getHost().toLowerCase(), entry.depth());
} }
@ -478,7 +478,7 @@ public final class CrawlStacker {
// deny urls that exceed allowed number of occurrences // deny urls that exceed allowed number of occurrences
final int maxAllowedPagesPerDomain = profile.domMaxPages(); final int maxAllowedPagesPerDomain = profile.domMaxPages();
if (maxAllowedPagesPerDomain < Integer.MAX_VALUE) { if (maxAllowedPagesPerDomain < Integer.MAX_VALUE && maxAllowedPagesPerDomain > 0) {
final DomProfile dp = profile.getDom(url.getHost()); final DomProfile dp = profile.getDom(url.getHost());
if (dp != null && dp.count >= maxAllowedPagesPerDomain) { if (dp != null && dp.count >= maxAllowedPagesPerDomain) {
if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed."); if (this.log.isFine()) this.log.logFine("URL '" + urlstring + "' appeared too often in crawl stack, a maximum of " + profile.domMaxPages() + " is allowed.");

@ -95,7 +95,7 @@ public class NoticedURL {
this.noloadStack.clear(); this.noloadStack.clear();
} }
protected synchronized void close() { protected void close() {
Log.logInfo("NoticedURL", "CLOSING ALL STACKS"); Log.logInfo("NoticedURL", "CLOSING ALL STACKS");
if (this.coreStack != null) { if (this.coreStack != null) {
this.coreStack.close(); this.coreStack.close();
@ -303,19 +303,17 @@ public class NoticedURL {
int s; int s;
Request entry; Request entry;
int errors = 0; int errors = 0;
synchronized (balancer) { while ((s = balancer.size()) > 0) {
while ((s = balancer.size()) > 0) { entry = balancer.pop(delay, cs, robots);
entry = balancer.pop(delay, cs, robots); if (entry == null) {
if (entry == null) { if (s > balancer.size()) continue;
if (s > balancer.size()) continue; errors++;
errors++; if (errors < 100) continue;
if (errors < 100) continue; final int aftersize = balancer.size();
final int aftersize = balancer.size(); balancer.clear(); // the balancer is broken and cannot shrink
balancer.clear(); // the balancer is broken and cannot shrink Log.logWarning("BALANCER", "entry is null, balancer cannot shrink (bevore pop = " + s + ", after pop = " + aftersize + "); reset of balancer");
Log.logWarning("BALANCER", "entry is null, balancer cannot shrink (bevore pop = " + s + ", after pop = " + aftersize + "); reset of balancer");
}
return entry;
} }
return entry;
} }
return null; return null;
} }

@ -126,7 +126,7 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING)); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, this.sb.getConfig("crawler.http.acceptEncoding", DEFAULT_ENCODING));
// HTTP-Client // HTTP-Client
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice client.setRedirecting(false); // we want to handle redirection ourselves, so we don't index pages twice
client.setTimout(this.socketTimeout); client.setTimout(this.socketTimeout);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
@ -252,7 +252,7 @@ public final class HTTPLoader {
requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET); requestHeader.put(HeaderFramework.ACCEPT_CHARSET, DEFAULT_CHARSET);
requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING); requestHeader.put(HeaderFramework.ACCEPT_ENCODING, DEFAULT_ENCODING);
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setTimout(20000); client.setTimout(20000);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
final byte[] responseBody = client.GETbytes(request.url()); final byte[] responseBody = client.GETbytes(request.url());

@ -287,7 +287,7 @@ public class RobotsTxt {
return port; return port;
} }
static Object[] downloadRobotsTxt(final MultiProtocolURI robotsURL, int redirectionCount, final RobotsTxtEntry entry) throws Exception { protected static Object[] downloadRobotsTxt(final MultiProtocolURI robotsURL, int redirectionCount, final RobotsTxtEntry entry) throws Exception {
if (robotsURL == null || !robotsURL.getProtocol().startsWith("http")) return null; if (robotsURL == null || !robotsURL.getProtocol().startsWith("http")) return null;
if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null}; if (redirectionCount < 0) return new Object[]{Boolean.FALSE,null,null};
@ -319,7 +319,7 @@ public class RobotsTxt {
// setup http-client // setup http-client
//TODO: adding Traffic statistic for robots download? //TODO: adding Traffic statistic for robots download?
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setHeader(reqHeaders.entrySet()); client.setHeader(reqHeaders.entrySet());
try { try {
// check for interruption // check for interruption

@ -40,6 +40,7 @@ import net.yacy.cora.date.GenericFormatter;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.HandleSet; import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.SpaceExceededException; import net.yacy.cora.util.SpaceExceededException;
@ -214,7 +215,7 @@ public class WorkTables extends Tables {
*/ */
public Map<String, Integer> execAPICalls(String host, int port, String realm, Collection<String> pks) { public Map<String, Integer> execAPICalls(String host, int port, String realm, Collection<String> pks) {
// now call the api URLs and store the result status // now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setRealm(realm); client.setRealm(realm);
client.setTimout(120000); client.setTimout(120000);
Tables.Row row; Tables.Row row;
@ -246,7 +247,7 @@ public class WorkTables extends Tables {
public static int execAPICall(String host, int port, String realm, String path, byte[] pk) { public static int execAPICall(String host, int port, String realm, String path, byte[] pk) {
// now call the api URLs and store the result status // now call the api URLs and store the result status
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setRealm(realm); client.setRealm(realm);
client.setTimout(120000); client.setTimout(120000);
String url = "http://" + host + ":" + port + path; String url = "http://" + host + ":" + port + path;

@ -42,7 +42,6 @@ import javax.xml.parsers.DocumentBuilderFactory;
import net.yacy.cora.date.ISO8601Formatter; import net.yacy.cora.date.ISO8601Formatter;
import net.yacy.cora.document.MultiProtocolURI; import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
@ -113,9 +112,7 @@ public class sitemapParser extends AbstractParser implements Parser {
// download document // download document
Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true)); Log.logInfo("SitemapReader", "loading sitemap from " + sitemapURL.toNormalform(true));
final RequestHeader requestHeader = new RequestHeader(); final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), 5000);
final HTTPClient client = new HTTPClient();
client.setTimout(5000);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
try { try {
client.GET(sitemapURL.toString()); client.GET(sitemapURL.toString());

@ -9,6 +9,7 @@ import java.io.StringReader;
import java.net.URLEncoder; import java.net.URLEncoder;
import net.yacy.cora.document.ASCII; import net.yacy.cora.document.ASCII;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
@ -31,7 +32,7 @@ public class AugmentHtmlStream {
* @return the web page with integrated REFLECT elements * @return the web page with integrated REFLECT elements
*/ */
private static String processExternal(String url, String fieldname, String data) throws IOException { private static String processExternal(String url, String fieldname, String data) throws IOException {
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
try { try {
StringBuilder postdata = new StringBuilder(); StringBuilder postdata = new StringBuilder();
postdata.append(fieldname); postdata.append(fieldname);

@ -9,6 +9,7 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
@ -186,7 +187,7 @@ public static String Tableentry(String url, String type, String comment, String
Seed host = sb.peers.lookupByName(sb.getConfig("interaction.contribution.accumulationpeer", "")); Seed host = sb.peers.lookupByName(sb.getConfig("interaction.contribution.accumulationpeer", ""));
return (UTF8.String(new HTTPClient().POSTbytes( return (UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).POSTbytes(
"http://"+host.getPublicAddress()+"/interaction/Contribution.json" "http://"+host.getPublicAddress()+"/interaction/Contribution.json"
+ "?url=" + url + "&comment=" + comment + "?url=" + url + "&comment=" + comment
+ "&from=" + from + "&peer=" + peer, + "&from=" + from + "&peer=" + peer,

@ -6,6 +6,7 @@ import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import net.yacy.cora.document.UTF8; import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.data.ymark.YMarkEntry; import net.yacy.data.ymark.YMarkEntry;
import net.yacy.data.ymark.YMarkSMWJSONImporter; import net.yacy.data.ymark.YMarkSMWJSONImporter;
@ -15,47 +16,31 @@ import net.yacy.search.Switchboard;
public class ContentControlImportThread { public class ContentControlImportThread {
private final Switchboard sb; private final Switchboard sb;
private Boolean locked = false; private Boolean locked = false;
private String lastsync = "1900-01-01T01:00:00"; private String lastsync = "1900-01-01T01:00:00";
private String currenttimestamp = "1900-01-01T01:00:00"; private String currenttimestamp = "1900-01-01T01:00:00";
private long offset = 0; private long offset = 0;
private final long limit = 500; private final long limit = 500;
private long currentmax = 0; private long currentmax = 0;
private boolean runningjob = false; private boolean runningjob = false;
public ContentControlImportThread(final Switchboard sb) { public ContentControlImportThread(final Switchboard sb) {
//final long time = System.currentTimeMillis();
this.sb = sb; this.sb = sb;
if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit", if (this.sb.getConfigBool("contentcontrol.smwimport.purgelistoninit",false)) {
false)) { this.sb.tables.clear(this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"));
this.sb.tables.clear(this.sb.getConfig(
"contentcontrol.smwimport.targetlist", "contentcontrol"));
} }
} }
private final String wikiurlify (String s) { private final String wikiurlify (String s) {
String ret = s; String ret = s;
ret = ret.replace("-", "-2D"); ret = ret.replace("-", "-2D");
ret = ret.replace("+", "-2B"); ret = ret.replace("+", "-2B");
ret = ret.replace(" ", "-20"); ret = ret.replace(" ", "-20");
ret = ret.replace("[", "-5B"); ret = ret.replace("[", "-5B");
ret = ret.replace("]", "-5D"); ret = ret.replace("]", "-5D");
ret = ret.replace(":", "-3A"); ret = ret.replace(":", "-3A");
ret = ret.replace(">", "-3E"); ret = ret.replace(">", "-3E");
ret = ret.replace("?", "-3F"); ret = ret.replace("?", "-3F");
return ret; return ret;
@ -64,30 +49,17 @@ public class ContentControlImportThread {
public final void run() { public final void run() {
if (!this.locked) { if (!this.locked) {
this.locked = true; this.locked = true;
if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) { if (this.sb.getConfigBool("contentcontrol.smwimport.enabled", false) == true) {
if (this.runningjob) { if (this.runningjob) {
Log.logInfo("CONTENTCONTROL", Log.logInfo("CONTENTCONTROL",
"CONTENTCONTROL importing max. " + this.limit "CONTENTCONTROL importing max. " + this.limit
+ " elements at " + this.offset + " of " + " elements at " + this.offset + " of "
+ this.currentmax + ", since " + this.currentmax + ", since "
+ this.currenttimestamp); + this.currenttimestamp);
URL bmks_json; URL bmks_json;
//String currenttimestampurl = wikiurlify (this.currenttimestamp);
try { try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl",
"").equals("")) {
bmks_json = new URL( bmks_json = new URL(
this.sb.getConfig( this.sb.getConfig(
"contentcontrol.smwimport.baseurl", "contentcontrol.smwimport.baseurl",
@ -99,9 +71,7 @@ public class ContentControlImportThread {
+ "/offset%3D" + this.offset + "/offset%3D" + this.offset
+ "/limit%3D" + this.limit + "/limit%3D" + this.limit
+ "/format%3Djson"); + "/format%3Djson");
this.offset += this.limit; this.offset += this.limit;
if (this.offset > this.currentmax) { if (this.offset > this.currentmax) {
this.runningjob = false; this.runningjob = false;
} }
@ -111,7 +81,6 @@ public class ContentControlImportThread {
reader = new InputStreamReader( reader = new InputStreamReader(
bmks_json.openStream(), "UTF-8"); bmks_json.openStream(), "UTF-8");
} catch (Exception e) { } catch (Exception e) {
Log.logException(e); Log.logException(e);
this.runningjob = false; this.runningjob = false;
} }
@ -126,22 +95,14 @@ public class ContentControlImportThread {
Log.logException(e); Log.logException(e);
this.runningjob = false; this.runningjob = false;
} }
Thread t; Thread t;
YMarkEntry bmk; YMarkEntry bmk;
t = new Thread(bookmarkImporter,"YMarks - Network bookmark importer");
t = new Thread(bookmarkImporter,
"YMarks - Network bookmark importer");
t.start(); t.start();
while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) { while ((bmk = bookmarkImporter.take()) != YMarkEntry.POISON) {
if (bmk == YMarkEntry.EMPTY) { if (bmk == YMarkEntry.EMPTY) {
this.runningjob = false; this.runningjob = false;
} else { } else {
try { try {
this.sb.tables.bookmarks.addBookmark( this.sb.tables.bookmarks.addBookmark(
this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"), bmk, this.sb.getConfig("contentcontrol.smwimport.targetlist", "contentcontrol"), bmk,
@ -153,12 +114,10 @@ public class ContentControlImportThread {
} }
} }
} }
} else { } else {
} }
} }
else { else {
} }
@ -167,14 +126,9 @@ public class ContentControlImportThread {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e2.printStackTrace(); e2.printStackTrace();
} }
} else { } else {
try { try {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl","").equals("")) {
if (!this.sb.getConfig("contentcontrol.smwimport.baseurl",
"").equals("")) {
URL bmks_count; URL bmks_count;
bmks_count = new URL( bmks_count = new URL(
@ -182,23 +136,16 @@ public class ContentControlImportThread {
"contentcontrol.smwimport.baseurl", "contentcontrol.smwimport.baseurl",
"") "")
+ wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.lastsync+ "]]") + wikiurlify ("/[[Category:Web Page]] [[Modification date::>" +this.lastsync+ "]]")
+ wikiurlify ("/?Url/?Filter/?Article has average rating/?Category") + wikiurlify ("/?Url/?Filter/?Article has average rating/?Category")
+ "/mainlabel%3D" + "/mainlabel%3D"
+ "/format%3Dystat"); + "/format%3Dystat");
String reply = UTF8.String(new HTTPClient() String reply = UTF8.String(new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT).GETbytes(bmks_count.toString()));
.GETbytes(bmks_count.toString()));
String overallcount = reply.split(",")[0]; String overallcount = reply.split(",")[0];
String lastsyncstring = reply.split(",")[1]; String lastsyncstring = reply.split(",")[1];
this.currentmax = Integer.parseInt(overallcount); this.currentmax = Integer.parseInt(overallcount);
if (this.currentmax > 0) { if (this.currentmax > 0) {
Log.logInfo("CONTENTCONTROL", Log.logInfo("CONTENTCONTROL",
"CONTENTCONTROL import job counts " "CONTENTCONTROL import job counts "
+ this.currentmax + this.currentmax
@ -216,7 +163,6 @@ public class ContentControlImportThread {
Log.logWarning("CONTENTCONTROL", Log.logWarning("CONTENTCONTROL",
"No SMWimport URL defined"); "No SMWimport URL defined");
} }
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
@ -224,14 +170,10 @@ public class ContentControlImportThread {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
} }
} }
this.locked = false; this.locked = false;
} }
} }
return; return;
} }

@ -204,14 +204,8 @@ public abstract class AbstractBusyThread extends AbstractThread implements BusyT
// ratzen: German for to sleep (coll.) // ratzen: German for to sleep (coll.)
private void ratz(final long millis) { private void ratz(final long millis) {
try {/* try {
if (this.syncObject != null) { Thread.sleep(millis);
synchronized (this.syncObject) {
this.syncObject.wait(millis);
}
} else {*/
Thread.sleep(millis);
//}
} catch (final InterruptedException e) { } catch (final InterruptedException e) {
if (log != null) if (log != null)
log.logConfig("thread '" + this.getName() + "' interrupted because of shutdown."); log.logConfig("thread '" + this.getName() + "' interrupted because of shutdown.");

@ -823,7 +823,7 @@ public final class SeedDB implements AlternativeDomainNames {
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary? reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); // httpc uses HTTP/1.0 is this necessary?
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setHeader(reqHeader.entrySet()); client.setHeader(reqHeader.entrySet());
byte[] content = null; byte[] content = null;
try { try {

@ -49,8 +49,6 @@ import net.yacy.cora.document.UTF8;
import net.yacy.cora.federate.yacy.CacheStrategy; import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order; import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.HeaderFramework;
import net.yacy.cora.protocol.RequestHeader;
import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.HTTPClient;
import net.yacy.cora.storage.Files; import net.yacy.cora.storage.Files;
@ -288,16 +286,10 @@ public final class yacyRelease extends yacyVersion {
public File downloadRelease() { public File downloadRelease() {
final File storagePath = Switchboard.getSwitchboard().releasePath; final File storagePath = Switchboard.getSwitchboard().releasePath;
File download = null; File download = null;
// setup httpClient
final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final String name = getUrl().getFileName(); final String name = getUrl().getFileName();
byte[] signatureBytes = null; byte[] signatureBytes = null;
final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
final HTTPClient client = new HTTPClient();
client.setTimout(6000);
client.setHeader(reqHeader.entrySet());
// download signature first, if public key is available // download signature first, if public key is available
try { try {

@ -3359,10 +3359,8 @@ public final class Switchboard extends serverSwitch
final RequestHeader reqHeader = new RequestHeader(); final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.PRAGMA, "no-cache"); reqHeader.put(HeaderFramework.PRAGMA, "no-cache");
reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache"); reqHeader.put(HeaderFramework.CACHE_CONTROL, "no-cache");
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
final HTTPClient client = new HTTPClient();
client.setHeader(reqHeader.entrySet()); client.setHeader(reqHeader.entrySet());
client.setTimout(timeout);
client.HEADResponse(url.toString()); client.HEADResponse(url.toString());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();

@ -1074,8 +1074,7 @@ public final class HTTPDProxyHandler {
*/ */
private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) { private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) {
// setup HTTP-client // setup HTTP-client
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), timeout);
client.setTimout(timeout);
client.setHeader(requestHeader.entrySet()); client.setHeader(requestHeader.entrySet());
client.setRedirecting(false); client.setRedirecting(false);
return client; return client;

@ -610,7 +610,7 @@ public class serverSwitch
try { try {
final RequestHeader reqHeader = new RequestHeader(); final RequestHeader reqHeader = new RequestHeader();
reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent()); reqHeader.put(HeaderFramework.USER_AGENT, ClientIdentification.getUserAgent());
final HTTPClient client = new HTTPClient(); final HTTPClient client = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
client.setHeader(reqHeader.entrySet()); client.setHeader(reqHeader.entrySet());
byte[] data = client.GETbytes(uri); byte[] data = client.GETbytes(uri);
if ( data == null || data.length == 0 ) { if ( data == null || data.length == 0 ) {

@ -531,7 +531,7 @@ public final class yacy {
final RequestHeader requestHeader = new RequestHeader(); final RequestHeader requestHeader = new RequestHeader();
requestHeader.put(RequestHeader.AUTHORIZATION, "realm=" + encodedPassword); // for http-authentify requestHeader.put(RequestHeader.AUTHORIZATION, "realm=" + encodedPassword); // for http-authentify
// final Client con = new Client(10000, requestHeader); // final Client con = new Client(10000, requestHeader);
final HTTPClient con = new HTTPClient(); final HTTPClient con = new HTTPClient(ClientIdentification.getUserAgent(), ClientIdentification.DEFAULT_TIMEOUT);
con.setHeader(requestHeader.entrySet()); con.setHeader(requestHeader.entrySet());
// ResponseContainer res = null; // ResponseContainer res = null;
try { try {

Loading…
Cancel
Save