Michael Peter Christen 4 years ago
commit 3078b74e1d

@@ -133,7 +133,7 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window.
#{list}#
<tr class="TableCell#(dark)#Light::Dark::Summary#(/dark)#">
<td >#[hash]#</td>
- <td #(special)#::class="TableCellActive"#(/special)#>#[shortname]##(ssl)#::<a href="https://#[ip]#:#[portssl]#/Network.html?page=1" class="forceNoExternalIcon" target="_blank"><img src="env/grafics/lock.gif" width="11" height="15" title="https supported" alt="https supported" /></a>#(/ssl)#</td>
+ <td #(special)#::class="TableCellActive"#(/special)#>#[shortname]##(ssl)#::<a href="https://#[ip]#:#[portssl]#/Network.html?page=1" class="forceNoExternalIcon" target="_blank"><img src="env/grafics/lockclose.png" width="11" height="15" title="https supported" alt="https supported" /></a>#(/ssl)#</td>
<td nowrap>
#(type)##(direct)#<img src="env/grafics/JuniorPassive.gif" width="11" height="11" title="Type: Junior | Contact: passive" alt="Junior passive" />::<img src="env/grafics/JuniorDirect.gif" width="11" height="11" title="Type: Junior | Contact: direct" alt="Junior direct" />::<img src="env/grafics/JuniorOffline.gif" width="11" height="11" title="Type: Junior | Contact: offline" alt="Junior offline" />#(/direct)#::#(direct)#<img src="env/grafics/SeniorPassive.gif" width="11" height="11" title="Type: Senior | Contact: passive" alt="senior passive" />::<img src="env/grafics/SeniorDirect.gif" width="11" height="11" title="Type: Senior | Contact: direct" alt="Senior direct" />::<img src="env/grafics/SeniorOffline.gif" width="11" height="11" title="Type: Senior | Contact: offline" alt="Senior offline" />#(/direct)#::<a href="#[url]#" class="forceNoExternalIcon">#(direct)#<img src="env/grafics/PrincipalPassive.gif" width="11" height="11" title="Type: Principal | Contact: passive | Seed download: possible" alt="Principal passive" />::<img src="env/grafics/PrincipalDirect.gif" width="11" height="11" title="Type: Principal | Contact: direct | Seed download: possible" alt="Principal active" />::<img src="env/grafics/PrincipalOffline.gif" width="11" height="11" title="Type: Principal | Contact: offline | Seed download: ?" alt="Principal offline" />#(/direct)#</a>#(/type)##(acceptcrawl)#<img src="env/grafics/CrawlNo.gif" width="11" height="11" title="Accept Crawl: no" alt="no crawl" />::<img src="env/grafics/CrawlYes.gif" width="11" height="11" title="Accept Crawl: yes" alt="crawl possible" />::<img src="env/grafics/CrawlYesOffline.gif" width="11" height="11" title="Accept Crawl: yes" alt="crawl possible" />#(/acceptcrawl)##(dhtreceive)#<img src="env/grafics/DHTReceiveNo.gif" width="11" height="11" title="DHT Receive: no; #[peertags]#" alt="no DHT receive" />::<img src="env/grafics/DHTReceiveYes.gif" width="11" height="11" title="DHT Receive: yes" alt="DHT receive enabled" />::<img src="env/grafics/DHTReceiveYesOffline.gif" width="11" height="11" title="DHT Receive: yes" alt="DHT receive enabled" />#(/dhtreceive)##{ips}#<a href="#(c)#http://#(ipv6)#::[#(/ipv6)##[ip]##(ipv6)#::]#(/ipv6)#:#[port]#/Network.html?page=1::Network.html?page=4&amp;peerHash=#[hash]#&amp;peerIP=#[ip]#&amp;peerPort=#[port]#&amp;addPeer=add+Peer#(/c)#"#(c)#:: target="_blank"#(/c)#class="forceNoExternalIcon"><img src="env/grafics/#(nodestate)#NodeDisqualified::NodeQualified#(/nodestate)##(c)##(ipv6)#IPv4::IPv6#(/ipv6)#::#(ipv6)#IPv4::IPv6#(/ipv6)##(/c)#.gif" width="11" height="11" title="#(c)##(ipv6)#IPv4::IPv6#(/ipv6)#::#(ipv6)#IPv4::IPv6#(/ipv6)##(/c)# #(nodestate)#Peer::Node Peer#(/nodestate)#" /></a>#{/ips}#</td>
<td align="right">#[version]#</td>
@@ -249,7 +249,7 @@ document.getElementById("apilink").setAttribute("href", "Network.xml?" + window.
<td>QPH<br/>(remote)</td>
</tr>
<tr class="TableCellLight">
- <td>#[my-name]##(my-ssl)#::<img src="env/grafics/lock.gif" width="11" height="15" title="https supported" alt="https supported" />#(/my-ssl)#</td>
+ <td>#[my-name]##(my-ssl)#::<img src="env/grafics/lockclose.png" width="11" height="15" title="https supported" alt="https supported" />#(/my-ssl)#</td>
<td nowrap>#(my-info)#<img src="env/grafics/Virgin.gif" width="11" height="11" title="Type: Virgin" alt="Virgin" />::<img src="env/grafics/JuniorDirect.gif" width="11" height="11" title="Type: Junior" alt="Junior" />::<img src="env/grafics/SeniorDirect.gif" width="11" height="11" title="Type: Senior" alt="Senior" />::<img src="env/grafics/PrincipalDirect.gif" width="11" height="11" title="Type: Principal" alt="Principal" />#(/my-info)##(my-acceptcrawl)#<img src="env/grafics/CrawlNo.gif" width="11" height="11" title="Accept Crawl: no" alt="no crawl" />::<img src="env/grafics/CrawlYes.gif" width="11" height="11" title="Accept Crawl: yes" alt="Crawl enabled" />#(/my-acceptcrawl)##(my-dhtreceive)#<img src="env/grafics/DHTReceiveNo.gif" width="11" height="11" title="DHT Receive: no" alt="no DHT receive" />::<img src="env/grafics/DHTReceiveYes.gif" width="11" height="11" title="DHT Receive: yes" alt="DHT Receive enabled" />#(/my-dhtreceive)##{ips}#<a href="#(c)#http://#(ipv6)#::[#(/ipv6)##[ip]##(ipv6)#::]#(/ipv6)#:#[port]#/Network.html?page=1::Network.html?page=4&amp;peerHash=#[hash]#&amp;peerIP=#[ip]#&amp;peerPort=#[port]#&amp;addPeer=add+Peer#(/c)#"#(c)#:: target="_blank"#(/c)#class="forceNoExternalIcon"><img src="env/grafics/#(nodestate)#NodeDisqualified::NodeQualified#(/nodestate)##(c)##(ipv6)#IPv4::IPv6#(/ipv6)#::#(ipv6)#IPv4::IPv6#(/ipv6)##(/c)#.gif" width="11" height="11" title="#(c)##(ipv6)#IPv4::IPv6#(/ipv6)#::#(ipv6)#IPv4::IPv6#(/ipv6)##(/c)# #(nodestate)#Peer::Node Peer#(/nodestate)#" /></a>#{/ips}#</td>
<td align="right">#[my-version]#</td>
<td align="right">#[my-utc]#</td>

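A reading note on the template markup in the two Network.html hunks above: YaCy's servlet templates replace #[key]# with a value, pick one of the ::-separated branches inside #(key)#...#(/key)# according to the numeric value of key, and repeat #{list}#...#{/list}# blocks once per list entry. That is why the old and new <td> lines differ only in the icon file name (lock.gif vs. lockclose.png) inside the second branch of the #(ssl)# conditional. The sketch below is only a reading aid under those assumptions, not YaCy's actual TemplateEngine: it handles a single two-branch conditional and plain fields, ignores nesting and #{...}# lists, and the class name and regexes are invented for illustration.

    import java.util.Map;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Illustrative resolver for the template patterns used above (assumption: value "0"
    // or a missing key selects the left branch, anything else the right branch).
    public class TemplateSketch {

        private static final Pattern COND = Pattern.compile("#\\(([^)]+)\\)#(.*?)::(.*?)#\\(/\\1\\)#", Pattern.DOTALL);
        private static final Pattern FIELD = Pattern.compile("#\\[([^\\]]+)\\]#");

        public static String resolve(String template, Map<String, String> values) {
            // resolve #(key)#left::right#(/key)# conditionals
            Matcher c = COND.matcher(template);
            StringBuffer branches = new StringBuffer();
            while (c.find()) {
                String v = values.getOrDefault(c.group(1), "0");
                c.appendReplacement(branches, Matcher.quoteReplacement("0".equals(v) ? c.group(2) : c.group(3)));
            }
            c.appendTail(branches);
            // substitute #[key]# fields
            Matcher f = FIELD.matcher(branches.toString());
            StringBuffer out = new StringBuffer();
            while (f.find()) {
                f.appendReplacement(out, Matcher.quoteReplacement(values.getOrDefault(f.group(1), "")));
            }
            f.appendTail(out);
            return out.toString();
        }

        public static void main(String[] args) {
            String t = "#[shortname]##(ssl)#::<img src=\"env/grafics/lockclose.png\" title=\"https supported\" />#(/ssl)#";
            // prints: peer-1<img src="env/grafics/lockclose.png" title="https supported" />
            System.out.println(resolve(t, Map.of("shortname", "peer-1", "ssl", "1")));
        }
    }
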
@@ -134,7 +134,7 @@
You can download a more recent version of YaCy. Click here to install this update and restart YaCy:
<form action="Status.html" method="get" class="PeerControl" accept-charset="UTF-8">
<button type="submit" name="aquirerelease" class="btn btn-primary" value="Update YaCy">
<img src="env/grafics/lock.gif" alt="lock icon"/>
<img src="env/grafics/lockclose.png" alt="lock icon"/>
Install YaCy v#[latestVersion]#
</button>
</form>

@@ -57,12 +57,12 @@
<form action="Surftips.html" method="get" class="PeerControl" accept-charset="UTF-8"><div>
#(publicSurftips)#
<button type="submit" name="publicPage" class="btn btn-primary" value="1">
<img src="env/grafics/lock.gif" alt="authentication required" />
<img src="env/grafics/lockclose.png" alt="authentication required" />
Show surftips to everyone
</button>
::
<button type="submit" name="publicPage" class="btn btn-primary" value="0">
<img src="env/grafics/lock.gif" alt="authentication required" />
<img src="env/grafics/lockclose.png" alt="authentication required" />
Hide surftips for users without authorization
</button>
#(/publicSurftips)#

@@ -331,14 +331,21 @@ public class HostBalancer implements Balancer {
if (size <= 10) {smallStacksExist = true; break smallsearch;}
}
}
- if (singletonStacksExist || smallStacksExist) {
+ Set<String> freshhosts = new HashSet<>();
Iterator<String> i = this.roundRobinHostHashes.iterator();
smallstacks: while (i.hasNext()) {
if (this.roundRobinHostHashes.size() <= 10) break smallstacks; // don't shrink the hosts until nothing is left
- String s = i.next();
- HostQueue hq = this.queues.get(s);
+ String hosthash = i.next();
+ HostQueue hq = this.queues.get(hosthash);
if (hq == null) {i.remove(); continue smallstacks;}
- int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), s, robots, ClientIdentification.yacyInternetCrawlerAgent);
+ int delta = Latency.waitingRemainingGuessed(hq.getHost(), hq.getPort(), hosthash, robots, ClientIdentification.yacyInternetCrawlerAgent);
+ if (delta == Integer.MIN_VALUE) {
+ // never-crawled hosts; we do not want to have too many of them in here. Loading new hosts means: waiting for robots.txt to load
+ freshhosts.add(hosthash);
+ i.remove();
+ continue smallstacks;
+ }
+ if (singletonStacksExist || smallStacksExist) {
if (delta < 0) continue; // keep all non-waiting stacks; they are useful to speed up things
// to protect all small stacks which have a fast throughput, remove all with long waiting time
if (delta >= 1000) {i.remove(); continue smallstacks;}
@@ -350,6 +357,10 @@ public class HostBalancer implements Balancer {
}
}
}
+ // put at least one of the fresh hosts back
+ if (freshhosts.size() > 0) this.roundRobinHostHashes.add(freshhosts.iterator().next());
// result
if (this.roundRobinHostHashes.size() == 1) {
if (log.isFine()) log.fine("(re-)initialized the round-robin queue with one host");
} else {

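The HostBalancer change above does two things when rebuilding the round-robin host set: hosts for which Latency.waitingRemainingGuessed() returns Integer.MIN_VALUE (never crawled, so a robots.txt fetch would be needed first) are collected into freshhosts and removed from the rotation, and afterwards exactly one of them is re-added, so new hosts still enter the crawl without flooding it with robots.txt downloads. A minimal, self-contained sketch of that drain-then-readmit-one pattern, with invented names (keepOneFreshHost, knownHosts) standing in for the real latency check:

    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.LinkedHashSet;
    import java.util.Set;

    // Sketch only: "not in knownHosts" stands in for delta == Integer.MIN_VALUE.
    public class FreshHostThrottle {

        public static void keepOneFreshHost(Set<String> rotation, Set<String> knownHosts) {
            Set<String> fresh = new HashSet<>();
            Iterator<String> i = rotation.iterator();
            while (i.hasNext()) {
                String host = i.next();
                if (!knownHosts.contains(host)) {
                    fresh.add(host);
                    i.remove(); // keep hosts that still need a robots.txt fetch out of the rotation
                }
            }
            // put at least one of the fresh hosts back so new hosts are still crawled eventually
            if (!fresh.isEmpty()) rotation.add(fresh.iterator().next());
        }

        public static void main(String[] args) {
            Set<String> rotation = new LinkedHashSet<>(Set.of("a.example", "b.example", "c.example"));
            keepOneFreshHost(rotation, Set.of("a.example"));
            System.out.println(rotation); // a.example plus exactly one of the two fresh hosts
        }
    }
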
@@ -98,14 +98,13 @@ public final class HTTPLoader {
* @return a response with full meta data and embedding on open input stream on content. Don't forget to close the stream.
* @throws IOException when an error occurred
*/
- public StreamResponse openInputStream(final Request request, CrawlProfile profile, final int retryCount,
- final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent)
- throws IOException {
+ public StreamResponse openInputStream(
+ final Request request, CrawlProfile profile, final int retryCount,
+ final int maxFileSize, final BlacklistType blacklistType, final ClientIdentification.Agent agent
+ ) throws IOException {
if (retryCount < 0) {
- this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
- FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
- throw new IOException(
- "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
+ this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
+ throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
}
DigestURL url = request.url();
@@ -158,8 +157,7 @@ public final class HTTPLoader {
if (statusCode > 299 && statusCode < 310) {
client.finish();
- final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline,
- responseHeader, requestURLString);
+ final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline, responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
// we have two use cases here: loading from a crawl or just
@@ -196,15 +194,20 @@ public final class HTTPLoader {
"CRAWLER Redirect of URL=" + requestURLString + " aborted because of server shutdown.$");
}
+ // check if the redirected URL is the same as the requested URL
+ // this shortcuts a time-out using retryCount
+ if (redirectionUrl.equals(url)) {
+ this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirect to same url", -1);
+ throw new IOException( "retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.$");
+ }
// retry crawling with new url
request.redirectURL(redirectionUrl);
return openInputStream(request, profile, retryCount - 1, maxFileSize, blacklistType, agent);
}
// we don't want to follow redirects
- this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
- FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
- throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline
- + "' for URL '" + requestURLString + "'$");
+ this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile, FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
+ throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline + "' for URL '" + requestURLString + "'$");
} else if (statusCode == HttpStatus.SC_OK || statusCode == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION) {
// the transfer is ok
@@ -397,8 +400,6 @@ public final class HTTPLoader {
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " aborted. Reason : " + rejectReason);
}
throw new IOException("CRAWLER Redirect of URL=" + requestURLString + " to " + redirectionUrl.toNormalform(false) + " placed on crawler queue for double-check");
}
// if we are already doing a shutdown we don't need to retry crawling

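In the HTTPLoader hunks above, openInputStream() follows an HTTP 3xx by calling itself with retryCount - 1, and the newly inserted guard rejects a Location that points back to the requested URL, so a self-redirecting server fails immediately instead of burning the whole retry budget one timeout at a time. A hedged sketch of that control flow, where RedirectFollower, Fetched and fetch() are placeholders rather than YaCy API:

    import java.io.IOException;
    import java.net.URI;

    final class RedirectFollower {

        record Fetched(int status, URI location) {}

        Fetched open(URI url, int retryCount) throws IOException {
            if (retryCount < 0) throw new IOException("retry counter exceeded for URL " + url);
            Fetched r = fetch(url); // placeholder for the real HTTP client call
            if (r.status() > 299 && r.status() < 310) {
                URI target = r.location();
                // the guard added in this commit: a redirect back to the same URL would only
                // waste the remaining retries, so fail fast instead of recursing
                if (url.equals(target)) throw new IOException("redirect to same url: " + url);
                return open(target, retryCount - 1); // each followed redirect consumes one retry
            }
            return r;
        }

        private Fetched fetch(URI url) throws IOException {
            throw new UnsupportedOperationException("stand-in for the real request");
        }
    }
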
@@ -399,7 +399,7 @@ public final class LoaderDispatcher {
// load resource from the internet
StreamResponse response;
if (protocol.equals("http") || protocol.equals("https")) {
- response = this.httpLoader.openInputStream(request, crawlProfile, 1, maxFileSize, blacklistType, agent);
+ response = this.httpLoader.openInputStream(request, crawlProfile, 2, maxFileSize, blacklistType, agent);
} else if (protocol.equals("ftp")) {
response = this.ftpLoader.openInputStream(request, true);
} else if (protocol.equals("smb")) {

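The LoaderDispatcher hunk raises the initial retry counter passed to openInputStream() from 1 to 2. With the recursive scheme above, each followed redirect consumes one unit and the request fails once the counter drops below zero, so the change lets a chain of two redirects resolve where previously only one hop could complete; the new same-URL guard keeps a self-redirect from eating that budget. A tiny simulation of the arithmetic, under the assumption that nothing else aborts the chain:

    final class RetryBudgetDemo {
        // true if a chain of `redirects` hops can be resolved with the given starting counter
        static boolean canResolveChain(int redirects, int retryCount) {
            if (retryCount < 0) return false;   // "retry counter exceeded"
            if (redirects == 0) return true;    // this hop serves content
            return canResolveChain(redirects - 1, retryCount - 1);
        }

        public static void main(String[] args) {
            System.out.println(canResolveChain(2, 1)); // false: the old initial value
            System.out.println(canResolveChain(2, 2)); // true: the new initial value
        }
    }
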