fix for connections not getting closed to get favicon.ico during seach

pull/41/merge
sixcooler 9 years ago
parent e1dd808e1c
commit 5cb7ba0dc4

@ -28,6 +28,8 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.http.StatusLine;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.FailCategory;
import net.yacy.cora.protocol.ClientIdentification;
@ -142,14 +144,16 @@ public final class HTTPLoader {
// send request
client.GET(url, false);
final int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final StatusLine statusline = client.getHttpResponse().getStatusLine();
final int statusCode = statusline.getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
String requestURLString = request.url().toNormalform(true);
// check redirection
if (statusCode > 299 && statusCode < 310) {
client.finish();
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode,
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, statusline,
responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
@ -182,7 +186,7 @@ public final class HTTPLoader {
// we don't want to follow redirects
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.FINAL_PROCESS_CONTEXT, "redirection not wanted", statusCode);
throw new IOException("REJECTED UNWANTED REDIRECTION '" + client.getHttpResponse().getStatusLine()
throw new IOException("REJECTED UNWANTED REDIRECTION '" + statusline
+ "' for URL '" + requestURLString + "'$");
} else if (statusCode == 200 || statusCode == 203) {
// the transfer is ok
@ -192,12 +196,14 @@ public final class HTTPLoader {
*/
long contentLength = client.getHttpResponse().getEntity().getContentLength();
if (profile != null && profile.storeHTCache() && contentLength > 0 && contentLength < (Response.CRAWLER_MAX_SIZE_TO_CACHE) && !url.isLocal()) {
byte[] content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
byte[] content = null;
try {
content = HTTPClient.getByteArray(client.getHttpResponse().getEntity(), maxFileSize);
Cache.store(url, responseHeader, content);
} catch (final IOException e) {
this.log.warn("cannot write " + url + " to Cache (3): " + e.getMessage(), e);
} finally {
client.finish();
}
return new ByteArrayInputStream(content);
@ -209,10 +215,11 @@ public final class HTTPLoader {
*/
return new HTTPInputStream(client);
} else {
client.finish();
// if the response has not the right response type then reject file
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.TEMPORARY_NETWORK_FAILURE, "wrong http status code", statusCode);
throw new IOException("REJECTED WRONG STATUS TYPE '" + client.getHttpResponse().getStatusLine()
throw new IOException("REJECTED WRONG STATUS TYPE '" + statusline
+ "' for URL '" + requestURLString + "'$");
}
}
@ -223,7 +230,7 @@ public final class HTTPLoader {
* @throws IOException when an error occured
*/
private DigestURL extractRedirectURL(final Request request, CrawlProfile profile, DigestURL url,
final HTTPClient client, final int statusCode, final ResponseHeader responseHeader, String requestURLString)
final StatusLine statusline, final ResponseHeader responseHeader, String requestURLString)
throws IOException {
// read redirection URL
String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION);
@ -232,8 +239,8 @@ public final class HTTPLoader {
if (redirectionUrlString.isEmpty()) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.TEMPORARY_NETWORK_FAILURE,
"no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusCode);
throw new IOException("REJECTED EMTPY REDIRECTION '" + client.getHttpResponse().getStatusLine()
"no redirection url provided, field '" + HeaderFramework.LOCATION + "' is empty", statusline.getStatusCode());
throw new IOException("REJECTED EMTPY REDIRECTION '" + statusline
+ "' for URL '" + requestURLString + "'$");
}
@ -241,7 +248,7 @@ public final class HTTPLoader {
final DigestURL redirectionUrl = DigestURL.newURL(request.url(), redirectionUrlString);
// restart crawling with new url
this.log.info("CRAWLER Redirection detected ('" + client.getHttpResponse().getStatusLine() + "') for URL "
this.log.info("CRAWLER Redirection detected ('" + statusline + "') for URL "
+ requestURLString);
this.log.info("CRAWLER ..Redirecting request to: " + redirectionUrl.toNormalform(false));
@ -249,7 +256,7 @@ public final class HTTPLoader {
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_RECORD_REDIRECTS, true)) {
this.sb.crawlQueues.errorURL.push(request.url(), request.depth(), profile,
FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusCode);
FailCategory.FINAL_REDIRECT_RULE, "redirect to " + redirectionUrlString, statusline.getStatusCode());
}
return redirectionUrl;
}
@ -335,7 +342,7 @@ public final class HTTPLoader {
// check redirection
if (statusCode > 299 && statusCode < 310) {
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client, statusCode,
final DigestURL redirectionUrl = extractRedirectURL(request, profile, url, client.getHttpResponse().getStatusLine(),
responseHeader, requestURLString);
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {

Loading…
Cancel
Save