Fixing redirects and status codes: the status code is now stored in the
ResponseHeader to make it available for late evaluation, such as storage
in Solr.
pull/1/head
Michael Peter Christen 13 years ago
parent 8dd469b9dd
commit 77f795756c

@ -1,4 +1,4 @@
// CacheResource_p.java
// CacheResource_p.java
// -----------------------
// (C) by Michael Peter Christen; mc@yacy.net
// first published on http://www.anomic.de
@ -30,7 +30,6 @@ import net.yacy.cora.protocol.ResponseHeader;
import net.yacy.document.ImageParser;
import net.yacy.kelondro.data.meta.DigestURI;
import net.yacy.kelondro.logging.Log;
import de.anomic.crawler.Cache;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
@ -41,9 +40,9 @@ public class CacheResource_p {
public static Object respond(final RequestHeader header, final serverObjects post, final serverSwitch env) {
final servletProperties prop = new servletProperties();
prop.put("resource", new byte[0]);
if (post == null) return prop;
final String u = post.get("url", "");
DigestURI url;
try {
@ -52,10 +51,10 @@ public class CacheResource_p {
Log.logException(e);
return prop;
}
byte[] resource = Cache.getContent(url.hash());
if (resource == null) return prop;
// check request type
if (header.get("EXT", "html").equals("png")) {
// a png was requested
@ -65,11 +64,11 @@ public class CacheResource_p {
ResponseHeader responseHeader = Cache.getResponseHeader(url.hash());
String resMime = responseHeader == null ? null : responseHeader.mime();
if (resMime != null) {
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
outgoingHeader.put(HeaderFramework.CONTENT_TYPE, resMime);
prop.setOutgoingHeader(outgoingHeader);
}
}
// add resource
prop.put("resource", resource);
return prop;

@ -31,7 +31,6 @@ import java.util.Iterator;
import java.util.Map;
import net.yacy.cora.protocol.ResponseHeader;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.servletProperties;
@ -39,7 +38,7 @@ import de.anomic.server.servletProperties;
public class CookieTest_p {
public static serverObjects respond(final ResponseHeader header, final serverObjects post, final serverSwitch env) {
// case if no values are requested
if (post == null || env == null) {
@ -48,10 +47,10 @@ public class CookieTest_p {
final serverObjects prop = new serverObjects();
return prop;
}
final servletProperties prop = new servletProperties();
if (post.containsKey("act") && "clear_cookie".equals(post.get("act"))) {
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
final Iterator<Map.Entry<String, String>> it = header.entrySet().iterator();
Map.Entry<String, String> e;
while (it.hasNext()) {
@ -65,15 +64,15 @@ public class CookieTest_p {
}
}
}
prop.setOutgoingHeader(outgoingHeader);
prop.put("coockiesout", "0");
//header.
} else if (post.containsKey("act") && "set_cookie".equals(post.get("act"))) {
final String cookieName = post.get("cookie_name").trim();
final String cookieValue = post.get("cookie_value").trim();
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
outgoingHeader.setCookie(cookieName,cookieValue);
prop.setOutgoingHeader(outgoingHeader);

@ -455,7 +455,7 @@ public class Crawler_p {
prop.put("info", "6"); // Error with url
prop.putHTML("info_crawlingStart", crawlingStart);
prop.putHTML("info_error", e.getMessage());
Log.logException(e);
Log.logInfo("Crawler_p", "start url rejected: " + e.getMessage());
}
} else if ("file".equals(crawlingMode)) {

@ -112,7 +112,7 @@ public class User{
cookie=sb.userDB.getAdminCookie();
if(entry != null || staticAdmin){
final ResponseHeader outgoingHeader=new ResponseHeader();
final ResponseHeader outgoingHeader=new ResponseHeader(200);
outgoingHeader.setCookie("login", cookie);
prop.setOutgoingHeader(outgoingHeader);

@ -120,7 +120,7 @@ public class suggest {
// Adding CORS Access header for xml output
if (xml) {
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
outgoingHeader.put(HeaderFramework.CORS_ALLOW_ORIGIN, "*");
prop.setOutgoingHeader(outgoingHeader);
}

@ -205,7 +205,7 @@ public class yacysearch {
// Adding CORS Access header for yacysearch.rss output
if ( rss ) {
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
outgoingHeader.put(HeaderFramework.CORS_ALLOW_ORIGIN, "*");
prop.setOutgoingHeader(outgoingHeader);
}

@ -333,7 +333,7 @@ public class RobotsTxt {
ByteCount.addAccountCount(ByteCount.CRAWLER, robotsTxt.length);
}
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final ResponseHeader header = new ResponseHeader(code, client.getHttpResponse().getAllHeaders());
// check the response status
if (code > 199 && code < 300) {

@ -124,7 +124,7 @@ public class FTPLoader {
if (dirList == null) {
response = null;
} else {
final ResponseHeader responseHeader = new ResponseHeader();
final ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
final CrawlProfile profile = this.sb.crawler.getActive(request.profileHandle().getBytes());
@ -132,7 +132,6 @@ public class FTPLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
dirList.toString().getBytes());
@ -226,7 +225,7 @@ public class FTPLoader {
final DigestURI refurl = this.sb.getURL(Segments.Process.LOCALCRAWLING, request.referrerhash());
if (refurl != null) requestHeader.put(RequestHeader.REFERER, refurl.toNormalform(true, false));
}
final ResponseHeader responseHeader = new ResponseHeader();
final ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(fileDate));
responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
@ -252,7 +251,6 @@ public class FTPLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
null);
@ -268,7 +266,6 @@ public class FTPLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
b);

@ -85,7 +85,7 @@ public class FileLoader {
StringBuilder content = FTPClient.dirhtml(u, null, null, null, list, true);
ResponseHeader responseHeader = new ResponseHeader();
ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
final CrawlProfile profile = this.sb.crawler.getActive(request.profileHandle().getBytes());
@ -93,7 +93,6 @@ public class FileLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
content.toString().getBytes());
@ -103,7 +102,7 @@ public class FileLoader {
// create response header
String mime = Classification.ext2mime(url.getFileExtension());
ResponseHeader responseHeader = new ResponseHeader();
ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
@ -134,7 +133,6 @@ public class FileLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
url.toTokens().getBytes());
@ -152,7 +150,6 @@ public class FileLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
b);

@ -80,8 +80,8 @@ public final class HTTPLoader {
private Response load(final Request request, final int retryCount, final int maxFileSize, final boolean checkBlacklist) throws IOException {
if (retryCount < 0) {
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "redirection counter exceeded", -1);
throw new IOException("Redirection counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
this.sb.crawlQueues.errorURL.push(request, this.sb.peers.mySeed().hash.getBytes(), new Date(), 1, FailCategory.TEMPORARY_NETWORK_FAILURE, "retry counter exceeded", -1);
throw new IOException("retry counter exceeded for URL " + request.url().toString() + ". Processing aborted.");
}
DigestURI url = request.url();
@ -131,15 +131,15 @@ public final class HTTPLoader {
// send request
final byte[] responseBody = client.GETbytes(url, maxFileSize);
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(code, client.getHttpResponse().getAllHeaders());
if (code > 299 && code < 310) {
// redirection (content may be empty)
if (this.sb.getConfigBool(SwitchboardConstants.CRAWLER_FOLLOW_REDIRECTS, true)) {
if (header.containsKey(HeaderFramework.LOCATION)) {
if (responseHeader.containsKey(HeaderFramework.LOCATION)) {
// getting redirection URL
String redirectionUrlString = header.get(HeaderFramework.LOCATION);
String redirectionUrlString = responseHeader.get(HeaderFramework.LOCATION);
redirectionUrlString = redirectionUrlString.trim();
if (redirectionUrlString.length() == 0) {
@ -202,8 +202,7 @@ public final class HTTPLoader {
response = new Response(
request,
requestHeader,
header,
Integer.toString(code),
responseHeader,
profile,
false,
responseBody
@ -254,8 +253,8 @@ public final class HTTPLoader {
client.setTimout(20000);
client.setHeader(requestHeader.entrySet());
final byte[] responseBody = client.GETbytes(request.url());
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final int code = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(code, client.getHttpResponse().getAllHeaders());
// FIXME: 30*-handling (bottom) is never reached
// we always get the final content because httpClient.followRedirects = true
@ -272,7 +271,6 @@ public final class HTTPLoader {
request,
requestHeader,
header,
Integer.toString(code),
null,
false,
responseBody

@ -63,7 +63,6 @@ public class Response {
private final Request request;
private final RequestHeader requestHeader;
private final ResponseHeader responseHeader;
private final String responseStatus;
private final CrawlProfile profile;
private byte[] content;
private int status; // tracker indexing status, see status defs below
@ -151,7 +150,6 @@ public class Response {
final Request request,
final RequestHeader requestHeader,
final ResponseHeader responseHeader,
final String responseStatus,
final CrawlProfile profile,
final boolean fromCache,
final byte[] content) {
@ -159,7 +157,6 @@ public class Response {
// request and response headers may be zero in case that we process surrogates
this.requestHeader = requestHeader;
this.responseHeader = responseHeader;
this.responseStatus = responseStatus;
this.profile = profile;
this.status = QUEUE_STATE_FRESH;
this.content = content;
@ -176,10 +173,9 @@ public class Response {
this.request = request;
// request and response headers may be zero in case that we process surrogates
this.requestHeader = new RequestHeader();
this.responseHeader = new ResponseHeader();
this.responseHeader = new ResponseHeader(200);
this.responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/plain"); // tell parser how to handle the content
if (request.size() > 0) this.responseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(request.size()));
this.responseStatus = "200";
this.profile = profile;
this.status = QUEUE_STATE_FRESH;
this.content = request.name().length() > 0 ? request.name().getBytes() : request.url().toTokens().getBytes();
@ -190,10 +186,9 @@ public class Response {
final Request request,
final RequestHeader requestHeader,
final ResponseHeader responseHeader,
final String responseStatus,
final CrawlProfile profile,
final boolean fromCache) {
this(request, requestHeader, responseHeader, responseStatus, profile, fromCache, null);
this(request, requestHeader, responseHeader, profile, fromCache, null);
}
public void updateStatus(final int newStatus) {
@ -371,7 +366,7 @@ public class Response {
// check status code
if (!validResponseStatus()) {
return "bad_status_" + this.responseStatus;
return "bad_status_" + this.responseHeader.getStatusCode();
}
if (this.requestHeader != null) {
@ -796,7 +791,8 @@ public class Response {
}
public boolean validResponseStatus() {
return (this.responseStatus == null) ? false : this.responseStatus.startsWith("200") || this.responseStatus.startsWith("203");
int status = this.responseHeader.getStatusCode();
return status == 200 || status == 203;
}
public Date ifModifiedSince() {

@ -104,7 +104,7 @@ public class SMBLoader {
StringBuilder content = FTPClient.dirhtml(u, null, null, null, list, true);
ResponseHeader responseHeader = new ResponseHeader();
ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date()));
responseHeader.put(HeaderFramework.CONTENT_TYPE, "text/html");
final CrawlProfile profile = this.sb.crawler.getActive(request.profileHandle().getBytes());
@ -112,7 +112,6 @@ public class SMBLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
content.toString().getBytes());
@ -122,7 +121,7 @@ public class SMBLoader {
// create response header
String mime = Classification.ext2mime(url.getFileExtension());
ResponseHeader responseHeader = new ResponseHeader();
ResponseHeader responseHeader = new ResponseHeader(200);
responseHeader.put(HeaderFramework.LAST_MODIFIED, HeaderFramework.formatRFC1123(new Date(url.lastModified())));
responseHeader.put(HeaderFramework.CONTENT_TYPE, mime);
@ -153,7 +152,6 @@ public class SMBLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
url.toTokens().getBytes());
@ -171,7 +169,6 @@ public class SMBLoader {
request,
requestHeader,
responseHeader,
"200",
profile,
false,
b);

@ -227,7 +227,7 @@ public final class HTTPDFileHandler {
}
private static final ResponseHeader getDefaultHeaders(final String path) {
final ResponseHeader headers = new ResponseHeader();
final ResponseHeader headers = new ResponseHeader(200);
String ext;
int pos;
if ((pos = path.lastIndexOf('.')) < 0) {
@ -526,7 +526,7 @@ public final class HTTPDFileHandler {
aBuffer.append(" </ul>\n</body>\n</html>\n");
// write the list to the client
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html; charset=UTF-8", aBuffer.length(), new Date(targetFile.lastModified()), null, new ResponseHeader(), null, null, true);
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html; charset=UTF-8", aBuffer.length(), new Date(targetFile.lastModified()), null, new ResponseHeader(200), null, null, true);
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
out.write(UTF8.getBytes(aBuffer.toString()));
}
@ -1075,9 +1075,10 @@ public final class HTTPDFileHandler {
// apply templates
TemplateEngine.writeTemplate(fis, o, templatePatterns, UNRESOLVED_PATTERN);
fis.close();
ResponseHeader rh = (templatePatterns == null) ? new ResponseHeader(200) : templatePatterns.getOutgoingHeader();
HTTPDemon.sendRespondHeader(conProp, out,
httpVersion, 200, null, mimeType, -1,
targetDate, expireDate, (templatePatterns == null) ? new ResponseHeader() : templatePatterns.getOutgoingHeader(),
httpVersion, rh.getStatusCode(), null, mimeType, -1,
targetDate, expireDate, rh,
null, "chunked", nocache);
// send the content in chunked parts, see RFC 2616 section 3.6.1
final ChunkedOutputStream chos = new ChunkedOutputStream(out);
@ -1107,16 +1108,17 @@ public final class HTTPDFileHandler {
ServerSideIncludes.writeSSI(o1, o, realmProp, clientIP, requestHeader);
//httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8"));
}
ResponseHeader rh = (templatePatterns == null) ? new ResponseHeader(200) : templatePatterns.getOutgoingHeader();
if (method.equals(HeaderFramework.METHOD_HEAD)) {
HTTPDemon.sendRespondHeader(conProp, out,
httpVersion, 200, null, mimeType, o.length(),
targetDate, expireDate, (templatePatterns == null) ? new ResponseHeader() : templatePatterns.getOutgoingHeader(),
httpVersion, rh.getStatusCode(), null, mimeType, o.length(),
targetDate, expireDate, rh,
contentEncoding, null, nocache);
} else {
final byte[] result = o.getBytes(); // this interrupts streaming (bad idea!)
HTTPDemon.sendRespondHeader(conProp, out,
httpVersion, 200, null, mimeType, result.length,
targetDate, expireDate, (templatePatterns == null) ? new ResponseHeader() : templatePatterns.getOutgoingHeader(),
httpVersion, rh.getStatusCode(), null, mimeType, result.length,
targetDate, expireDate, rh,
contentEncoding, null, nocache);
FileUtils.copy(result, out);
}
@ -1125,7 +1127,7 @@ public final class HTTPDFileHandler {
int statusCode = 200;
int rangeStartOffset = 0;
final ResponseHeader header = new ResponseHeader();
final ResponseHeader header = new ResponseHeader(statusCode);
// adding the accept ranges header
header.put(HeaderFramework.ACCEPT_RANGES, "bytes");
@ -1429,8 +1431,8 @@ public final class HTTPDFileHandler {
String strARGS = (String) conProp.get("ARGS");
if(strARGS.startsWith("action=")) {
int detectnextargument = strARGS.indexOf("&");
action = strARGS.substring (7, detectnextargument);
strARGS = strARGS.substring(detectnextargument+1);
action = strARGS.substring (7, detectnextargument);
strARGS = strARGS.substring(detectnextargument+1);
}
if(strARGS.startsWith("url=")) {
final String strUrl = strARGS.substring(4); // strip url=
@ -1467,7 +1469,7 @@ public final class HTTPDFileHandler {
requestHeader.remove("Authorization");
requestHeader.remove("Connection");
requestHeader.put(HeaderFramework.HOST, proxyurl.getHost());
// temporarily add argument to header to pass it on to augmented browsing
requestHeader.put("YACYACTION", action);
@ -1475,7 +1477,7 @@ public final class HTTPDFileHandler {
HTTPDProxyHandler.doGet(prop, requestHeader, o);
// reparse header to extract content-length and mimetype
final ResponseHeader outgoingHeader = new ResponseHeader();
final ResponseHeader outgoingHeader = new ResponseHeader(200);
final InputStream in = new ByteArrayInputStream(o.toByteArray());
String line = readLine(in);
while(line != null && !line.equals("")) {

@ -403,7 +403,6 @@ public final class HTTPDProxyHandler {
request,
requestHeader,
cachedResponseHeader,
"200 OK",
sb.crawler.defaultProxyProfile,
true
);
@ -495,20 +494,20 @@ public final class HTTPDProxyHandler {
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader);
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
// determine if it's an internal error of the httpc
if (responseHeader.isEmpty()) {
throw new Exception(client.getHttpResponse().getStatusLine().toString());
}
if(AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
if (AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
// enable chunk encoding, because we don't know the length after annotating
responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
}
ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), respond);
ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, statusCode, respond);
// the cache does either not exist or is (supposed to be) stale
long sizeBeforeDelete = -1;
@ -558,7 +557,7 @@ public final class HTTPDProxyHandler {
conProp,
respond,
httpVer,
client.getHttpResponse().getStatusLine().getStatusCode(),
statusCode,
client.getHttpResponse().getStatusLine().toString(), // status text
responseHeader);
@ -569,7 +568,6 @@ public final class HTTPDProxyHandler {
request,
requestHeader,
responseHeader,
Integer.toString(client.getHttpResponse().getStatusLine().getStatusCode()),
sb.crawler.defaultProxyProfile,
true
);
@ -845,7 +843,8 @@ public final class HTTPDProxyHandler {
// if (responseHeader.isEmpty()) {
// throw new Exception(res.getStatusLine());
// }
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
if (responseHeader.isEmpty()) {
throw new Exception(client.getHttpResponse().getStatusLine().toString());
}
@ -860,7 +859,7 @@ public final class HTTPDProxyHandler {
conProp,
respond,
httpVer,
client.getHttpResponse().getStatusLine().getStatusCode(),
statusCode,
client.getHttpResponse().getStatusLine().toString(),
responseHeader);
respond.flush();
@ -951,7 +950,8 @@ public final class HTTPDProxyHandler {
client.POST(getUrl, body, contentLength);
if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine());
final ResponseHeader responseHeader = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
// determine if it's an internal error of the httpc
if (responseHeader.isEmpty()) {
throw new Exception(client.getHttpResponse().getStatusLine().toString());
@ -971,7 +971,7 @@ public final class HTTPDProxyHandler {
HTTPDemon.sendRespondHeader(conProp,
countedRespond,
httpVer,
client.getHttpResponse().getStatusLine().getStatusCode(),
statusCode,
client.getHttpResponse().getStatusLine().toString(), // status text
responseHeader);
@ -1249,11 +1249,12 @@ public final class HTTPDProxyHandler {
try {
remoteProxy.HEADResponse("http://" + host + ":" + port);
final ResponseHeader header = new ResponseHeader(remoteProxy.getHttpResponse().getAllHeaders());
int statusCode = remoteProxy.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(statusCode, remoteProxy.getHttpResponse().getAllHeaders());
// outputs a logline to the serverlog with the current status
log.logInfo("CONNECT-RESPONSE: status=" + remoteProxy.getHttpResponse().getStatusLine() + ", header=" + header.toString());
final boolean success = remoteProxy.getHttpResponse().getStatusLine().getStatusCode() >= 200 && remoteProxy.getHttpResponse().getStatusLine().getStatusCode() <= 399;
final boolean success = statusCode >= 200 && statusCode <= 399;
if (success) {
// replace connection details
host = ProxySettings.host;

@ -1136,9 +1136,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
final byte[] result = o.toByteArray();
o.close(); o = null;
if(header == null) {
header = new ResponseHeader();
}
if (header == null) header = new ResponseHeader(httpStatusCode);
header.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS, Integer.toString(httpStatusCode));
header.put(HeaderFramework.DATE, systemDate);
header.put(HeaderFramework.CONTENT_TYPE, "text/html");
@ -1189,9 +1187,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
}
}
if (headers == null) {
headers = new ResponseHeader();
}
if (headers == null) headers = new ResponseHeader(httpStatusCode);
final Date now = new Date(System.currentTimeMillis());
headers.put(HeaderFramework.SERVER, "AnomicHTTPD (www.anomic.de)");
@ -1240,7 +1236,7 @@ public final class HTTPDemon implements serverHandler, Cloneable {
if (respond == null) throw new NullPointerException("The outputstream must not be null.");
if (conProp == null) throw new NullPointerException("The connection property structure must not be null.");
if (httpVersion == null) httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); if (httpVersion == null) httpVersion = HeaderFramework.HTTP_VERSION_1_1;
if (responseHeader == null) responseHeader = new ResponseHeader();
if (responseHeader == null) responseHeader = new ResponseHeader(httpStatusCode);
try {
if ((httpStatusText == null)||(httpStatusText.length()==0)) {

@ -26,7 +26,7 @@ import net.yacy.cora.protocol.ResponseHeader;
public class servletProperties extends serverObjects {
private static final long serialVersionUID = 1L;
public static final String PEER_STAT_VERSION = "version";
public static final String PEER_STAT_UPTIME = "uptime";
public static final String PEER_STAT_MYTIME = "mytime";
@ -34,56 +34,62 @@ public class servletProperties extends serverObjects {
public static final String PEER_STAT_CLIENTID = "clientid";
private String prefix="";
private ResponseHeader outgoingHeader;
public servletProperties(){
super();
}
public servletProperties(final serverObjects so) {
super(so);
}
public void setOutgoingHeader(final ResponseHeader outgoingHeader) {
this.outgoingHeader = outgoingHeader;
}
public ResponseHeader getOutgoingHeader() {
if(outgoingHeader == null)
return new ResponseHeader();
return outgoingHeader;
if (this.outgoingHeader == null) return new ResponseHeader(200);
return this.outgoingHeader;
}
public void setPrefix(final String myprefix) {
prefix=myprefix;
this.prefix=myprefix;
}
@Override
public String put(final String key, final byte[] value) {
return super.put(prefix + key, value);
return super.put(this.prefix + key, value);
}
@Override
public long put(final String key, final long value) {
return super.put(prefix + key, value);
return super.put(this.prefix + key, value);
}
@Override
public long inc(final String key) {
return super.inc(prefix+key);
return super.inc(this.prefix+key);
}
@Override
public Object get(final String key, final Object dflt) {
return super.get(prefix+key, dflt);
return super.get(this.prefix+key, dflt);
}
@Override
public String get(final String key, final String dflt) {
return super.get(prefix+key, dflt);
return super.get(this.prefix+key, dflt);
}
@Override
public int getInt(final String key, final int dflt) {
return super.getInt(prefix+key, dflt);
return super.getInt(this.prefix+key, dflt);
}
@Override
public long getLong(final String key, final long dflt) {
return super.getLong(prefix+key, dflt);
return super.getLong(this.prefix+key, dflt);
}
}

@ -82,7 +82,6 @@ public class HeaderFramework extends TreeMap<String, String> implements Map<Stri
public static final String ACCEPT_ENCODING = "Accept-Encoding";
public static final String ACCEPT_CHARSET = "Accept-Charset";
public static final String CONTENT_LENGTH = "Content-Length";
public static final String CONTENT_TYPE = "Content-Type";
public static final String CONTENT_MD5 = "Content-MD5";
@ -135,6 +134,11 @@ public class HeaderFramework extends TreeMap<String, String> implements Map<Stri
public static final String METHOD_POST = "POST";
public static final String METHOD_CONNECT = "CONNECT";
/*
* constants for metadata which is stored in the ResponseHeader
*/
public static final String STATUS_CODE = "STATUS_CODE";
/* =============================================================
* defining default http status messages
* ============================================================= */

@ -39,25 +39,38 @@ public class ResponseHeader extends HeaderFramework {
private static final long serialVersionUID = 0L;
private static Logger log = Logger.getLogger(ResponseHeader.class);
public ResponseHeader() {
public ResponseHeader(final int statusCode) {
super();
this.put(HeaderFramework.STATUS_CODE, Integer.toString(statusCode));
}
public ResponseHeader(final Header[] headers) {
public ResponseHeader(final int statusCode, final Header[] headers) {
super();
this.put(HeaderFramework.STATUS_CODE, Integer.toString(statusCode));
for (final Header h : headers) {
add(h.getName(), h.getValue());
}
}
public ResponseHeader(final HashMap<String, String> reverseMappingCache) {
public ResponseHeader(final int statusCode, final HashMap<String, String> reverseMappingCache) {
super(reverseMappingCache);
this.put(HeaderFramework.STATUS_CODE, Integer.toString(statusCode));
}
public ResponseHeader(final HashMap<String, String> reverseMappingCache, final Map<String, String> othermap) {
super(reverseMappingCache, othermap);
}
/**
 * Returns the HTTP status code stored in this header under
 * {@link HeaderFramework#STATUS_CODE}.
 *
 * @return the parsed status code, or 200 when no value is stored or the
 *         stored value is not a valid integer
 */
public int getStatusCode() {
    final String stored = this.get(HeaderFramework.STATUS_CODE);
    int code = 200; // fallback for a missing or malformed entry
    if (stored != null) {
        try {
            code = Integer.parseInt(stored);
        } catch (final NumberFormatException ignored) {
            // malformed value: keep the 200 fallback
            code = 200;
        }
    }
    return code;
}
public Date date() {
final Date d = headerDate(HeaderFramework.DATE);
if (d == null) return new Date(); else return d;

@ -47,6 +47,7 @@ import net.yacy.cora.document.MultiProtocolURI;
import net.yacy.cora.document.UTF8;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.protocol.ConnectionInfo;
import net.yacy.cora.protocol.Domains;
import net.yacy.cora.protocol.HeaderFramework;
import org.apache.http.Header;
@ -156,7 +157,7 @@ public class HTTPClient {
// connections per host (2 default)
clientConnectionManager.setDefaultMaxPerRoute(2);
// Increase max connections for localhost
final HttpHost localhost = new HttpHost("localhost");
final HttpHost localhost = new HttpHost("127.0.0.1");
clientConnectionManager.setMaxPerRoute(new HttpRoute(localhost), maxcon);
/**
* HTTP protocol settings
@ -339,7 +340,7 @@ public class HTTPClient {
* @throws IOException
*/
public byte[] GETbytes(final MultiProtocolURI url, final int maxBytes) throws IOException {
final boolean localhost = url.getHost().equals("localhost");
final boolean localhost = Domains.isLocalhost(url.getHost());
final String urix = url.toNormalform(true, false);
final HttpGet httpGet = new HttpGet(urix);
if (!localhost) setHost(url.getHost()); // overwrite resolved IP, needed for shared web hosting DO NOT REMOVE, see http://en.wikipedia.org/wiki/Shared_web_hosting_service
@ -708,17 +709,18 @@ public class HTTPClient {
final SSLSocketFactory sslSF = new SSLSocketFactory(sslContext, SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
return sslSF;
}
/**
* If the Keep-Alive header is not present in the response,
* HttpClient assumes the connection can be kept alive indefinitely.
* Here we limit this to 5 seconds.
*
* Here we limit this to 5 seconds.
*
* @param defaultHttpClient
*/
private static void addCustomKeepAliveStrategy(final DefaultHttpClient defaultHttpClient) {
defaultHttpClient.setKeepAliveStrategy(new ConnectionKeepAliveStrategy() {
public long getKeepAliveDuration(HttpResponse response, HttpContext context) {
@Override
public long getKeepAliveDuration(HttpResponse response, HttpContext context) {
// Honor 'keep-alive' header
String param, value;
HeaderElement element;
@ -726,7 +728,7 @@ public class HTTPClient {
response.headerIterator(HTTP.CONN_KEEP_ALIVE));
while (it.hasNext()) {
element = it.nextElement();
param = element.getName();
param = element.getName();
value = element.getValue();
if (value != null && param.equalsIgnoreCase("timeout")) {
try {

@ -78,6 +78,7 @@ public class AbstractSolrConnector implements SolrConnector {
public synchronized void close() {
try {
this.server.commit();
this.server = null;
} catch (SolrServerException e) {
Log.logException(e);
} catch (IOException e) {

@ -625,6 +625,17 @@ public class ContentScraper extends AbstractScraper implements Scraper {
return this.li.toArray(new String[this.li.size()]);
}
/**
 * Collects every anchor URL whose file extension is exactly "swf".
 *
 * @return the matching anchor URLs, in the iteration order of the anchor
 *         key set; an empty array when no anchor matches
 */
public MultiProtocolURI[] getFlash() {
    final ArrayList<MultiProtocolURI> flashLinks = new ArrayList<MultiProtocolURI>();
    for (final MultiProtocolURI anchor : this.anchors.keySet()) {
        // constant-first equals also rejects a null extension
        if ("swf".equals(anchor.getFileExtension())) {
            flashLinks.add(anchor);
        }
    }
    return flashLinks.toArray(new MultiProtocolURI[flashLinks.size()]);
}
public boolean containsFlash() {
String ext;
for (final MultiProtocolURI url: this.anchors.keySet()) {

@ -128,7 +128,8 @@ public class sitemapParser extends AbstractParser implements Parser {
}
// get some metadata
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
final String contentMimeType = header.mime();
InputStream contentStream = client.getContentstream();

@ -309,7 +309,8 @@ public final class yacyRelease extends yacyVersion {
}
client.setTimout(120000);
client.GET(getUrl().toString());
final ResponseHeader header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
final ResponseHeader header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
final boolean unzipped = header.gzip() && (header.mime().toLowerCase().equals("application/x-tar")); // if true, then the httpc has unzipped the file
if (unzipped && name.endsWith(".tar.gz")) {

@ -217,7 +217,6 @@ public final class LoaderDispatcher {
request,
requestHeader,
cachedResponse,
"200",
crawlProfile,
true,
content);

@ -1807,7 +1807,7 @@ public final class Switchboard extends serverSwitch
0,
0,
0);
response = new Response(request, null, null, "200", this.crawler.defaultSurrogateProfile, false);
response = new Response(request, null, null, this.crawler.defaultSurrogateProfile, false);
final indexingQueueEntry queueEntry =
new indexingQueueEntry(Segments.Process.SURROGATES, response, new Document[] {
document
@ -3357,7 +3357,8 @@ public final class Switchboard extends serverSwitch
url = new DigestURI(seedListFileURL);
//final long start = System.currentTimeMillis();
client.HEADResponse(url.toString());
header = new ResponseHeader(client.getHttpResponse().getAllHeaders());
int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
header = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
//final long loadtime = System.currentTimeMillis() - start;
/*if (header == null) {
if (loadtime > getConfigLong("bootstrapLoadTimeout", 6000)) {

@ -86,7 +86,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
it.remove();
}
}
}
}
protected void addSolr(final SolrDoc solrdoc, final SolrField key, final String value) {
if (isEmpty() || contains(key.name())) solrdoc.addSolr(key, value);
@ -344,7 +344,15 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
}
// flash embedded
addSolr(solrdoc, SolrField.flash_b, html.containsFlash());
if (isEmpty() || contains(SolrField.flash_b.name())) {
MultiProtocolURI[] flashURLs = html.getFlash();
for (MultiProtocolURI u: flashURLs) {
// remove all flash links from inbound/outbound links
inboundLinks.remove(u);
ouboundLinks.remove(u);
}
addSolr(solrdoc, SolrField.flash_b, flashURLs.length > 0);
}
// generic evaluation pattern
for (final String model: html.getEvaluationModelNames()) {
@ -446,7 +454,7 @@ public class SolrConfiguration extends ConfigurationSet implements Serializable
addSolr(solrdoc, SolrField.lon_coordinate, yacydoc.lon());
addSolr(solrdoc, SolrField.lat_coordinate, yacydoc.lat());
}
addSolr(solrdoc, SolrField.httpstatus_i, 200);
addSolr(solrdoc, SolrField.httpstatus_i, header.getStatusCode());
return solrdoc;
}

Loading…
Cancel
Save