Make use of header.getContentType() where possible (the MIME type is normalized afterwards);

otherwise use header.mime(), which was differentiated from getContentType() in the previous commit.
pull/37/head
reger 9 years ago
parent 7a8c077838
commit b7e8358645

@ -425,8 +425,9 @@ public class HeaderFramework extends TreeMap<String, String> implements Map<Stri
*/
public String mime() {
final String tmpstr = get(CONTENT_TYPE, "application/octet-stream");
if (tmpstr.indexOf(';') > 0) {
return tmpstr.substring(0,tmpstr.indexOf(';')).trim();
final int pos = tmpstr.indexOf(';');
if (pos > 0) {
return tmpstr.substring(0, pos).trim();
} else {
return tmpstr;
}

@ -254,12 +254,16 @@ public class Response {
return doctype;
}
/**
* Get the response header's last-modified date;
* if missing, fall back to the first seen date or the current date
* @return valid date always != null
*/
public Date lastModified() {
Date docDate = null;
if (this.responseHeader != null) {
docDate = this.responseHeader.lastModified();
if (docDate == null) docDate = this.responseHeader.date();
docDate = this.responseHeader.lastModified(); // is always != null
}
if (docDate == null && this.request != null) docDate = this.request.appdate();
if (docDate == null) docDate = new Date();
@ -594,13 +598,6 @@ public class Response {
// -ranges in request
// we checked that in shallStoreCache
// a picture cannot be indexed
/*
if (Classification.isMediaExtension(url().getFileExtension())) {
return "Media_Content_(forbidden)";
}
*/
// -cookies in request
// unfortunately, we cannot index pages which have been requested with a cookie
// because the returned content may be special for the client
@ -614,14 +611,7 @@ public class Response {
// the set-cookie from the server does not indicate that the content is special
// thus we do not care about it here for indexing
// a picture cannot be indexed
final String mimeType = this.responseHeader.mime();
/*
if (Classification.isPictureMime(mimeType)) {
return "Media_Content_(Picture)";
}
*/
final String parserError = TextParser.supportsMime(mimeType);
final String parserError = TextParser.supportsMime(this.responseHeader.getContentType());
if (parserError != null) {
return "Media_Content, no parser: " + parserError;
}
@ -736,16 +726,10 @@ public class Response {
// check if document can be indexed
if (this.responseHeader != null) {
final String mimeType = this.responseHeader.mime();
final String mimeType = this.responseHeader.getContentType();
final String parserError = TextParser.supportsMime(mimeType);
if (parserError != null && TextParser.supportsExtension(url()) != null) return "no parser available: " + parserError;
}
/*
if (Classification.isMediaExtension(url().getFileExtension()) &&
!Classification.isImageExtension((url().getFileExtension()))) {
return "Media_Content_(forbidden)";
}
*/
// -if-modified-since in request
// if the page is fresh at the very moment we can index it
@ -783,14 +767,21 @@ public class Response {
return null;
}
/**
* Get Mime type from http header or null if unknown (not included in response header)
* @return mime (trimmed and lowercase) or null
*/
public String getMimeType() {
if (this.responseHeader == null) return null;
String mimeType = this.responseHeader.mime();
mimeType = mimeType.trim().toLowerCase();
String mimeType = this.responseHeader.getContentType();
if (mimeType != null) {
mimeType = mimeType.trim().toLowerCase();
final int pos = mimeType.indexOf(';');
return ((pos < 0) ? mimeType : mimeType.substring(0, pos));
final int pos = mimeType.indexOf(';');
return ((pos < 0) ? mimeType : mimeType.substring(0, pos));
}
return null;
}
public String getCharacterEncoding() {
@ -864,10 +855,10 @@ public class Response {
}
public Document[] parse() throws Parser.Failure {
final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.mime());
final String supportError = TextParser.supports(url(), this.responseHeader == null ? null : this.responseHeader.getContentType());
if (supportError != null) throw new Parser.Failure("no parser support:" + supportError, url());
try {
return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.mime(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content);
return TextParser.parseSource(new AnchorURL(url()), this.responseHeader == null ? null : this.responseHeader.getContentType(), this.responseHeader == null ? "UTF-8" : this.responseHeader.getCharacterEncoding(), new VocabularyScraper(), this.request.timezoneOffset(), this.request.depth(), this.content);
} catch (final Exception e) {
return null;
}

@ -172,7 +172,7 @@ public class YaCyProxyServlet extends ProxyServlet implements Servlet {
response.setContentType(mimeType);
response.setStatus(httpStatus);
if ((mimeType != null) && (mimeType.startsWith("text/html") || mimeType.startsWith("text"))) {
if ((mimeType != null) && (mimeType.startsWith("text"))) {
final StringWriter buffer = new StringWriter();
if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) {

@ -568,10 +568,10 @@ public final class LoaderDispatcher {
if (responseHeader == null) throw new IOException("responseHeader == null");
Document[] documents = null;
final String supportError = TextParser.supports(url, responseHeader.mime());
final String supportError = TextParser.supports(url, responseHeader.getContentType());
if (supportError != null) throw new IOException("no parser support: " + supportError);
try {
documents = TextParser.parseSource(url, responseHeader.mime(), responseHeader.getCharacterEncoding(), response.profile().scraper(), timezoneOffset, response.depth(), response.getContent());
documents = TextParser.parseSource(url, responseHeader.getContentType(), responseHeader.getCharacterEncoding(), response.profile().scraper(), timezoneOffset, response.depth(), response.getContent());
if (documents == null) throw new IOException("document == null");
} catch (final Exception e) {
throw new IOException("parser error: " + e.getMessage());

@ -1187,9 +1187,6 @@ public final class HTTPDProxyHandler {
if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
final HeaderFramework proxyRespondHeader = (HeaderFramework) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER);
mime = proxyRespondHeader.mime();
if (mime.indexOf(';') != -1) {
mime = mime.substring(0,mime.indexOf(';'));
}
}
logMessage.append(mime);

Loading…
Cancel
Save