move unnecessary nested else out of condition

pull/1/head
orbiter 11 years ago
parent 0d8072aa99
commit c9f66be20b

@ -111,181 +111,178 @@ public class UrlProxyServlet extends ProxyServlet implements Servlet {
if ("CONNECT".equalsIgnoreCase(request.getMethod())) { if ("CONNECT".equalsIgnoreCase(request.getMethod())) {
return; return;
} else { }
final Continuation continuation = ContinuationSupport.getContinuation(request);
final Continuation continuation = ContinuationSupport.getContinuation(request);
if (!continuation.isInitial()) { if (!continuation.isInitial()) {
response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial
return; return;
} }
// 2 - get target url // 2 - get target url
URL proxyurl = null; URL proxyurl = null;
String strARGS = request.getQueryString(); String strARGS = request.getQueryString();
if (strARGS == null) { if (strARGS == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return; return;
} }
if (strARGS.startsWith("url=")) { if (strARGS.startsWith("url=")) {
final String strUrl = strARGS.substring(4); // strip "url=" final String strUrl = strARGS.substring(4); // strip "url="
try { try {
proxyurl = new URL(strUrl); proxyurl = new URL(strUrl);
} catch (final MalformedURLException e) { } catch (final MalformedURLException e) {
proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name())); proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name()));
}
}
if (proxyurl == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return;
} }
}
if (proxyurl == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return;
}
String hostwithport = proxyurl.getHost(); String hostwithport = proxyurl.getHost();
if (proxyurl.getPort() != -1) { if (proxyurl.getPort() != -1) {
hostwithport += ":" + proxyurl.getPort(); hostwithport += ":" + proxyurl.getPort();
} }
// 4 - get target url // 4 - get target url
RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request); RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request);
yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE); yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE);
yacyRequestHeader.remove(HeaderFramework.CONTENT_LENGTH); yacyRequestHeader.remove(HeaderFramework.CONTENT_LENGTH);
final HashMap<String, Object> prop = new HashMap<String, Object>(); final HashMap<String, Object> prop = new HashMap<String, Object>();
prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1); prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1);
prop.put(HeaderFramework.CONNECTION_PROP_HOST, hostwithport); prop.put(HeaderFramework.CONNECTION_PROP_HOST, hostwithport);
prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath().replaceAll(" ", "%20")); prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath().replaceAll(" ", "%20"));
if (proxyurl.getQuery() != null) prop.put(HeaderFramework.CONNECTION_PROP_ARGS, proxyurl.getQuery()); if (proxyurl.getQuery() != null) prop.put(HeaderFramework.CONNECTION_PROP_ARGS, proxyurl.getQuery());
prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, Domains.LOCALHOST); prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, Domains.LOCALHOST);
yacyRequestHeader.put(HeaderFramework.HOST, hostwithport ); yacyRequestHeader.put(HeaderFramework.HOST, hostwithport );
yacyRequestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath()); yacyRequestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath());
// 4 & 5 get & index target url // 4 & 5 get & index target url
final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream(); final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream();
HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent); HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent);
// reparse header to extract content-length and mimetype // reparse header to extract content-length and mimetype
final ResponseHeader proxyResponseHeader = new ResponseHeader(200); // final ResponseHeader proxyResponseHeader = new ResponseHeader(200); //
InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray()); InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray());
String line = readLine(proxyout); String line = readLine(proxyout);
while (line != null && !line.equals("")) { while (line != null && !line.equals("")) {
int p; int p;
if ((p = line.indexOf(':')) >= 0) { if ((p = line.indexOf(':')) >= 0) {
// store a property // store a property
proxyResponseHeader.put(line.substring(0, p).trim(), line.substring(p + 1).trim()); proxyResponseHeader.put(line.substring(0, p).trim(), line.substring(p + 1).trim());
}
line = readLine(proxyout);
}
if (line == null) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing");
return;
} }
line = readLine(proxyout);
}
if (line == null) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing");
return;
}
if (proxyResponseHeader.containsKey(HeaderFramework.LOCATION)) { if (proxyResponseHeader.containsKey(HeaderFramework.LOCATION)) {
// rewrite location header // rewrite location header
String location = proxyResponseHeader.get(HeaderFramework.LOCATION); String location = proxyResponseHeader.get(HeaderFramework.LOCATION);
if (location.startsWith("http")) { if (location.startsWith("http")) {
location = request.getServletPath() + "?url=" + location; location = request.getServletPath() + "?url=" + location;
} else { } else {
location = request.getServletPath() + "?url=http://" + hostwithport + "/" + location; location = request.getServletPath() + "?url=http://" + hostwithport + "/" + location;
}
response.addHeader(HeaderFramework.LOCATION, location);
} }
response.addHeader(HeaderFramework.LOCATION, location);
}
final int httpStatus = proxyResponseHeader.getStatusCode(); final int httpStatus = proxyResponseHeader.getStatusCode();
final String mimeType = proxyResponseHeader.getContentType(); final String mimeType = proxyResponseHeader.getContentType();
response.setStatus(httpStatus); response.setStatus(httpStatus);
response.setContentType(mimeType); response.setContentType(mimeType);
if ((httpStatus < HttpServletResponse.SC_BAD_REQUEST) && (mimeType != null) && mimeType.startsWith("text")) { if ((httpStatus < HttpServletResponse.SC_BAD_REQUEST) && (mimeType != null) && mimeType.startsWith("text")) {
if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) { if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) {
proxyout = new ChunkedInputStream(proxyout); proxyout = new ChunkedInputStream(proxyout);
} }
// 7 - modify target content // 7 - modify target content
final String servletstub = request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getServletPath() + "?url="; final String servletstub = request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort() + request.getServletPath() + "?url=";
Document doc; Document doc;
try { try {
doc = Jsoup.parse(proxyout, UTF8.charset.name(), proxyurl.toString()); doc = Jsoup.parse(proxyout, UTF8.charset.name(), proxyurl.toString());
} catch (IOException eio) { } catch (IOException eio) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy: parser error on " + proxyurl.toString()); response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy: parser error on " + proxyurl.toString());
return; return;
} }
Element bde = doc.body(); // start with body element to rewrite href links Element bde = doc.body(); // start with body element to rewrite href links
// rewrite all href with abs proxy url (must be abs because of <base> head tag // rewrite all href with abs proxy url (must be abs because of <base> head tag
Elements taglist = bde.getElementsByAttribute("href"); Elements taglist = bde.getElementsByAttribute("href");
final Switchboard sb = Switchboard.getSwitchboard(); final Switchboard sb = Switchboard.getSwitchboard();
for (Element e : taglist) { for (Element e : taglist) {
if (e.tagName().equals("a")) { // get <a> tag if (e.tagName().equals("a")) { // get <a> tag
String absurl = e.absUrl("href"); // get href attribut as abs url String absurl = e.absUrl("href"); // get href attribut as abs url
if (absurl.startsWith("data:") || absurl.startsWith("#") || absurl.startsWith("mailto:") || absurl.startsWith("javascript:")) { if (absurl.startsWith("data:") || absurl.startsWith("#") || absurl.startsWith("mailto:") || absurl.startsWith("javascript:")) {
continue; continue;
} else { }
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
try { try {
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(absurl)) != null) { if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(absurl)) != null) {
continue; continue;
}
} catch (MalformedURLException ex) {
ConcurrentLog.fine("PROXY", "ProxyServlet: malformed url for url-rewirte " + absurl);
continue;
}
} }
e.attr("href", servletstub + absurl); // rewrite with abs proxy-url } catch (MalformedURLException ex) {
ConcurrentLog.fine("PROXY", "ProxyServlet: malformed url for url-rewirte " + absurl);
continue;
} }
} }
e.attr("href", servletstub + absurl); // rewrite with abs proxy-url
} }
}
Element hd = doc.head(); Element hd = doc.head();
if (hd != null) { if (hd != null) {
// add a base url if not exist (to make sure relative links point to original) // add a base url if not exist (to make sure relative links point to original)
Elements basetags = hd.getElementsByTag("base"); Elements basetags = hd.getElementsByTag("base");
if (basetags.isEmpty()) { if (basetags.isEmpty()) {
Element newbasetag = hd.prependElement("base"); Element newbasetag = hd.prependElement("base");
String basestr = proxyurl.getProtocol() + "://" + hostwithport + proxyurl.getPath(); //+directory; String basestr = proxyurl.getProtocol() + "://" + hostwithport + proxyurl.getPath(); //+directory;
newbasetag.attr("href", basestr); newbasetag.attr("href", basestr);
}
} }
// 8 - add interaction elements (e.g. proxy exit button to switch back to original url)
// TODO: use a template file for
if (_stopProxyText != null) {
bde.prepend("<div width='100%' style='padding:5px; background:white; border-bottom: medium solid lightgrey;'>"
+ "<div align='center' style='font-size:11px; color:darkgrey;'><a href='" + proxyurl + "'>" + _stopProxyText + "</a></div></div>");
} }
// 9 - deliver to client // 8 - add interaction elements (e.g. proxy exit button to switch back to original url)
byte[] sbb = UTF8.getBytes(doc.toString()); // TODO: use a template file for
if (_stopProxyText != null) {
bde.prepend("<div width='100%' style='padding:5px; background:white; border-bottom: medium solid lightgrey;'>"
+ "<div align='center' style='font-size:11px; color:darkgrey;'><a href='" + proxyurl + "'>" + _stopProxyText + "</a></div></div>");
}
// add some proxy-headers to response header // 9 - deliver to client
if (proxyResponseHeader.containsKey(HeaderFramework.SERVER)) { byte[] sbb = UTF8.getBytes(doc.toString());
response.setHeader(HeaderFramework.SERVER, proxyResponseHeader.get(HeaderFramework.SERVER));
}
if (proxyResponseHeader.containsKey(HeaderFramework.DATE)) {
response.setHeader(HeaderFramework.DATE, proxyResponseHeader.get(HeaderFramework.DATE));
}
if (proxyResponseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) {
response.setHeader(HeaderFramework.LAST_MODIFIED, proxyResponseHeader.get(HeaderFramework.LAST_MODIFIED));
}
if (proxyResponseHeader.containsKey(HeaderFramework.EXPIRES)) {
response.setHeader(HeaderFramework.EXPIRES, proxyResponseHeader.get(HeaderFramework.EXPIRES));
}
response.setIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length); // add some proxy-headers to response header
response.getOutputStream().write(sbb); if (proxyResponseHeader.containsKey(HeaderFramework.SERVER)) {
response.setHeader(HeaderFramework.SERVER, proxyResponseHeader.get(HeaderFramework.SERVER));
}
if (proxyResponseHeader.containsKey(HeaderFramework.DATE)) {
response.setHeader(HeaderFramework.DATE, proxyResponseHeader.get(HeaderFramework.DATE));
}
if (proxyResponseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) {
response.setHeader(HeaderFramework.LAST_MODIFIED, proxyResponseHeader.get(HeaderFramework.LAST_MODIFIED));
}
if (proxyResponseHeader.containsKey(HeaderFramework.EXPIRES)) {
response.setHeader(HeaderFramework.EXPIRES, proxyResponseHeader.get(HeaderFramework.EXPIRES));
}
} else { response.setIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length);
if (httpStatus >= HttpServletResponse.SC_BAD_REQUEST) { response.getOutputStream().write(sbb);
response.sendError(httpStatus,"Site " + proxyurl + " returned with status");
return; } else {
} if (httpStatus >= HttpServletResponse.SC_BAD_REQUEST) {
if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) { response.sendError(httpStatus,"Site " + proxyurl + " returned with status");
response.setHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)); return;
} }
FileUtils.copy(proxyout, response.getOutputStream()); if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) {
response.setHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE));
} }
FileUtils.copy(proxyout, response.getOutputStream());
} }
} }

@ -81,207 +81,205 @@ public class YaCyProxyServlet extends ProxyServlet implements Servlet {
if ("CONNECT".equalsIgnoreCase(request.getMethod())) { if ("CONNECT".equalsIgnoreCase(request.getMethod())) {
return; return;
} else { }
final Continuation continuation = ContinuationSupport.getContinuation(request);
final Continuation continuation = ContinuationSupport.getContinuation(request);
if (!continuation.isInitial()) { if (!continuation.isInitial()) {
response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial
return; return;
} }
URL proxyurl = null; URL proxyurl = null;
String strARGS = request.getQueryString(); String strARGS = request.getQueryString();
if (strARGS == null) { if (strARGS == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing"); response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return; return;
} }
if (strARGS.startsWith("url=")) { if (strARGS.startsWith("url=")) {
final String strUrl = strARGS.substring(4); // strip "url=" final String strUrl = strARGS.substring(4); // strip "url="
try { try {
proxyurl = new URL(strUrl); proxyurl = new URL(strUrl);
} catch (final MalformedURLException e) { } catch (final MalformedURLException e) {
proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name())); proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name()));
}
}
if (proxyurl == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return;
} }
}
if (proxyurl == null) {
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
return;
}
String hostwithport = proxyurl.getHost(); String hostwithport = proxyurl.getHost();
if (proxyurl.getPort() != -1) { if (proxyurl.getPort() != -1) {
hostwithport += ":" + proxyurl.getPort(); hostwithport += ":" + proxyurl.getPort();
} }
RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request); RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request);
yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE); yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE);
yacyRequestHeader.remove(HeaderFramework.CONTENT_LENGTH); yacyRequestHeader.remove(HeaderFramework.CONTENT_LENGTH);
final HashMap<String, Object> prop = new HashMap<String, Object>(); final HashMap<String, Object> prop = new HashMap<String, Object>();
prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1); prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1);
prop.put(HeaderFramework.CONNECTION_PROP_HOST, hostwithport); prop.put(HeaderFramework.CONNECTION_PROP_HOST, hostwithport);
prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath().replaceAll(" ", "%20")); prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath().replaceAll(" ", "%20"));
prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, Domains.LOCALHOST); prop.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, Domains.LOCALHOST);
yacyRequestHeader.put(HeaderFramework.HOST, hostwithport ); yacyRequestHeader.put(HeaderFramework.HOST, hostwithport );
yacyRequestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath()); yacyRequestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getPath());
final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream(); final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream();
HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent); HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent);
// reparse header to extract content-length and mimetype // reparse header to extract content-length and mimetype
final ResponseHeader proxyResponseHeader = new ResponseHeader(200); // final ResponseHeader proxyResponseHeader = new ResponseHeader(200); //
final InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray()); final InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray());
String line = readLine(proxyout); String line = readLine(proxyout);
while (line != null && !line.equals("")) { while (line != null && !line.equals("")) {
int p; int p;
if ((p = line.indexOf(':')) >= 0) { if ((p = line.indexOf(':')) >= 0) {
// store a property // store a property
proxyResponseHeader.add(line.substring(0, p).trim(), line.substring(p + 1).trim()); proxyResponseHeader.add(line.substring(0, p).trim(), line.substring(p + 1).trim());
}
line = readLine(proxyout);
}
if (line == null) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing");
return;
} }
line = readLine(proxyout);
}
if (line == null) {
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing");
return;
}
final int httpStatus = Integer.parseInt((String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS)); final int httpStatus = Integer.parseInt((String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS));
String directory = ""; String directory = "";
if (proxyurl.getPath().lastIndexOf('/') > 0) { if (proxyurl.getPath().lastIndexOf('/') > 0) {
directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/')); directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/'));
} }
if (response.getHeader(HeaderFramework.LOCATION) != null) { if (response.getHeader(HeaderFramework.LOCATION) != null) {
// rewrite location header // rewrite location header
String location = response.getHeader(HeaderFramework.LOCATION); String location = response.getHeader(HeaderFramework.LOCATION);
if (location.startsWith("http")) { if (location.startsWith("http")) {
location = request.getServletPath() + "?url=" + location; location = request.getServletPath() + "?url=" + location;
} else { } else {
location = request.getServletPath() + "?url=http://" + hostwithport + "/" + location; location = request.getServletPath() + "?url=http://" + hostwithport + "/" + location;
}
response.addHeader(HeaderFramework.LOCATION, location);
} }
response.addHeader(HeaderFramework.LOCATION, location);
}
final String mimeType = proxyResponseHeader.getContentType(); final String mimeType = proxyResponseHeader.getContentType();
response.setContentType(mimeType); response.setContentType(mimeType);
response.setStatus(httpStatus); response.setStatus(httpStatus);
if ((mimeType != null) && (mimeType.startsWith("text/html") || mimeType.startsWith("text"))) {
final StringWriter buffer = new StringWriter();
if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) { if ((mimeType != null) && (mimeType.startsWith("text/html") || mimeType.startsWith("text"))) {
FileUtils.copy(new ChunkedInputStream(proxyout), buffer, UTF8.charset); final StringWriter buffer = new StringWriter();
} else {
FileUtils.copy(proxyout, buffer, UTF8.charset);
}
final String sbuffer = buffer.toString();
final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)");
final Matcher m = p.matcher(sbuffer);
final StringBuffer result = new StringBuffer(80);
final Switchboard sb = Switchboard.getSwitchboard();
final String servletstub = request.getServletPath()+"?url=";
while (m.find()) {
String init = null;
if (m.group(1) != null) { init = m.group(1); }
if (m.group(3) != null) { init = m.group(3); }
if (m.group(5) != null) { init = m.group(5); }
if (m.group(7) != null) { init = m.group(7); }
if (m.group(9) != null) { init = m.group(9); }
String url = null;
if (m.group(2) != null) { url = m.group(2); }
if (m.group(4) != null) { url = m.group(4); }
if (m.group(6) != null) { url = m.group(6); }
if (m.group(8) != null) { url = m.group(8); }
if (m.group(10) != null) { url = m.group(10); }
if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) {
String newurl = init + url;
newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl);
} else if (url.startsWith("http")) { if (proxyResponseHeader.containsKey(HeaderFramework.TRANSFER_ENCODING) && proxyResponseHeader.get(HeaderFramework.TRANSFER_ENCODING).contains("chunked")) {
// absoulte url of form href="http://domain.com/path" FileUtils.copy(new ChunkedInputStream(proxyout), buffer, UTF8.charset);
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { } else {
try { FileUtils.copy(proxyout, buffer, UTF8.charset);
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(url)) != null) { }
continue; final String sbuffer = buffer.toString();
}
} catch (final MalformedURLException e) { final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)");
ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + url); final Matcher m = p.matcher(sbuffer);
final StringBuffer result = new StringBuffer(80);
final Switchboard sb = Switchboard.getSwitchboard();
final String servletstub = request.getServletPath()+"?url=";
while (m.find()) {
String init = null;
if (m.group(1) != null) { init = m.group(1); }
if (m.group(3) != null) { init = m.group(3); }
if (m.group(5) != null) { init = m.group(5); }
if (m.group(7) != null) { init = m.group(7); }
if (m.group(9) != null) { init = m.group(9); }
String url = null;
if (m.group(2) != null) { url = m.group(2); }
if (m.group(4) != null) { url = m.group(4); }
if (m.group(6) != null) { url = m.group(6); }
if (m.group(8) != null) { url = m.group(8); }
if (m.group(10) != null) { url = m.group(10); }
if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) {
String newurl = init + url;
newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl);
} else if (url.startsWith("http")) {
// absoulte url of form href="http://domain.com/path"
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
try {
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(url)) != null) {
continue; continue;
} }
} catch (final MalformedURLException e) {
ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + url);
continue;
} }
}
String newurl = init + servletstub + url; String newurl = init + servletstub + url;
newurl = newurl.replaceAll("\\$", "\\\\\\$"); newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl); m.appendReplacement(result, newurl);
} else if (url.startsWith("//")) { } else if (url.startsWith("//")) {
// absoulte url but same protocol of form href="//domain.com/path" // absoulte url but same protocol of form href="//domain.com/path"
final String complete_url = proxyurl.getProtocol() + ":" + url; final String complete_url = proxyurl.getProtocol() + ":" + url;
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) { if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
try { try {
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) { if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) {
continue; continue;
}
} catch (MalformedURLException ex) {
ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + complete_url);
continue;
}
} }
} catch (MalformedURLException ex) {
ConcurrentLog.fine("PROXY","ProxyServlet: malformed url for url-rewirte " + complete_url);
continue;
}
}
String newurl = init + servletstub + complete_url; String newurl = init + servletstub + complete_url;
newurl = newurl.replaceAll("\\$", "\\\\\\$"); newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl); m.appendReplacement(result, newurl);
} else if (url.startsWith("/")) { } else if (url.startsWith("/")) {
// absolute path of form href="/absolute/path/to/linked/page" // absolute path of form href="/absolute/path/to/linked/page"
String newurl = init + servletstub + "http://" + hostwithport + url; String newurl = init + servletstub + "http://" + hostwithport + url;
newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl);
} else {
// relative path of form href="relative/path"
try {
MultiProtocolURL target = new MultiProtocolURL("http://" + hostwithport + directory + "/" + url);
String newurl = init + servletstub + target.toString();
newurl = newurl.replaceAll("\\$", "\\\\\\$"); newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl); m.appendReplacement(result, newurl);
} catch (final MalformedURLException e) {}
} else {
// relative path of form href="relative/path"
try {
MultiProtocolURL target = new MultiProtocolURL("http://" + hostwithport + directory + "/" + url);
String newurl = init + servletstub + target.toString();
newurl = newurl.replaceAll("\\$", "\\\\\\$");
m.appendReplacement(result, newurl);
} catch (final MalformedURLException e) {}
}
} }
m.appendTail(result); }
m.appendTail(result);
byte[] sbb = UTF8.getBytes(result.toString()); byte[] sbb = UTF8.getBytes(result.toString());
// add some proxy-headers to response header // add some proxy-headers to response header
response.setContentType(proxyResponseHeader.getContentType()); response.setContentType(proxyResponseHeader.getContentType());
if (proxyResponseHeader.containsKey(HeaderFramework.SERVER)) { if (proxyResponseHeader.containsKey(HeaderFramework.SERVER)) {
response.addHeader(HeaderFramework.SERVER, proxyResponseHeader.get(HeaderFramework.SERVER)); response.addHeader(HeaderFramework.SERVER, proxyResponseHeader.get(HeaderFramework.SERVER));
} }
if (proxyResponseHeader.containsKey(HeaderFramework.DATE)) { if (proxyResponseHeader.containsKey(HeaderFramework.DATE)) {
response.addHeader(HeaderFramework.DATE, proxyResponseHeader.get(HeaderFramework.DATE)); response.addHeader(HeaderFramework.DATE, proxyResponseHeader.get(HeaderFramework.DATE));
} }
if (proxyResponseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) { if (proxyResponseHeader.containsKey(HeaderFramework.LAST_MODIFIED)) {
response.addHeader(HeaderFramework.LAST_MODIFIED, proxyResponseHeader.get(HeaderFramework.LAST_MODIFIED)); response.addHeader(HeaderFramework.LAST_MODIFIED, proxyResponseHeader.get(HeaderFramework.LAST_MODIFIED));
} }
if (proxyResponseHeader.containsKey(HeaderFramework.EXPIRES)) { if (proxyResponseHeader.containsKey(HeaderFramework.EXPIRES)) {
response.addHeader(HeaderFramework.EXPIRES, proxyResponseHeader.get(HeaderFramework.EXPIRES)); response.addHeader(HeaderFramework.EXPIRES, proxyResponseHeader.get(HeaderFramework.EXPIRES));
} }
response.setIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length); response.setIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length);
response.getOutputStream().write(sbb); response.getOutputStream().write(sbb);
} else { } else {
if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) { if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) {
response.addHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)); response.addHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE));
}
FileUtils.copy(proxyout, response.getOutputStream());
} }
FileUtils.copy(proxyout, response.getOutputStream());
} }
} }

Loading…
Cancel
Save