- based on Jetty ProxyServlet - at this time use existing HTTPD ProxyHandler for url rewrite - add jetty-client jar (dependency in Jetty ProxyServlet) reuse ProxyHandler.convertHeaderFromJetty in YaCyDefaultServletpull/1/head
parent
cb2dbcb843
commit
06da6f517c
Binary file not shown.
@ -0,0 +1,418 @@
|
||||
// package de.spieleck.servlets;
|
||||
// ProxyServlet - serving pages from foreign servers....
|
||||
//
|
||||
|
||||
import java.io.*;
|
||||
import java.net.*;
|
||||
import java.lang.Integer;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import javax.servlet.*;
|
||||
import javax.servlet.http.*;
|
||||
|
||||
/**
|
||||
* Serves pages which are fetched from another HTTP-Server
|
||||
* useful for going thru firewalls and other trickery...
|
||||
* <P>
|
||||
* The communication is somewhat this way:
|
||||
* <UL>
|
||||
* <LI>Client requests data from servlet
|
||||
* <LI>Servlet interprets path and requests data from remote server
|
||||
* <LI>Servlet obtains answer from remote server and forwards it to client
|
||||
* <LI>Client obtains answer
|
||||
* </UL>
|
||||
* <P>
|
||||
* XXX There is a problem with If-Modified and If-None-Match requests:
|
||||
* the 304 Not Modified answer does not go through the servlet in the
|
||||
* backward direction. It could be that the HttpServletResponse does have
|
||||
* some side effects which are not helpful in this special situation.
|
||||
* This type of request is currently avoided by removing all "If-" requests.
|
||||
* <br />
|
||||
* <b>Note:</b> This servlet is actually buggy. It is buggy since it does
|
||||
* not solve all problems, it only solves the problems I needed to solve.
|
||||
* Many thanks to Thorsten Gast the creator of dirjack
|
||||
* for pointing at least some bugs.
|
||||
* @author <a href="mailto:frank -at- spieleck.de">Frank Nestel</a>.
|
||||
*/
|
||||
|
||||
public class ProxyServlet extends HttpServlet
|
||||
{
|
||||
/**
|
||||
* "Official" HTTP line end
|
||||
*/
|
||||
public final static String CRLF = "\r\n";
|
||||
public final static String LF = "\n";
|
||||
|
||||
/**
|
||||
* remote path
|
||||
*/
|
||||
protected String remotePath;
|
||||
|
||||
/**
|
||||
* remote server
|
||||
*/
|
||||
protected String remoteServer;
|
||||
|
||||
/**
|
||||
* Port at remote server
|
||||
*/
|
||||
protected int remotePort;
|
||||
|
||||
/**
|
||||
* Debug mode?
|
||||
*/
|
||||
protected boolean debugFlag;
|
||||
|
||||
/** Init
|
||||
*/
|
||||
public void init(ServletConfig config)
|
||||
throws ServletException
|
||||
{
|
||||
super.init(config);
|
||||
remotePath = getInitParameter("remotePath");
|
||||
remoteServer = getInitParameter("remoteServer");
|
||||
String remotePortStr= getInitParameter("remotePort");
|
||||
if ( remotePath == null || remoteServer == null )
|
||||
throw new ServletException(
|
||||
"Servlet needs remotePath & remoteServer.");
|
||||
if ( remotePortStr != null )
|
||||
{
|
||||
try
|
||||
{
|
||||
remotePort = Integer.parseInt(remotePortStr);
|
||||
}
|
||||
catch ( Exception e )
|
||||
{
|
||||
throw new ServletException("Port must be a number!");
|
||||
}
|
||||
}
|
||||
else
|
||||
remotePort = 80;
|
||||
if ( "".equals(remotePath) )
|
||||
remotePath = ""; // XXX ??? "/"
|
||||
else if ( remotePath.charAt(0) != '/' )
|
||||
remotePath = "/"+remotePath;
|
||||
debugFlag = "true".equals(getInitParameter("debug"));
|
||||
//
|
||||
log("remote="+remoteServer+" "+remotePort+" "+remotePath);
|
||||
}
|
||||
|
||||
/// Returns a string containing information about the author, version, and
|
||||
// copyright of the servlet.
|
||||
public String getServletInfo()
|
||||
{
|
||||
return "Online redirecting content.";
|
||||
}
|
||||
|
||||
/// Services a single request from the client.
|
||||
// @param req the servlet request
|
||||
// @param req the servlet response
|
||||
// @exception ServletException when an exception has occurred
|
||||
public void service( HttpServletRequest req, HttpServletResponse res )
|
||||
throws ServletException, IOException
|
||||
{
|
||||
//
|
||||
// Connect to "remote" server:
|
||||
Socket sock;
|
||||
OutputStream out;
|
||||
InputStream in;
|
||||
//
|
||||
try
|
||||
{
|
||||
sock = new Socket(remoteServer, remotePort); // !!!!!!!!
|
||||
out = new BufferedOutputStream(sock.getOutputStream());
|
||||
in = new BufferedInputStream(sock.getInputStream());
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
res.sendError( HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
|
||||
"Socket opening: "+remoteServer+" "+remotePort);
|
||||
return;
|
||||
}
|
||||
try
|
||||
{
|
||||
//
|
||||
// Build up a HTTP request from pure strings:
|
||||
StringBuffer sb = new StringBuffer(200);
|
||||
sb.append(req.getMethod());
|
||||
sb.append(' ');
|
||||
String pi = req.getPathInfo();
|
||||
sb.append(remotePath);
|
||||
if ( pi != null )
|
||||
{
|
||||
appendCleaned(sb, pi);
|
||||
}
|
||||
else
|
||||
sb.append("/");
|
||||
if ( req.getQueryString() != null )
|
||||
{
|
||||
sb.append('?');
|
||||
appendCleaned(sb, req.getQueryString());
|
||||
}
|
||||
sb.append(' ');
|
||||
sb.append("HTTP/1.0");
|
||||
sb.append(CRLF);
|
||||
log(sb.toString());
|
||||
out.write(sb.toString().getBytes());
|
||||
java.util.Enumeration en = req.getHeaderNames();
|
||||
while ( en.hasMoreElements() )
|
||||
{
|
||||
String k = (String) en.nextElement();
|
||||
// Filter incoming headers:
|
||||
if ( "Host".equalsIgnoreCase(k) )
|
||||
{
|
||||
sb.setLength(0);
|
||||
sb.append(k);
|
||||
sb.append(": ");
|
||||
sb.append(remoteServer);
|
||||
sb.append(":");
|
||||
sb.append(remotePort);
|
||||
sb.append(CRLF);
|
||||
log("c["+k+"]: "+sb+" "+req.getHeader(k));
|
||||
out.write(sb.toString().getBytes());
|
||||
}
|
||||
//
|
||||
// Throw away persistant connections between servers
|
||||
// Throw away request potentially causing a 304 response.
|
||||
else if (
|
||||
! "Connection".equalsIgnoreCase(k)
|
||||
&& ! "If-Modified-Since".equalsIgnoreCase(k)
|
||||
&& ! "If-None-Match".equalsIgnoreCase(k)
|
||||
)
|
||||
{
|
||||
sb.setLength(0);
|
||||
sb.append(k);
|
||||
sb.append(": ");
|
||||
sb.append(req.getHeader(k));
|
||||
sb.append(CRLF);
|
||||
log("=["+k+"]: "+req.getHeader(k));
|
||||
out.write(sb.toString().getBytes());
|
||||
}
|
||||
else
|
||||
{
|
||||
log("*["+k+"]: "+req.getHeader(k));
|
||||
}
|
||||
}
|
||||
// Finish request header by an empty line
|
||||
out.write(CRLF.getBytes());
|
||||
// Copy post data
|
||||
InputStream inr = req.getInputStream();
|
||||
copyStream(inr, out);
|
||||
out.flush();
|
||||
log("Remote request finished. Reading answer.");
|
||||
|
||||
// Now we have finished the outgoing request.
|
||||
// We'll now see, what is coming back from remote:
|
||||
|
||||
// Get the answer, treat its header and copy the stream data:
|
||||
if ( treatHeader(in, req, res) )
|
||||
{
|
||||
log("+ copyStream");
|
||||
// if ( debugFlag ) res.setContentType("text/plain");
|
||||
out = res.getOutputStream();
|
||||
copyStream(in, out);
|
||||
}
|
||||
else
|
||||
log("- copyStream");
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
log("out-in.open!");
|
||||
// res.sendError( HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
|
||||
// "out-in open!");
|
||||
return;
|
||||
}
|
||||
try
|
||||
{
|
||||
// out.close();
|
||||
in.close();
|
||||
sock.close();
|
||||
}
|
||||
catch (IOException ignore)
|
||||
{
|
||||
log("Exception "+ignore);
|
||||
}
|
||||
}
|
||||
|
||||
public static void appendCleaned(StringBuffer sb, String str)
|
||||
{
|
||||
for(int i = 0; i < str.length(); i++)
|
||||
{
|
||||
char ch = str.charAt(i);
|
||||
if ( ch == ' ' )
|
||||
sb.append("%20");
|
||||
else
|
||||
sb.append(ch);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Forward and filter header from backend Request.
|
||||
*/
|
||||
private boolean treatHeader(InputStream in,
|
||||
HttpServletRequest req,
|
||||
HttpServletResponse res)
|
||||
throws ServletException
|
||||
{
|
||||
boolean retval = true;
|
||||
byte[] lineBytes = new byte[4096];
|
||||
int len;
|
||||
String line;
|
||||
|
||||
try
|
||||
{
|
||||
// Read the first line of the request.
|
||||
len = readLine(in, lineBytes );
|
||||
if ( len == -1 || len == 0 )
|
||||
throw new ServletException( "No Request found in Data." );
|
||||
{
|
||||
String line2 = new String( lineBytes, 0, len );
|
||||
log("head: "+line2+" "+len);
|
||||
}
|
||||
|
||||
// We mainly skip the header by the foreign server
|
||||
// assuming, that we can handle protocoll mismatch or so!
|
||||
res.setHeader("viaJTTP","JTTP");
|
||||
|
||||
// Some more headers require special care ....
|
||||
boolean firstline = true;
|
||||
// Shortcut evaluation skips the read on first time!
|
||||
while ( firstline || ((len=readLine(in,lineBytes)) > 0) )
|
||||
{
|
||||
line = new String( lineBytes, 0, len );
|
||||
int colonPos = line.indexOf( ":" );
|
||||
if ( firstline && colonPos == -1 )
|
||||
{
|
||||
// Special first line considerations ...
|
||||
String headl[] = wordStr(line);
|
||||
log("head: "+line+" "+headl.length);
|
||||
try
|
||||
{
|
||||
res.setStatus(Integer.parseInt(headl[1]));
|
||||
}
|
||||
catch ( NumberFormatException ignore )
|
||||
{
|
||||
log("ID exception: "+headl);
|
||||
}
|
||||
catch ( Exception panik )
|
||||
{
|
||||
log("First line invalid!");
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else if ( colonPos != -1 )
|
||||
{
|
||||
String head = line.substring(0,colonPos);
|
||||
// XXX Skip LWS (what is LWS)
|
||||
int i = colonPos + 1;
|
||||
while ( isLWS(line.charAt(i)) ) i++;
|
||||
String value= line.substring(i);
|
||||
log("<"+head+">=<"+ value+">");
|
||||
if ( head.equalsIgnoreCase("Location") )
|
||||
{
|
||||
// res.setStatus(HttpServletResponse.SC_MOVED_TEMPORARILY);
|
||||
// res.setHeader(head, value );
|
||||
log("Location cutted: "+value);
|
||||
}
|
||||
else if ( head.equalsIgnoreCase( "Content-type" ) )
|
||||
res.setContentType( value );
|
||||
else if ( head.equalsIgnoreCase( "Content-length" ) )
|
||||
{
|
||||
try
|
||||
{
|
||||
int cLen = Integer.parseInt( value );
|
||||
retval = ( cLen > 0 );
|
||||
res.setContentLength(cLen);
|
||||
}
|
||||
catch ( NumberFormatException ignore ) {}
|
||||
}
|
||||
// Generically treat unknown headers
|
||||
else
|
||||
{
|
||||
log("^- generic.");
|
||||
res.setHeader(head, value );
|
||||
}
|
||||
}
|
||||
// XXX We do not treat multiline continuation Headers here
|
||||
// which have not occured anywhere yet.
|
||||
firstline = false;
|
||||
}
|
||||
}
|
||||
catch ( IOException e )
|
||||
{
|
||||
log("Header skip problem:");
|
||||
throw new ServletException("Header skip problem: "+e.getMessage());
|
||||
}
|
||||
log( "--------------" );
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a RFC2616 line from an InputStream:
|
||||
*/
|
||||
public int readLine(InputStream in, byte[] b )
|
||||
throws IOException
|
||||
{
|
||||
int off2 = 0;
|
||||
while ( off2 < b.length )
|
||||
{
|
||||
int r = in.read();
|
||||
if ( r == -1 )
|
||||
{
|
||||
if (off2 == 0 )
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
if ( r == 13 )
|
||||
continue;
|
||||
if ( r == 10 )
|
||||
break;
|
||||
b[off2] = (byte) r;
|
||||
++off2;
|
||||
}
|
||||
return off2;
|
||||
}
|
||||
|
||||
/** Copy a file from in to out.
|
||||
* Sub-classes can override this in order to do filtering of some sort.
|
||||
*/
|
||||
public void copyStream( InputStream in, OutputStream out )
|
||||
throws IOException
|
||||
{
|
||||
BufferedInputStream bin = new BufferedInputStream(in);
|
||||
int b;
|
||||
while ( ( b = bin.read() ) != -1 )
|
||||
out.write(b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split a blank separated string into
|
||||
*/
|
||||
public String[] wordStr( String inp )
|
||||
{
|
||||
StringTokenizer tok = new StringTokenizer(inp, " ");
|
||||
int i, n = tok.countTokens();
|
||||
String[] res = new String[n];
|
||||
for(i = 0; i < n; i++ )
|
||||
res[i] = tok.nextToken();
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* XXX Should identify RFC2616 LWS
|
||||
*/
|
||||
protected boolean isLWS(char c)
|
||||
{
|
||||
return c == ' ';
|
||||
}
|
||||
|
||||
/**
|
||||
* Capture awaay the standard servlet log ..
|
||||
*/
|
||||
public void log(String msg)
|
||||
{
|
||||
if ( debugFlag )
|
||||
System.err.println("## "+msg);
|
||||
}
|
||||
}
|
@ -0,0 +1,377 @@
|
||||
package net.yacy.http;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.HashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.servlet.Servlet;
|
||||
import javax.servlet.ServletConfig;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.ServletRequest;
|
||||
import javax.servlet.ServletResponse;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import net.yacy.cora.document.encoding.UTF8;
|
||||
import net.yacy.cora.document.id.DigestURL;
|
||||
import net.yacy.cora.document.id.MultiProtocolURL;
|
||||
import net.yacy.cora.protocol.ClientIdentification;
|
||||
import net.yacy.cora.protocol.HeaderFramework;
|
||||
import net.yacy.cora.protocol.RequestHeader;
|
||||
import net.yacy.cora.protocol.ResponseHeader;
|
||||
import net.yacy.cora.util.ConcurrentLog;
|
||||
import net.yacy.kelondro.util.FileUtils;
|
||||
import net.yacy.search.Switchboard;
|
||||
import net.yacy.server.http.ChunkedInputStream;
|
||||
import net.yacy.server.http.HTTPDProxyHandler;
|
||||
import org.eclipse.jetty.continuation.Continuation;
|
||||
import org.eclipse.jetty.continuation.ContinuationSupport;
|
||||
import org.eclipse.jetty.http.HttpURI;
|
||||
import org.eclipse.jetty.servlets.ProxyServlet;
|
||||
|
||||
/**
|
||||
* Servlet to implement proxy via url parameter "/proxy.html?url=xyz_urltoproxy"
|
||||
* this implementation uses the existing proxy functions from YaCy HTTPDProxyHandler
|
||||
*
|
||||
* InitParameters
|
||||
* ProxyHost : hostname of proxy host, default is "localhost"
|
||||
* ProxyPort : port of the proxy host, default 8090
|
||||
*
|
||||
* functionality
|
||||
* - get parameters
|
||||
* - convert headers to YaCy style headers and parameters
|
||||
* - call existing HTTPDProxy
|
||||
* - revert response headers back from YaCy style to servlet specification
|
||||
* - handle rewrite of link (to point to proxy)
|
||||
* - send to client
|
||||
*
|
||||
* later improvements should/could use implementation to avoid back and forth converting
|
||||
* between YaCy and Servlet header/parameter style and use proxy implementation within
|
||||
* servlet specification or a existing reverse-proxy library.
|
||||
*
|
||||
*/
|
||||
public class YaCyProxyServlet extends ProxyServlet implements Servlet {
|
||||
|
||||
@Override
|
||||
public void init(ServletConfig config) throws ServletException {
|
||||
super.init(config);
|
||||
|
||||
// must be lower case (header names are internally converted to lower)
|
||||
_DontProxyHeaders.add("host"); // to prevent Host header setting from original servletrequest (which is localhost)
|
||||
|
||||
}
|
||||
/* ------------------------------------------------------------ */
|
||||
|
||||
@Override
|
||||
public void service (ServletRequest req, ServletResponse res) throws ServletException, IOException {
|
||||
|
||||
final HttpServletRequest request = (HttpServletRequest) req;
|
||||
final HttpServletResponse response = (HttpServletResponse) res;
|
||||
|
||||
if ("CONNECT".equalsIgnoreCase(request.getMethod())) {
|
||||
handleConnect(request, response);
|
||||
} else {
|
||||
String action = null;
|
||||
|
||||
final Continuation continuation = ContinuationSupport.getContinuation(request);
|
||||
|
||||
if (!continuation.isInitial()) {
|
||||
response.sendError(HttpServletResponse.SC_GATEWAY_TIMEOUT); // Need better test that isInitial
|
||||
return;
|
||||
} else {
|
||||
URL proxyurl = null;
|
||||
String strARGS = request.getQueryString();
|
||||
if (strARGS == null) {
|
||||
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
|
||||
return;
|
||||
}
|
||||
|
||||
if (strARGS.startsWith("action=")) {
|
||||
int detectnextargument = strARGS.indexOf("&");
|
||||
action = strARGS.substring(7, detectnextargument);
|
||||
strARGS = strARGS.substring(detectnextargument + 1);
|
||||
}
|
||||
if (strARGS.startsWith("url=")) {
|
||||
final String strUrl = strARGS.substring(4); // strip "url="
|
||||
|
||||
try {
|
||||
proxyurl = new URL(strUrl);
|
||||
} catch (final MalformedURLException e) {
|
||||
proxyurl = new URL(URLDecoder.decode(strUrl, UTF8.charset.name()));
|
||||
|
||||
}
|
||||
}
|
||||
if (proxyurl == null) {
|
||||
response.sendError(HttpServletResponse.SC_NOT_FOUND,"url parameter missing");
|
||||
return;
|
||||
}
|
||||
int port = proxyurl.getPort();
|
||||
if (port < 1) {
|
||||
port = 80;
|
||||
}
|
||||
|
||||
String host = proxyurl.getHost();
|
||||
if (proxyurl.getPort() != -1) {
|
||||
host += ":" + proxyurl.getPort();
|
||||
}
|
||||
RequestHeader yacyRequestHeader = ProxyHandler.convertHeaderFromJetty(request);
|
||||
yacyRequestHeader.remove(RequestHeader.KEEP_ALIVE);
|
||||
yacyRequestHeader.remove(RequestHeader.CONTENT_LENGTH);
|
||||
|
||||
final HashMap<String, Object> prop = new HashMap<String, Object>();
|
||||
prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1);
|
||||
prop.put(HeaderFramework.CONNECTION_PROP_HOST, proxyurl.getHost());
|
||||
prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getFile().replaceAll(" ", "%20"));
|
||||
prop.put(HeaderFramework.CONNECTION_PROP_REQUESTLINE, "PROXY");
|
||||
prop.put("CLIENTIP", "0:0:0:0:0:0:0:1");
|
||||
|
||||
yacyRequestHeader.put(HeaderFramework.HOST, proxyurl.getHost());
|
||||
// temporarily add argument to header to pass it on to augmented browsing
|
||||
if (action != null) yacyRequestHeader.put("YACYACTION", action);
|
||||
|
||||
final ByteArrayOutputStream tmpproxyout = new ByteArrayOutputStream();
|
||||
HTTPDProxyHandler.doGet(prop, yacyRequestHeader, tmpproxyout, ClientIdentification.yacyProxyAgent);
|
||||
|
||||
// reparse header to extract content-length and mimetype
|
||||
final ResponseHeader outgoingHeader = new ResponseHeader(200); //
|
||||
final InputStream proxyout = new ByteArrayInputStream(tmpproxyout.toByteArray());
|
||||
String line = readLine(proxyout);
|
||||
while (line != null && !line.equals("")) {
|
||||
int p;
|
||||
if ((p = line.indexOf(':')) >= 0) {
|
||||
// store a property
|
||||
outgoingHeader.add(line.substring(0, p).trim(), line.substring(p + 1).trim());
|
||||
}
|
||||
line = readLine(proxyout);
|
||||
}
|
||||
if (line == null) {
|
||||
response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,"Proxy Header missing");
|
||||
return;
|
||||
}
|
||||
|
||||
final int httpStatus = Integer.parseInt((String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS));
|
||||
|
||||
String directory = "";
|
||||
if (proxyurl.getPath().lastIndexOf('/') > 0) {
|
||||
directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/'));
|
||||
}
|
||||
|
||||
if (response.getHeader(HeaderFramework.LOCATION) != null) {
|
||||
// rewrite location header
|
||||
String location = response.getHeader(HeaderFramework.LOCATION);
|
||||
String actioncmdstr = (action != null) ? "action=" + action + "&" : "";
|
||||
if (location.startsWith("http")) {
|
||||
location = "/proxy.html?" + actioncmdstr + "url=" + location;
|
||||
} else {
|
||||
location = "/proxy.html?" + actioncmdstr + "url=http://" + proxyurl.getHost() + "/" + location;
|
||||
}
|
||||
//outgoingHeader.put(HeaderFramework.LOCATION, location);
|
||||
response.addHeader(HeaderFramework.LOCATION, location);
|
||||
}
|
||||
|
||||
//final String mimeType = outgoingHeader.getContentType();
|
||||
final String mimeType = outgoingHeader.getContentType();
|
||||
if ((mimeType != null) && (mimeType.startsWith("text/html") || mimeType.startsWith("text"))) {
|
||||
final StringWriter buffer = new StringWriter();
|
||||
|
||||
if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) {
|
||||
FileUtils.copy(new ChunkedInputStream(proxyout), buffer, UTF8.charset);
|
||||
} else {
|
||||
FileUtils.copy(proxyout, buffer, UTF8.charset);
|
||||
}
|
||||
final String sbuffer = buffer.toString();
|
||||
|
||||
final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)");
|
||||
final Matcher m = p.matcher(sbuffer);
|
||||
final StringBuffer result = new StringBuffer(80);
|
||||
String init, url;
|
||||
Switchboard sb = Switchboard.getSwitchboard();
|
||||
while (m.find()) {
|
||||
init = null;
|
||||
if (m.group(1) != null) { init = m.group(1); }
|
||||
if (m.group(3) != null) { init = m.group(3); }
|
||||
if (m.group(5) != null) { init = m.group(5); }
|
||||
if (m.group(7) != null) { init = m.group(7); }
|
||||
if (m.group(9) != null) { init = m.group(9); }
|
||||
url = null;
|
||||
if (m.group(2) != null) { url = m.group(2); }
|
||||
if (m.group(4) != null) { url = m.group(4); }
|
||||
if (m.group(6) != null) { url = m.group(6); }
|
||||
if (m.group(8) != null) { url = m.group(8); }
|
||||
if (m.group(10) != null) { url = m.group(10); }
|
||||
if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) {
|
||||
String newurl = init + url;
|
||||
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
||||
m.appendReplacement(result, newurl);
|
||||
|
||||
} else if (url.startsWith("http")) {
|
||||
// absoulte url of form href="http://domain.com/path"
|
||||
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
||||
try {
|
||||
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(url)) != null) {
|
||||
continue;
|
||||
}
|
||||
} catch (final MalformedURLException e) {
|
||||
ConcurrentLog.fine("ProxyServlet", "malformed url for url-rewirte " + url);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
String newurl = init + "/proxy.html?url=" + url;
|
||||
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
||||
m.appendReplacement(result, newurl);
|
||||
|
||||
} else if (url.startsWith("//")) {
|
||||
// absoulte url but same protocol of form href="//domain.com/path"
|
||||
final String complete_url = proxyurl.getProtocol() + ":" + url;
|
||||
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
||||
try {
|
||||
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURL(complete_url)) != null) {
|
||||
continue;
|
||||
}
|
||||
} catch (MalformedURLException ex) {
|
||||
ConcurrentLog.fine("ProxyServlet", "malformed url for url-rewirte " + complete_url);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
String newurl = init + "/proxy.html?url=" + complete_url;
|
||||
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
||||
m.appendReplacement(result, newurl);
|
||||
|
||||
} else if (url.startsWith("/")) {
|
||||
// absolute path of form href="/absolute/path/to/linked/page"
|
||||
String newurl = init + "/proxy.html?url=http://" + host + url;
|
||||
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
||||
m.appendReplacement(result, newurl);
|
||||
|
||||
} else {
|
||||
// relative path of form href="relative/path"
|
||||
try {
|
||||
MultiProtocolURL target = new MultiProtocolURL("http://" + host + directory + "/" + url);
|
||||
String newurl = init + "/proxy.html?url=" + target.toString();
|
||||
newurl = newurl.replaceAll("\\$", "\\\\\\$");
|
||||
m.appendReplacement(result, newurl);
|
||||
} catch (final MalformedURLException e) {}
|
||||
}
|
||||
}
|
||||
m.appendTail(result);
|
||||
|
||||
byte[] sbb = UTF8.getBytes(result.toString());
|
||||
|
||||
// add some proxy-headers to response header
|
||||
response.setContentType(outgoingHeader.getContentType());
|
||||
if (outgoingHeader.containsKey(HeaderFramework.SERVER)) {
|
||||
response.addHeader(HeaderFramework.SERVER, outgoingHeader.get(HeaderFramework.SERVER));
|
||||
}
|
||||
if (outgoingHeader.containsKey(HeaderFramework.DATE)) {
|
||||
response.addHeader(HeaderFramework.DATE, outgoingHeader.get(HeaderFramework.DATE));
|
||||
}
|
||||
if (outgoingHeader.containsKey(HeaderFramework.LAST_MODIFIED)) {
|
||||
response.addHeader(HeaderFramework.LAST_MODIFIED, outgoingHeader.get(HeaderFramework.LAST_MODIFIED));
|
||||
}
|
||||
if (outgoingHeader.containsKey(HeaderFramework.EXPIRES)) {
|
||||
response.addHeader(HeaderFramework.EXPIRES, outgoingHeader.get(HeaderFramework.EXPIRES));
|
||||
}
|
||||
|
||||
response.setStatus(httpStatus);
|
||||
response.addIntHeader(HeaderFramework.CONTENT_LENGTH, sbb.length);
|
||||
response.getOutputStream().write(sbb);
|
||||
|
||||
} else {
|
||||
if ((response.getHeader(HeaderFramework.CONTENT_LENGTH) == null) && prop.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE)) {
|
||||
response.addHeader(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE));
|
||||
}
|
||||
response.setStatus(httpStatus);
|
||||
FileUtils.copy(proxyout, response.getOutputStream());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String readLine(final InputStream in) throws IOException {
|
||||
final ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
||||
int b;
|
||||
while ((b = in.read()) != '\r' && b != -1) {
|
||||
buf.write(b);
|
||||
}
|
||||
if (b == -1) {
|
||||
return null;
|
||||
}
|
||||
b = in.read(); // read \n
|
||||
if (b == -1) {
|
||||
return null;
|
||||
}
|
||||
return buf.toString("UTF-8");
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get a destination url from a querysting with parameter &url=_destinationurl_
|
||||
*
|
||||
* @param querystring
|
||||
* @return destinationURL
|
||||
* @throws MalformedURLException
|
||||
*/
|
||||
protected HttpURI proxyHttpURIfromQueryString(String querystring) throws MalformedURLException {
|
||||
URL newurl = null;
|
||||
String strARGS = querystring;
|
||||
String action;
|
||||
if (strARGS.startsWith("action=")) {
|
||||
int detectnextargument = strARGS.indexOf("&");
|
||||
action = strARGS.substring(7, detectnextargument);
|
||||
strARGS = strARGS.substring(detectnextargument + 1);
|
||||
}
|
||||
if (strARGS.startsWith("url=")) {
|
||||
final String strUrl = strARGS.substring(4); // strip url=
|
||||
|
||||
try {
|
||||
newurl = new URL(strUrl);
|
||||
} catch (final MalformedURLException e) {
|
||||
|
||||
}
|
||||
}
|
||||
int port = newurl.getPort();
|
||||
if (port < 1) {
|
||||
port = newurl.getDefaultPort();
|
||||
}
|
||||
return proxyHttpURI(newurl.getProtocol(), newurl.getHost(), port, newurl.getPath());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HttpURI proxyHttpURI(HttpServletRequest request, String uri) throws MalformedURLException {
|
||||
URL newurl = null;
|
||||
String strARGS = request.getQueryString();
|
||||
if (strARGS.startsWith("action=")) {
|
||||
int detectnextargument = strARGS.indexOf("&");
|
||||
strARGS = strARGS.substring(detectnextargument + 1);
|
||||
}
|
||||
if (strARGS.startsWith("url=")) {
|
||||
final String strUrl = strARGS.substring(4); // strip url=
|
||||
|
||||
try {
|
||||
newurl = new URL(strUrl);
|
||||
} catch (final MalformedURLException e) {
|
||||
|
||||
}
|
||||
}
|
||||
int port = newurl.getPort();
|
||||
if (port < 1) {
|
||||
port = newurl.getDefaultPort();
|
||||
}
|
||||
return proxyHttpURI(newurl.getProtocol(), newurl.getHost(), port, newurl.getPath());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getServletInfo() {
|
||||
return "YaCy Proxy Servlet";
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in new issue