// HTTPDProxyHandler.java // (C) 2004 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany // first published 2004 on http://yacy.net // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // LICENSE // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // Contributions: // [AS] Alexander Schier: Blacklist (404 response for AGIS hosts) // [TL] Timo Leise: url-wildcards for blacklists /* Class documentation: This class is a servlet to the httpd daemon. It is accessed each time an URL in a GET, HEAD or POST command contains the whole host information or a host is given in the header host field of an HTTP/1.0 / HTTP/1.1 command. Transparency is maintained, whenever appropriate. We change header attributes if necessary for the indexing mechanism; i.e. we do not support gzip-ed encoding. 
We also do not support unrealistic 'expires' values that would force a cache to be flushed immediately pragma non-cache attributes are supported */ package net.yacy.server.http; import java.io.BufferedReader; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; import java.net.BindException; import java.net.ConnectException; import java.net.InetAddress; import java.net.MalformedURLException; import java.net.NoRouteToHostException; import java.net.Socket; import java.net.SocketException; import java.net.SocketTimeoutException; import java.net.UnknownHostException; import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.LogManager; import java.util.logging.Logger; import net.yacy.cora.document.UTF8; import net.yacy.cora.protocol.ClientIdentification; import net.yacy.cora.protocol.Domains; import net.yacy.cora.protocol.HeaderFramework; import net.yacy.cora.protocol.RequestHeader; import net.yacy.cora.protocol.ResponseHeader; import net.yacy.cora.protocol.http.HTTPClient; import net.yacy.cora.protocol.http.ProxySettings; import net.yacy.crawler.data.Cache; import net.yacy.crawler.retrieval.Request; import net.yacy.crawler.retrieval.Response; import net.yacy.document.TextParser; import net.yacy.document.parser.html.ContentTransformer; import net.yacy.document.parser.html.Transformer; import net.yacy.kelondro.data.meta.DigestURI; import net.yacy.kelondro.io.ByteCountOutputStream; import net.yacy.kelondro.logging.Log; import net.yacy.kelondro.util.FileUtils; import net.yacy.repository.Blacklist.BlacklistType; import net.yacy.search.Switchboard; import net.yacy.search.SwitchboardConstants; import net.yacy.server.serverCore; import 
net.yacy.server.serverObjects; public final class HTTPDProxyHandler { private static final String yacyProxyUserAgent = "yacyproxy (" + ClientIdentification.yacySystem +") http://yacy.net/bot.html"; // static variables // can only be instantiated upon first instantiation of this class object private static Switchboard sb = null; private static final HashSet yellowList; private static int timeout = 60000; private static boolean yacyTrigger = true; public static boolean isTransparentProxy = false; private static Process redirectorProcess = null; private static boolean redirectorEnabled = false; private static PrintWriter redirectorWriter = null; private static BufferedReader redirectorReader = null; private static Transformer transformer = null; private static File htRootPath = null; //private Properties connectionProperties = null; // creating a logger private static final Log log = new Log("PROXY"); private static boolean doAccessLogging = false; /** * Do logging configuration for special proxy access log file */ static { // get a switchboard sb = Switchboard.getSwitchboard(); if (sb != null) { isTransparentProxy = Boolean.parseBoolean(sb.getConfig("isTransparentProxy","false")); // set timeout timeout = Integer.parseInt(sb.getConfig("proxy.clientTimeout", "10000")); // create a htRootPath: system pages htRootPath = new File(sb.getAppPath(), sb.getConfig("htRootPath","htroot")); if (!(htRootPath.exists())) { if(!htRootPath.mkdir()) Log.logSevere("PROXY", "could not create htRoot "+ htRootPath); } // do logger initialization try { log.logInfo("Configuring proxy access logging ..."); // getting the logging manager final LogManager manager = LogManager.getLogManager(); final String className = HTTPDProxyHandler.class.getName(); // determining if proxy access logging is enabled final String enabled = manager.getProperty(className + ".logging.enabled"); if ("true".equalsIgnoreCase(enabled)) { // reading out some needed configuration properties int limit = 1024*1024, 
count = 20; String pattern = manager.getProperty(className + ".logging.FileHandler.pattern"); if (pattern == null) pattern = "DATA/LOG/proxyAccess%u%g.log"; // make pattern absolute if (!new File(pattern).isAbsolute()) pattern = new File(sb.getDataPath(), pattern).getAbsolutePath(); final String limitStr = manager.getProperty(className + ".logging.FileHandler.limit"); if (limitStr != null) try { limit = Integer.parseInt(limitStr); } catch (final NumberFormatException e) {} final String countStr = manager.getProperty(className + ".logging.FileHandler.count"); if (countStr != null) try { count = Integer.parseInt(countStr); } catch (final NumberFormatException e) {} // creating the proxy access logger final Logger proxyLogger = Logger.getLogger("PROXY.access"); proxyLogger.setUseParentHandlers(false); proxyLogger.setLevel(Level.FINEST); final FileHandler txtLog = new FileHandler(pattern, limit, count, true); txtLog.setFormatter(new ProxyLogFormatter()); txtLog.setLevel(Level.FINEST); proxyLogger.addHandler(txtLog); doAccessLogging = true; log.logInfo("Proxy access logging configuration done." 
+ "\n\tFilename: " + pattern + "\n\tLimit: " + limitStr + "\n\tCount: " + countStr); } else { log.logInfo("Proxy access logging is deactivated."); } } catch (final Exception e) { log.logSevere("Unable to configure proxy access logging.",e); } // load a transformer transformer = new ContentTransformer(); transformer.init(new File(sb.getAppPath(), sb.getConfig(SwitchboardConstants.LIST_BLUE, "")).toString()); // load the yellow-list final String f = sb.getConfig("proxyYellowList", null); if (f != null) { yellowList = FileUtils.loadList(new File(f)); log.logConfig("loaded yellow-list from file " + f + ", " + yellowList.size() + " entries"); } else { yellowList = new HashSet(); } final String redirectorPath = sb.getConfig("externalRedirector", ""); if (redirectorPath.length() > 0 && !redirectorEnabled) { try { redirectorProcess=Runtime.getRuntime().exec(redirectorPath); redirectorWriter = new PrintWriter(redirectorProcess.getOutputStream()); redirectorReader = new BufferedReader(new InputStreamReader(redirectorProcess.getInputStream())); redirectorEnabled=true; } catch (final IOException e) { System.out.println("redirector not Found"); } } } else { yellowList = null; } } /** * Special logger instance for proxy access logging much similar * to the squid access.log file */ private static final Log proxyLog = new Log("PROXY.access"); /** * Reusable {@link StringBuilder} for logging */ private static final StringBuilder logMessage = new StringBuilder(); /** * Reusable {@link StringBuilder} to generate the useragent string */ private static final StringBuilder userAgentStr = new StringBuilder(); private static void handleOutgoingCookies(final RequestHeader requestHeader, final String targethost, final String clienthost) { /* The syntax for the header is: cookie = "Cookie:" cookie-version 1*((";" | ",") cookie-value) cookie-value = NAME "=" VALUE [";" path] [";" domain] cookie-version = "$Version" "=" value NAME = attr VALUE = value path = "$Path" "=" value domain = 
"$Domain" "=" value */ if (sb.getConfigBool("proxy.monitorCookies", false)) { if (requestHeader.containsKey(RequestHeader.COOKIE)) { final Object[] entry = new Object[]{new Date(), clienthost, requestHeader.getMultiple(RequestHeader.COOKIE)}; synchronized(sb.outgoingCookies) { sb.outgoingCookies.put(targethost, entry); } } } } private static void handleIncomingCookies(final ResponseHeader respondHeader, final String serverhost, final String targetclient) { /* The syntax for the Set-Cookie response header is set-cookie = "Set-Cookie:" cookies cookies = 1#cookie cookie = NAME "=" VALUE *(";" cookie-av) NAME = attr VALUE = value cookie-av = "Comment" "=" value | "Domain" "=" value | "Max-Age" "=" value | "Path" "=" value | "Secure" | "Version" "=" 1*DIGIT */ if (sb.getConfigBool("proxy.monitorCookies", false)) { if (respondHeader.containsKey(HeaderFramework.SET_COOKIE)) { final Object[] entry = new Object[]{new Date(), targetclient, respondHeader.getMultiple(HeaderFramework.SET_COOKIE)}; synchronized(sb.incomingCookies) { sb.incomingCookies.put(serverhost, entry); } } } } /** * @param conProp a collection of properties about the connection, like URL * @param requestHeader The header lines of the connection from the request * @param respond the OutputStream to the client * @see de.anomic.http.httpdHandler#doGet(java.util.Properties, net.yacy.cora.protocol.HeaderFramework, java.io.OutputStream) */ public static void doGet(final HashMap conProp, final RequestHeader requestHeader, final OutputStream respond) { ByteCountOutputStream countedRespond = null; try { final int reqID = requestHeader.hashCode(); // remembering the starting time of the request final Date requestDate = new Date(); // remember the time... 
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime())); if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction(); sb.proxyLastAccess = System.currentTimeMillis(); // using an ByteCount OutputStream to count the send bytes (needed for the logfile) countedRespond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY"); String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); // always starts with leading '/' final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer int pos=0; int port=0; DigestURI url = null; try { url = DigestURI.toDigestURI(HeaderFramework.getRequestURL(conProp)); if (log.isFine()) log.logFine(reqID +" GET "+ url); if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); //redirector if (redirectorEnabled){ synchronized(redirectorProcess){ redirectorWriter.println(url.toNormalform(true)); redirectorWriter.flush(); } final String newUrl = redirectorReader.readLine(); if (!newUrl.equals("")) { try { url = new DigestURI(newUrl); } catch(final MalformedURLException e){}//just keep the old one } if (log.isFinest()) log.logFinest(reqID +" using redirector to "+ url); conProp.put(HeaderFramework.CONNECTION_PROP_HOST, url.getHost()+":"+url.getPort()); conProp.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath()); requestHeader.put(HeaderFramework.HOST, url.getHost()+":"+url.getPort()); requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, url.getPath()); } } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; log.logSevere(errorMsg); 
HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); return; } if ((pos = host.indexOf(':')) < 0) { port = 80; } else { port = Integer.parseInt(host.substring(pos + 1)); host = host.substring(0, pos); } // check the blacklist // blacklist idea inspired by [AS]: // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); if (args != null) { path = path + "?" + args; } if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { log.logInfo("AGIS blocking of host '" + hostlow + "'"); HTTPDemon.sendRespondError(conProp,countedRespond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); return; } // handle outgoing cookies handleOutgoingCookies(requestHeader, host, ip); prepareRequestHeader(conProp, requestHeader, hostlow); final ResponseHeader cachedResponseHeader = Cache.getResponseHeader(url.hash()); // why are files unzipped upon arrival? why not zip all files in cache? // This follows from the following premises // (a) no file shall be unzip-ed more than once to prevent unnecessary computing time // (b) old cache entries shall be comparable with refill-entries to detect/distinguish case 3+4 // (c) the indexing mechanism needs files unzip-ed, a schedule could do that later // case b and c contradicts, if we use a scheduler, because files in a stale cache would be unzipped // and the newly arrival would be zipped and would have to be unzipped upon load. But then the // scheduler is superfluous. Therefore the only reminding case is // (d) cached files shall be either all zipped or unzipped // case d contradicts with a, because files need to be unzipped for indexing. Therefore // the only remaining case is to unzip files right upon load. Thats what we do here. // finally use existing cache if appropriate // here we must decide weather or not to save the data // to a cache // we distinguish four CACHE STATE cases: // 1. cache fill // 2. 
cache fresh - no refill // 3. cache stale - refill - necessary // 4. cache stale - refill - superfluous // in two of these cases we trigger a scheduler to handle newly arrived files: // case 1 and case 3 if (cachedResponseHeader == null) { if (log.isFinest()) log.logFinest(reqID + " page not in cache: fulfill request from web"); fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond); } else { final Request request = new Request( null, url, requestHeader.referer() == null ? null : DigestURI.toDigestURI(requestHeader.referer()).hash(), "", cachedResponseHeader.lastModified(), sb.crawler.defaultProxyProfile.handle(), 0, 0, 0, 0); final Response response = new Response( request, requestHeader, cachedResponseHeader, sb.crawler.defaultProxyProfile, true ); final byte[] cacheContent = Cache.getContent(url.hash()); if (cacheContent != null && response.isFreshForProxy()) { if (log.isFinest()) log.logFinest(reqID + " fulfill request from cache"); fulfillRequestFromCache(conProp, url, requestHeader, cachedResponseHeader, cacheContent, countedRespond); } else { if (log.isFinest()) log.logFinest(reqID + " fulfill request from web"); fulfillRequestFromWeb(conProp, url, requestHeader, cachedResponseHeader, countedRespond); } } } catch (final Exception e) { try { final String exTxt = e.getMessage(); if ((exTxt!=null)&&(exTxt.startsWith("Socket closed"))) { forceConnectionClose(conProp); } else if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { final String errorMsg = "Unexpected Error. 
" + e.getClass().getName() + ": " + e.getMessage(); HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); log.logSevere(errorMsg); } else { forceConnectionClose(conProp); } } catch (final Exception ee) { forceConnectionClose(conProp); } } finally { try { if(countedRespond != null) countedRespond.flush(); else if(respond != null) respond.flush(); } catch (final Exception e) {} if (countedRespond != null) countedRespond.finish(); conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_END, Long.valueOf(System.currentTimeMillis())); conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE,(countedRespond != null) ? Long.toString(countedRespond.getCount()) : -1L); logProxyAccess(conProp); } } private static void fulfillRequestFromWeb(final HashMap conProp, final DigestURI url, final RequestHeader requestHeader, final ResponseHeader cachedResponseHeader, final OutputStream respond) { try { final boolean proxyAugmentation = sb.getConfigBool("proxyAugmentation", false); final int reqID = requestHeader.hashCode(); String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); // always starts with leading '/' final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given final String ip = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // the ip from the connecting peer int port, pos; if ((pos = host.indexOf(':')) < 0) { port = 80; } else { port = Integer.parseInt(host.substring(pos + 1)); host = host.substring(0, pos); } // point virtual directory to my peer if (path.startsWith("/currentyacypeer/")) { host = sb.peers.myIP(); port = sb.peers.myPort(); path = path.substring(16); } // resolve yacy and yacyh domains String yAddress = resolveYacyDomains(host); // re-calc the url path 
/**
 * Answers a proxied GET request by loading the resource from the remote
 * server, streaming it to the client and, where permitted, storing it in the
 * cache and handing it to the indexer.
 *
 * All exceptions are passed to handleProxyException; none leave this method.
 *
 * @param conProp connection properties; the proxy respond code (TCP_MISS,
 *        TCP_REFRESH_MISS, TCP_REF_FAIL_HIT) is written back into it
 * @param url the requested url; its hash is the cache key
 * @param requestHeader the client request header; modified before forwarding
 * @param cachedResponseHeader header of an existing cache entry, or null if
 *        there is none; a non-null value causes that cache entry to be deleted
 * @param respond stream to the client
 */
private static void fulfillRequestFromWeb(final HashMap conProp, final DigestURI url, final RequestHeader requestHeader, final ResponseHeader cachedResponseHeader, final OutputStream respond) {
    try {
        final boolean proxyAugmentation = sb.getConfigBool("proxyAugmentation", false);
        final int reqID = requestHeader.hashCode();

        String host =       (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST);
        String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH);     // always starts with leading '/'
        final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS);     // may be null if no args were given
        final String ip =   (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); // the ip from the connecting peer
        final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // the http version of the client request

        // split an optional ":port" suffix off the host; default port is 80
        int port, pos;
        if ((pos = host.indexOf(':')) < 0) {
            port = 80;
        } else {
            port = Integer.parseInt(host.substring(pos + 1));
            host = host.substring(0, pos);
        }

        // point virtual directory to my peer
        if (path.startsWith("/currentyacypeer/")) {
            host = sb.peers.myIP();
            port = sb.peers.myPort();
            // 16 = length of "/currentyacypeer", so the following '/' stays as leading slash
            path = path.substring(16);
        }

        // resolve yacy and yacyh domains
        String yAddress = resolveYacyDomains(host);

        // re-calc the url path
        final String remotePath = (args == null) ? path : (path + "?" + args); // with leading '/'

        // remove yacy-subdomain-path, when accessing /env
        if (     (yAddress != null)
                && (remotePath.startsWith("/env"))
                && ((pos = yAddress.indexOf('/')) != -1)
        ) yAddress = yAddress.substring(0, yAddress.indexOf('/'));

        modifyProxyHeaders(requestHeader, httpVer);

        final String connectHost = hostPart(host, port, yAddress);
        final String getUrl = "http://"+ connectHost + remotePath;

        // the Host header is dropped before the client is configured with connectHost
        requestHeader.remove(HeaderFramework.HOST);

        final HTTPClient client = setupHttpClient(requestHeader, connectHost);

        // send request
        try {
            client.GET(getUrl);
            if (log.isFinest()) log.logFinest(reqID +"    response status: "+ client.getHttpResponse().getStatusLine());
            conProp.put(HeaderFramework.CONNECTION_PROP_CLIENT_REQUEST_HEADER, requestHeader);

            int statusCode = client.getHttpResponse().getStatusLine().getStatusCode();
            final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders());
            // determine if it's an internal error of the httpc
            if (responseHeader.isEmpty()) {
                throw new Exception(client.getHttpResponse().getStatusLine().toString());
            }

            if (proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
                // enable chunk encoding, because we don't know the length after annotating
                responseHeader.remove(HeaderFramework.CONTENT_LENGTH);
                responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
            }

            // null means the body is written to 'respond' directly (see outStream below)
            ChunkedOutputStream chunkedOut = setTransferEncoding(conProp, responseHeader, statusCode, respond);

            // the cache does either not exist or is (supposed to be) stale
            // sizeBeforeDelete stays -1 when there was no cache entry
            long sizeBeforeDelete = -1;
            if (cachedResponseHeader != null) {
                // delete the cache
                final ResponseHeader rh = Cache.getResponseHeader(url.hash());
                // when the cached header reports length 0, fall back to the size of the stored content
                if (rh != null && (sizeBeforeDelete = rh.getContentLength()) == 0) {
                    final byte[] b = Cache.getContent(url.hash());
                    if (b != null) sizeBeforeDelete = b.length;
                }
                Cache.delete(url.hash());
                conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
            }

            // reserve cache entry
            final Request request = new Request(
                    null,
                    url,
                    requestHeader.referer() == null ? null : DigestURI.toDigestURI(requestHeader.referer()).hash(),
                    "",
                    responseHeader.lastModified(),
                    sb.crawler.defaultProxyProfile.handle(),
                    0,
                    0,
                    0,
                    sizeBeforeDelete < 0 ? 0 : sizeBeforeDelete);

            // handle incoming cookies
            handleIncomingCookies(responseHeader, host, ip);

            // prepareResponseHeader(responseHeader, res.getHttpVer());
            prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString());

            if(proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
                // chunked encoding gets disabled somewhere, add it again
                responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
            }

            // sending the respond header back to the client
            if (chunkedOut != null) {
                responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked");
            }

            if (log.isFinest()) log.logFinest(reqID +"    sending response header: "+ responseHeader);
            HTTPDemon.sendRespondHeader(
                    conProp,
                    respond,
                    httpVer,
                    statusCode,
                    client.getHttpResponse().getStatusLine().toString(), // status text
                    responseHeader);

            if (hasBody(client.getHttpResponse().getStatusLine().getStatusCode())) {

                OutputStream outStream = chunkedOut != null ? chunkedOut : respond;
                final Response response = new Response(
                        request,
                        requestHeader,
                        responseHeader,
                        sb.crawler.defaultProxyProfile,
                        true
                );
                final String storeError = response.shallStoreCacheForProxy();
                final boolean storeHTCache = response.profile().storeHTCache();
                final String supportError = TextParser.supports(response.url(), response.getMimeType());

                if(proxyAugmentation && AugmentedHtmlStream.supportsMime(responseHeader.mime())) {
                    outStream = new AugmentedHtmlStream(outStream, responseHeader.getCharSet(), url, requestHeader);
                }

                if (
                        /*
                         * Now we store the response into the htcache directory if
                         * a) the response is cacheable AND
                         */
                        (storeError == null) &&
                        /*
                         * b) the user has configured to use the htcache OR
                         * c) the content should be indexed
                         *
                         * NOTE(review): (c) is tested as supportError != null; if a non-null
                         * value means "parser does not support this", the test looks inverted - confirm
                         */
                        ((storeHTCache) || (supportError != null))
                ) {
                    // we don't write actually into a file, only to RAM, and schedule writing the file.
                    // int l = res.getResponseHeader().size();
                    final int l = responseHeader.size();
                    final ByteArrayOutputStream byteStream = new ByteArrayOutputStream((l < 32) ? 32 : l);

                    // every byte goes to the client and into the memory buffer
                    final OutputStream toClientAndMemory = new MultiOutputStream(new OutputStream[] {outStream, byteStream});
                    // FileUtils.copy(res.getDataAsStream(), toClientAndMemory);
                    client.writeTo(toClientAndMemory);

                    // cached bytes
                    byte[] cacheArray;
                    if (byteStream.size() > 0) {
                        cacheArray = byteStream.toByteArray();
                    } else {
                        cacheArray = null;
                    }
                    if (log.isFine()) log.logFine(reqID +" writeContent of " + url + " produced cacheArray = " + ((cacheArray == null) ? "null" : ("size=" + cacheArray.length)));

                    if (sizeBeforeDelete == -1) {
                        // totally fresh file
                        response.setContent(cacheArray);
                        try {
                            Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                            sb.toIndexer(response);
                        } catch (final IOException e) {
                            log.logWarning("cannot write " + response.url() + " to Cache (1): " + e.getMessage(), e);
                        }
                        conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_MISS");
                    } else if (cacheArray != null && sizeBeforeDelete == cacheArray.length) {
                        // before we came here we deleted a cache entry;
                        // the new content has the same size, so nothing is stored or indexed
                        cacheArray = null;
                        //cacheManager.push(cacheEntry); // unnecessary update
                        conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REF_FAIL_HIT");
                    } else {
                        // before we came here we deleted a cache entry
                        response.setContent(cacheArray);
                        try {
                            Cache.store(response.url(), response.getResponseHeader(), cacheArray);
                            sb.toIndexer(response);
                        } catch (final IOException e) {
                            log.logWarning("cannot write " + response.url() + " to Cache (2): " + e.getMessage(), e);
                        }
                        conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE, "TCP_REFRESH_MISS");
                    }
                } else {
                    // no caching
                    if (log.isFine()) log.logFine(reqID +" "+ url.toString() + " not cached." +
                            " StoreError=" + ((storeError==null)?"None":storeError) +
                            " StoreHTCache=" + storeHTCache +
                            " SupportError=" + supportError);

                    // FileUtils.copy(res.getDataAsStream(), outStream);
                    client.writeTo(outStream);

                    conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_MISS");
                }

                // NOTE(review): outStream may be chunkedOut itself, in which case finish()/flush() run after close() - confirm this order is intended
                outStream.close();
                if (chunkedOut != null) {
                    chunkedOut.finish();
                    chunkedOut.flush();
                }
            } // end hasBody
        } catch(final SocketException se) {
            // the client is finished before the error is handled;
            // the finally block below calls finish() a second time on this path
            client.finish();
            handleProxyException(se,conProp,respond,url);
        } finally {
            // always release the client
            client.finish();
        }
    } catch (final Exception e) {
        handleProxyException(e,conProp,respond,url);
    }
}
close connection // res.closeStream(); // } client.finish(); } } catch (final Exception e) { handleProxyException(e,conProp,respond,url); } } /** * determines if the response should have a body * * @param statusCode * @param responseHeader * @return */ private static boolean hasBody(final int statusCode) { // "All 1xx (informational), 204 (no content), and 304 (not modified) responses MUST NOT // include a message-body." // [RFC 2616 HTTP/1.1, Sect. 4.3] and like [RFC 1945 HTTP/1.0, Sect. 7.2] if((statusCode >= 100 && statusCode < 200) || statusCode == 204 || statusCode == 304) { return false; } return true; } private static void fulfillRequestFromCache( final HashMap conProp, final DigestURI url, final RequestHeader requestHeader, final ResponseHeader cachedResponseHeader, final byte[] cacheEntry, OutputStream respond ) throws IOException { final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); // we respond on the request by using the cache, the cache is fresh try { prepareResponseHeader(cachedResponseHeader, httpVer); // replace date field in old header by actual date, this is according to RFC cachedResponseHeader.put(HeaderFramework.DATE, HeaderFramework.formatRFC1123(new Date())); // check if we can send a 304 instead the complete content if (requestHeader.containsKey(RequestHeader.IF_MODIFIED_SINCE)) { // conditional request: freshness of cache for that condition was already // checked within shallUseCache(). 
Now send only a 304 response log.logInfo("CACHE HIT/304 " + url.toString()); conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_REFRESH_HIT"); // setting the content length header to 0 cachedResponseHeader.put(HeaderFramework.CONTENT_LENGTH, Integer.toString(0)); // send cached header with replaced date and added length HTTPDemon.sendRespondHeader(conProp,respond,httpVer,304,cachedResponseHeader); //respondHeader(respond, "304 OK", cachedResponseHeader); // respond with 'not modified' } else { // unconditional request: send content of cache log.logInfo("CACHE HIT/203 " + url.toString()); conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE,"TCP_HIT"); // setting the content header to the proper length cachedResponseHeader.put(HeaderFramework.CONTENT_LENGTH, Long.toString(cacheEntry.length)); // send cached header with replaced date and added length HTTPDemon.sendRespondHeader(conProp,respond,httpVer,203,cachedResponseHeader); //respondHeader(respond, "203 OK", cachedResponseHeader); // respond with 'non-authoritative' if(sb.getConfigBool("proxyAugmentation", false) && AugmentedHtmlStream.supportsMime(cachedResponseHeader.mime())) { respond = new AugmentedHtmlStream(respond, cachedResponseHeader.getCharSet(), url, requestHeader); } // send also the complete body now from the cache // simply read the file and transfer to out socket FileUtils.copy(cacheEntry, respond); } // that's it! 
} catch (final Exception e) { // this happens if the client stops loading the file // we do nothing here if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { log.logWarning("Error while trying to send cached message body."); conProp.put(HeaderFramework.CONNECTION_PROP_PERSISTENT,"close"); } else { HTTPDemon.sendRespondError(conProp,respond,4,503,"socket error: " + e.getMessage(),"socket error: " + e.getMessage(), e); } } finally { try { respond.flush(); respond.close(); } catch (final Exception e) {} } return; } public static void doHead(final HashMap conProp, final RequestHeader requestHeader, OutputStream respond) { // ResponseContainer res = null; DigestURI url = null; try { final int reqID = requestHeader.hashCode(); // remembering the starting time of the request final Date requestDate = new Date(); // remember the time... conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime())); if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction(); sb.proxyLastAccess = System.currentTimeMillis(); // using an ByteCount OutputStream to count the send bytes respond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY"); String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); final String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); int port, pos; if ((pos = host.indexOf(':')) < 0) { port = 80; } else { port = Integer.parseInt(host.substring(pos + 1)); host = host.substring(0, pos); } try { url = new DigestURI("http", host, port, (args == null) ? path : path + "?" 
+ args); } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; log.logSevere(errorMsg); HTTPDemon.sendRespondError(conProp,respond,4,501,null,errorMsg,e); return; } if (log.isFine()) log.logFine(reqID +" HEAD "+ url); if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); // check the blacklist, inspired by [AS]: respond a 404 for all AGIS (all you get is shit) servers final String hostlow = host.toLowerCase(); // re-calc the url path final String remotePath = (args == null) ? path : (path + "?" + args); if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, remotePath)) { HTTPDemon.sendRespondError(conProp,respond,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); return; } prepareRequestHeader(conProp, requestHeader, hostlow); // resolve yacy and yacyh domains String yAddress = resolveYacyDomains(host); // remove yacy-subdomain-path, when accessing /env if ( (yAddress != null) && (remotePath.startsWith("/env")) && ((pos = yAddress.indexOf('/')) != -1) ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); modifyProxyHeaders(requestHeader, httpVer); // generate request-url final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl); final HTTPClient client = setupHttpClient(requestHeader, connectHost); // send request // try { // res = client.HEAD(getUrl); // if (log.isFinest()) log.logFinest(reqID +" response status: "+ res.getStatusLine()); client.HEADResponse(getUrl); if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); // determine if it's an internal error of the httpc // final ResponseHeader responseHeader = res.getResponseHeader(); // if 
(responseHeader.isEmpty()) { // throw new Exception(res.getStatusLine()); // } int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); if (responseHeader.isEmpty()) { throw new Exception(client.getHttpResponse().getStatusLine().toString()); } // prepareResponseHeader(responseHeader, res.getHttpVer()); prepareResponseHeader(responseHeader, client.getHttpResponse().getStatusLine().getProtocolVersion().toString()); // sending the server respond back to the client if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); // HTTPDemon.sendRespondHeader(conProp,respond,httpVer,res.getStatusCode(),res.getStatusLine().substring(4),responseHeader); HTTPDemon.sendRespondHeader( conProp, respond, httpVer, statusCode, client.getHttpResponse().getStatusLine().toString(), responseHeader); respond.flush(); // } finally { // if(res != null) { // // ... close connection // res.closeStream(); // } // } } catch (final Exception e) { handleProxyException(e,conProp,respond,url); } } public static void doPost(final HashMap conProp, final RequestHeader requestHeader, final OutputStream respond, final InputStream body) throws IOException { assert conProp != null : "precondition violated: conProp != null"; assert requestHeader != null : "precondition violated: requestHeader != null"; assert body != null : "precondition violated: body != null"; DigestURI url = null; ByteCountOutputStream countedRespond = null; try { final int reqID = requestHeader.hashCode(); // remembering the starting time of the request final Date requestDate = new Date(); // remember the time... 
conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_START, Long.valueOf(requestDate.getTime())); if (yacyTrigger) net.yacy.peers.Network.triggerOnlineAction(); sb.proxyLastAccess = System.currentTimeMillis(); // using an ByteCount OutputStream to count the send bytes countedRespond = new ByteCountOutputStream(respond,((String) conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE)).length() + 2,"PROXY"); String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); final String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // may be null if no args were given final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); int port, pos; if ((pos = host.indexOf(':')) < 0) { port = 80; } else { port = Integer.parseInt(host.substring(pos + 1)); host = host.substring(0, pos); } try { url = new DigestURI("http", host, port, (args == null) ? path : path + "?" + args); } catch (final MalformedURLException e) { final String errorMsg = "ERROR: internal error with url generation: host=" + host + ", port=" + port + ", path=" + path + ", args=" + args; log.logSevere(errorMsg); HTTPDemon.sendRespondError(conProp,countedRespond,4,501,null,errorMsg,e); return; } if (log.isFine()) log.logFine(reqID +" POST "+ url); if (log.isFinest()) log.logFinest(reqID +" header: "+ requestHeader); prepareRequestHeader(conProp, requestHeader, host.toLowerCase()); String yAddress = resolveYacyDomains(host); // re-calc the url path final String remotePath = (args == null) ? path : (path + "?" 
+ args); // remove yacy-subdomain-path, when accessing /env if ( (yAddress != null) && (remotePath.startsWith("/env")) && ((pos = yAddress.indexOf('/')) != -1) ) yAddress = yAddress.substring(0, yAddress.indexOf('/')); modifyProxyHeaders(requestHeader, httpVer); final String connectHost = hostPart(host, port, yAddress); final String getUrl = "http://"+ connectHost + remotePath; if (log.isFinest()) log.logFinest(reqID +" using url: "+ getUrl); // the CONTENT_LENGTH will be added by entity and cause a ClientProtocolException if set final int contentLength = requestHeader.getContentLength(); requestHeader.remove(HeaderFramework.CONTENT_LENGTH); final HTTPClient client = setupHttpClient(requestHeader, connectHost); // check input if(body == null) { log.logSevere("no body to POST!"); } try { // sending the request client.POST(getUrl, body, contentLength); if (log.isFinest()) log.logFinest(reqID +" response status: "+ client.getHttpResponse().getStatusLine()); int statusCode = client.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader responseHeader = new ResponseHeader(statusCode, client.getHttpResponse().getAllHeaders()); // determine if it's an internal error of the httpc if (responseHeader.isEmpty()) { throw new Exception(client.getHttpResponse().getStatusLine().toString()); } final ChunkedOutputStream chunked = setTransferEncoding(conProp, responseHeader, client.getHttpResponse().getStatusLine().getStatusCode(), countedRespond); prepareResponseHeader(responseHeader, client.getHttpResponse().getProtocolVersion().toString()); // sending the respond header back to the client if (chunked != null) { responseHeader.put(HeaderFramework.TRANSFER_ENCODING, "chunked"); } // sending response headers if (log.isFinest()) log.logFinest(reqID +" sending response header: "+ responseHeader); HTTPDemon.sendRespondHeader(conProp, countedRespond, httpVer, statusCode, client.getHttpResponse().getStatusLine().toString(), // status text responseHeader); final 
OutputStream outStream = (chunked != null) ? chunked : countedRespond; client.writeTo(outStream); if (chunked != null) { chunked.finish(); } outStream.flush(); } catch(final SocketException se) { // connection closed by client, abort download client.finish(); } finally { client.finish(); } } catch (final Exception e) { handleProxyException(e,conProp,countedRespond,url); } finally { if(countedRespond != null) { countedRespond.flush(); countedRespond.finish(); } if(respond != null) { respond.flush(); } conProp.put(HeaderFramework.CONNECTION_PROP_REQUEST_END, Long.valueOf(System.currentTimeMillis())); conProp.put(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE,(countedRespond != null) ? Long.toString(countedRespond.getCount()) : "-1"); logProxyAccess(conProp); } } /** * resolve yacy and yacyh domains * * @param host * @return */ private static String resolveYacyDomains(final String host) { return (HTTPDemon.getAlternativeResolver() == null) ? null : HTTPDemon.getAlternativeResolver().resolve(host); } /** * @param host * @param port * @param yAddress * @return */ private static String hostPart(final String host, final int port, final String yAddress) { final String connectHost = (yAddress == null) ? host +":"+ port : yAddress; return connectHost; } /** * @param conProp * @param requestHeader * @param hostlow */ private static void prepareRequestHeader(final HashMap conProp, final RequestHeader requestHeader, final String hostlow) { // set another userAgent, if not yellow-listed if ((yellowList != null) && (!(yellowList.contains(domain(hostlow))))) { // change the User-Agent requestHeader.put(HeaderFramework.USER_AGENT, generateUserAgent(requestHeader)); } // only gzip-encoding is supported, remove other encodings (e. g. 
deflate) if ((requestHeader.get(HeaderFramework.ACCEPT_ENCODING,"")).indexOf("gzip",0) != -1) { requestHeader.put(HeaderFramework.ACCEPT_ENCODING, "gzip"); } else { requestHeader.put(HeaderFramework.ACCEPT_ENCODING, ""); } addXForwardedForHeader(conProp, requestHeader); } private static String domain(final String host) { String domain = host; int pos = domain.lastIndexOf('.'); if (pos >= 0) { // truncate from last part domain = domain.substring(0, pos); pos = domain.lastIndexOf('.'); if (pos >= 0) { // truncate from first part domain = domain.substring(pos + 1); } } return domain; } /** * creates a new HttpClient and sets parameters according to proxy needs * * @param requestHeader * @param connectHost may be 'host:port' or 'host:port/path' * @return */ private static HTTPClient setupHttpClient(final RequestHeader requestHeader, final String connectHost) { // setup HTTP-client final HTTPClient client = new HTTPClient(); client.setTimout(timeout); client.setHeader(requestHeader.entrySet()); client.setRedirecting(false); return client; } /** * determines in which form the response should be send and sets header accordingly * if the content length is not set we need to use chunked content encoding * Implemented: * if !content-length * switch httpVer * case 0.9: * case 1.0: * close connection after transfer * break; * default: * new ChunkedStream around respond * end if * * @param conProp * @param responseHeader * @param statusCode * @param respond * @return */ private static ChunkedOutputStream setTransferEncoding( final HashMap conProp, final ResponseHeader responseHeader, final int statusCode, final OutputStream respond) { final String httpVer = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); ChunkedOutputStream chunkedOut = null; // gzipped response is ungzipped an therefor the length is unknown if (responseHeader.gzip() || responseHeader.getContentLength() < 0) { // according to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html // a 204,304 
message must not contain a message body. // Therefore we need to set the content-length to 0. if (statusCode == 204 || statusCode == 304) { responseHeader.put(HeaderFramework.CONTENT_LENGTH, "0"); } else { if (httpVer.equals(HeaderFramework.HTTP_VERSION_0_9) || httpVer.equals(HeaderFramework.HTTP_VERSION_1_0)) { forceConnectionClose(conProp); } else { chunkedOut = new ChunkedOutputStream(respond); } responseHeader.remove(HeaderFramework.CONTENT_LENGTH); } } return chunkedOut; } /** * @param res * @param responseHeader */ private static void prepareResponseHeader(final ResponseHeader responseHeader, final String httpVer) { modifyProxyHeaders(responseHeader, httpVer); correctContentEncoding(responseHeader); } /** * @param responseHeader */ private static void correctContentEncoding(final ResponseHeader responseHeader) { // TODO gzip again? set "correct" encoding? if(responseHeader.gzip()) { responseHeader.remove(HeaderFramework.CONTENT_ENCODING); responseHeader.remove(HeaderFramework.CONTENT_LENGTH); // remove gziped length } } /** * adds the client-IP of conProp to the requestHeader * * @param conProp * @param requestHeader */ private static void addXForwardedForHeader(final HashMap conProp, final RequestHeader requestHeader) { // setting the X-Forwarded-For Header if (sb.getConfigBool("proxy.sendXForwardedForHeader", true)) { requestHeader.put(HeaderFramework.X_FORWARDED_FOR, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP)); } } /** * removing hop by hop headers and adding additional headers * * @param requestHeader * @param httpVer */ private static void modifyProxyHeaders(final HeaderFramework requestHeader, final String httpVer) { removeHopByHopHeaders(requestHeader); setViaHeader(requestHeader, httpVer); } private static void removeHopByHopHeaders(final HeaderFramework headers) { /* - Trailers */ headers.remove(RequestHeader.CONNECTION); headers.remove(RequestHeader.KEEP_ALIVE); headers.remove(RequestHeader.UPGRADE); 
headers.remove(RequestHeader.TE); headers.remove(RequestHeader.PROXY_CONNECTION); headers.remove(RequestHeader.PROXY_AUTHENTICATE); headers.remove(RequestHeader.PROXY_AUTHORIZATION); // special headers inserted by squid headers.remove(RequestHeader.X_CACHE); headers.remove(RequestHeader.X_CACHE_LOOKUP); // remove transfer encoding header headers.remove(HeaderFramework.TRANSFER_ENCODING); //removing yacy status headers headers.remove(HeaderFramework.X_YACY_KEEP_ALIVE_REQUEST_COUNT); headers.remove(HeaderFramework.X_YACY_ORIGINAL_REQUEST_LINE); } private static void setViaHeader(final HeaderFramework header, final String httpVer) { if (!sb.getConfigBool("proxy.sendViaHeader", true)) return; final String myAddress = (HTTPDemon.getAlternativeResolver() == null) ? null : HTTPDemon.getAlternativeResolver().myAlternativeAddress(); if (myAddress != null) { // getting header set by other proxies in the chain final StringBuilder viaValue = new StringBuilder(80); if (header.containsKey(HeaderFramework.VIA)) viaValue.append(header.get(HeaderFramework.VIA)); if (viaValue.length() > 0) viaValue.append(", "); // appending info about this peer viaValue .append(httpVer).append(" ") .append(myAddress).append(" ") .append("(YaCy ").append(sb.getConfig("vString", "0.0")).append(")"); // storing header back header.put(HeaderFramework.VIA, viaValue.toString()); } } public static void doConnect(final HashMap conProp, final RequestHeader requestHeader, final InputStream clientIn, final OutputStream clientOut) throws IOException { sb.proxyLastAccess = System.currentTimeMillis(); String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER); String path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); final String args = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); if (args != null) { path = path + "?" 
+ args; } int port, pos; if ((pos = host.indexOf(':')) < 0) { port = 80; } else { port = Integer.parseInt(host.substring(pos + 1)); host = host.substring(0, pos); } // check the blacklist // blacklist idea inspired by [AS]: // respond a 404 for all AGIS ("all you get is shit") servers final String hostlow = host.toLowerCase(); if (Switchboard.urlBlacklist.isListed(BlacklistType.PROXY, hostlow, path)) { HTTPDemon.sendRespondError(conProp,clientOut,4,403,null, "URL '" + hostlow + "' blocked by yacy proxy (blacklisted)",null); log.logInfo("AGIS blocking of host '" + hostlow + "'"); forceConnectionClose(conProp); return; } // possibly branch into PROXY-PROXY connection if (ProxySettings.use && ProxySettings.use4ssl) { final HTTPClient remoteProxy = setupHttpClient(requestHeader, host); try { remoteProxy.HEADResponse("http://" + host + ":" + port); int statusCode = remoteProxy.getHttpResponse().getStatusLine().getStatusCode(); final ResponseHeader header = new ResponseHeader(statusCode, remoteProxy.getHttpResponse().getAllHeaders()); // outputs a logline to the serverlog with the current status log.logInfo("CONNECT-RESPONSE: status=" + remoteProxy.getHttpResponse().getStatusLine() + ", header=" + header.toString()); final boolean success = statusCode >= 200 && statusCode <= 399; if (success) { // replace connection details host = ProxySettings.host; port = ProxySettings.port; // go on (see below) } else { // pass error response back to client HTTPDemon.sendRespondHeader( conProp, clientOut, httpVersion, remoteProxy.getHttpResponse().getStatusLine().getStatusCode(), remoteProxy.getHttpResponse().getStatusLine().toString(), header); //respondHeader(clientOut, response.status, response.responseHeader); forceConnectionClose(conProp); return; } } catch (final Exception e) { throw new IOException(e.getMessage()); } } // try to establish connection to remote host final Socket sslSocket = new Socket(host, port); sslSocket.setSoTimeout(timeout); // waiting time for write 
sslSocket.setSoLinger(true, timeout); // waiting time for read final InputStream promiscuousIn = sslSocket.getInputStream(); final OutputStream promiscuousOut = sslSocket.getOutputStream(); // now then we can return a success message clientOut.write(UTF8.getBytes(httpVersion + " 200 Connection established" + serverCore.CRLF_STRING + "Proxy-agent: YACY" + serverCore.CRLF_STRING + serverCore.CRLF_STRING)); log.logInfo("SSL connection to " + host + ":" + port + " established."); // start stream passing with mediate processes final Mediate cs = new Mediate(sslSocket, clientIn, promiscuousOut); final Mediate sc = new Mediate(sslSocket, promiscuousIn, clientOut); cs.start(); sc.start(); while ((sslSocket != null) && (sslSocket.isBound()) && (!(sslSocket.isClosed())) && (sslSocket.isConnected()) && ((cs.isAlive()) || (sc.isAlive()))) { // idle try {Thread.sleep(1000);} catch (final InterruptedException e) {} // wait a while } // set stop mode cs.pleaseTerminate(); sc.pleaseTerminate(); // wake up thread cs.interrupt(); sc.interrupt(); // ...hope they have terminated... 
} public static class Mediate extends Thread { boolean terminate; Socket socket; InputStream in; OutputStream out; public Mediate(final Socket socket, final InputStream in, final OutputStream out) { this.terminate = false; this.in = in; this.out = out; this.socket = socket; } @Override public void run() { final byte[] buffer = new byte[512]; int len; try { while ((this.socket != null) && (this.socket.isBound()) && (!(this.socket.isClosed())) && (this.socket.isConnected()) && (!(this.terminate)) && (this.in != null) && (this.out != null) && ((len = this.in.read(buffer)) >= 0) ) { this.out.write(buffer, 0, len); } } catch (final IOException e) { // do nothing } catch (final Exception e) { Log.logException(e); } } public void pleaseTerminate() { this.terminate = true; } } private static void handleProxyException(final Exception e, final HashMap conProp, final OutputStream respond, final DigestURI url) { // this may happen if // - the targeted host does not exist // - anything with the remote server was wrong. // - the client unexpectedly closed the connection ... try { // doing some errorhandling ... 
int httpStatusCode = 404; String httpStatusText = null; String errorMessage = null; Exception errorExc = null; boolean unknownError = false; // for customized error messages boolean detailedErrorMsg = false; String detailedErrorMsgFile = null; serverObjects detailedErrorMsgMap = null; if (e instanceof ConnectException) { httpStatusCode = 403; httpStatusText = "Connection refused"; errorMessage = "Connection refused by destination host"; } else if (e instanceof BindException) { errorMessage = "Unable to establish a connection to the destination host"; } else if (e instanceof NoRouteToHostException) { errorMessage = "No route to destination host"; } else if (e instanceof UnknownHostException) { //errorMessage = "IP address of the destination host could not be determined"; try { detailedErrorMsgMap = unknownHostHandling(conProp); httpStatusText = "Unknown Host"; detailedErrorMsg = true; detailedErrorMsgFile = "proxymsg/unknownHost.inc"; } catch (final Exception e1) { errorMessage = "IP address of the destination host could not be determined"; } } else if (e instanceof SocketTimeoutException) { errorMessage = "Unable to establish a connection to the destination host. 
Connect timed out."; } else { final String exceptionMsg = e.getMessage(); if ((exceptionMsg != null) && (exceptionMsg.indexOf("Corrupt GZIP trailer",0) >= 0)) { // just do nothing, we leave it this way if (log.isFine()) log.logFine("ignoring bad gzip trail for URL " + url + " (" + e.getMessage() + ")"); forceConnectionClose(conProp); } else if ((exceptionMsg != null) && (exceptionMsg.indexOf("Connection reset",0)>= 0)) { errorMessage = "Connection reset"; } else if ((exceptionMsg != null) && (exceptionMsg.indexOf("unknown host",0)>=0)) { try { detailedErrorMsgMap = unknownHostHandling(conProp); httpStatusText = "Unknown Host"; detailedErrorMsg = true; detailedErrorMsgFile = "proxymsg/unknownHost.inc"; } catch (final Exception e1) { errorMessage = "IP address of the destination host could not be determined"; } } else if ((exceptionMsg != null) && ( (exceptionMsg.indexOf("socket write error",0)>=0) || (exceptionMsg.indexOf("Read timed out",0) >= 0) || (exceptionMsg.indexOf("Broken pipe",0) >= 0) || (exceptionMsg.indexOf("server has closed connection",0) >= 0) )) { errorMessage = exceptionMsg; Log.logException(e); } else { errorMessage = "Unexpected Error. 
" + e.getClass().getName() + ": " + e.getMessage(); unknownError = true; errorExc = e; } } // sending back an error message to the client if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { if (detailedErrorMsg) { HTTPDemon.sendRespondError(conProp,respond, httpStatusCode, httpStatusText, new File(detailedErrorMsgFile), detailedErrorMsgMap, errorExc); } else { HTTPDemon.sendRespondError(conProp,respond,4,httpStatusCode,httpStatusText,errorMessage,errorExc); } } else { if (unknownError) { log.logSevere("Unknown Error while processing request '" + conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE) + "':" + "\n" + Thread.currentThread().getName() + "\n" + errorMessage,e); } else { log.logWarning("Error while processing request '" + conProp.get(HeaderFramework.CONNECTION_PROP_REQUESTLINE) + "':" + "\n" + Thread.currentThread().getName() + "\n" + errorMessage); } forceConnectionClose(conProp); } } catch (final Exception ee) { forceConnectionClose(conProp); } } private static void forceConnectionClose(final HashMap conProp) { if (conProp != null) { conProp.put(HeaderFramework.CONNECTION_PROP_PERSISTENT,"close"); } } private static serverObjects unknownHostHandling(final HashMap conProp) throws Exception { final serverObjects detailedErrorMsgMap = new serverObjects(); // generic toplevel domains final HashSet topLevelDomains = new HashSet(Arrays.asList(new String[]{ "aero", // Fluggesellschaften/Luftfahrt "arpa", // Einrichtung des ARPANet "biz", // Business "com", // Commercial "coop", // genossenschaftliche Unternehmen "edu", // Education "gov", // Government "info", // Informationsangebote "int", // International "jobs", // Jobangebote von Unternemen "mil", // Military (US-Militaer) // "museum", // Museen "name", // Privatpersonen "nato", // NATO (veraltet) "net", // Net (Netzwerkbetreiber) "org", // Organization (Nichtkommerzielle Organisation) "pro", // Professionals "travel", // Touristikindustrie // some country tlds "de", "at", 
"ch", "it", "uk" })); // getting some connection properties String orgHostPort = "80"; String orgHostName = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); if (orgHostName == null) orgHostName = "unknown"; orgHostName = orgHostName.toLowerCase(); int pos = orgHostName.indexOf(':'); if (pos != -1) { orgHostPort = orgHostName.substring(pos+1); orgHostName = orgHostName.substring(0,pos); } String orgHostPath = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH); if (orgHostPath == null) orgHostPath = ""; String orgHostArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); if (orgHostArgs == null) orgHostArgs = ""; if (orgHostArgs.length() > 0) orgHostArgs = "?" + orgHostArgs; detailedErrorMsgMap.put("hostName", orgHostName); // guessing hostnames final HashSet testHostNames = new HashSet(); String testHostName = null; if (!orgHostName.startsWith("www.")) { testHostName = "www." + orgHostName; final InetAddress addr = Domains.dnsResolve(testHostName); if (addr != null) testHostNames.add(testHostName); } else if (orgHostName.startsWith("www.")) { testHostName = orgHostName.substring(4); final InetAddress addr = Domains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } if (orgHostName.length()>4 && orgHostName.startsWith("www") && (orgHostName.charAt(3) != '.')) { testHostName = orgHostName.substring(0,3) + "." + orgHostName.substring(3); final InetAddress addr = Domains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } pos = orgHostName.lastIndexOf('.'); if (pos != -1) { final Iterator iter = topLevelDomains.iterator(); while (iter.hasNext()) { final String topLevelDomain = iter.next(); testHostName = orgHostName.substring(0,pos) + "." 
+ topLevelDomain; final InetAddress addr = Domains.dnsResolve(testHostName); if (addr != null) if (addr != null) testHostNames.add(testHostName); } } int hostNameCount = 0; final Iterator iter = testHostNames.iterator(); while (iter.hasNext()) { testHostName = iter.next(); detailedErrorMsgMap.put("list_" + hostNameCount + "_hostName",testHostName); detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPort",orgHostPort); detailedErrorMsgMap.put("list_" + hostNameCount + "_hostPath",orgHostPath); detailedErrorMsgMap.put("list_" + hostNameCount + "_hostArgs",orgHostArgs); hostNameCount++; } detailedErrorMsgMap.put("list", hostNameCount); if (hostNameCount != 0) { detailedErrorMsgMap.put("showList", 1); } else { detailedErrorMsgMap.put("showList", 0); } return detailedErrorMsgMap; } private static synchronized String generateUserAgent(final HeaderFramework requestHeaders) { userAgentStr.setLength(0); final String browserUserAgent = requestHeaders.get(HeaderFramework.USER_AGENT, yacyProxyUserAgent); final int pos = browserUserAgent.lastIndexOf(')'); if (pos >= 0) { userAgentStr .append(browserUserAgent.substring(0,pos)) .append("; YaCy ") .append(sb.getConfig("vString","0.1")) .append("; yacy.net") .append(browserUserAgent.substring(pos)); } else { userAgentStr.append(browserUserAgent); } return userAgentStr.toString(); } /** * This function is used to generate a logging message according to the * squid logging format.

* e.g.
* 1117528623.857 178 192.168.1.201 TCP_MISS/200 1069 GET http://www.yacy.de/ - DIRECT/81.169.145.74 text/html */ private final static synchronized void logProxyAccess(final HashMap conProp) { if (!doAccessLogging) return; logMessage.setLength(0); // Timestamp final String currentTimestamp = Long.toString(System.currentTimeMillis()); final int offset = currentTimestamp.length()-3; logMessage.append(currentTimestamp.substring(0,offset)); logMessage.append('.'); logMessage.append(currentTimestamp.substring(offset)); logMessage.append(' '); // Elapsed time final Long requestStart = (Long) conProp.get(HeaderFramework.CONNECTION_PROP_REQUEST_START); final Long requestEnd = (Long) conProp.get(HeaderFramework.CONNECTION_PROP_REQUEST_END); final String elapsed = Long.toString(requestEnd.longValue()-requestStart.longValue()); for (int i=0; i<6-elapsed.length(); i++) logMessage.append(' '); logMessage.append(elapsed); logMessage.append(' '); // Remote Host final String clientIP = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); logMessage.append(clientIP); logMessage.append(' '); // Code/Status final String respondStatus = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS); String respondCode = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_CODE); if (respondCode == null) respondCode = "UNKNOWN"; logMessage.append(respondCode); logMessage.append("/"); logMessage.append(respondStatus); logMessage.append(' '); // Bytes final String bytes = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE); logMessage.append(bytes.toString()); logMessage.append(' '); // Method final String requestMethod = (String) conProp.get(HeaderFramework.CONNECTION_PROP_METHOD); logMessage.append(requestMethod); logMessage.append(' '); // URL final String requestURL = (String) conProp.get(HeaderFramework.CONNECTION_PROP_URL); final String requestArgs = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); 
logMessage.append(requestURL); if (requestArgs != null) { logMessage.append("?") .append(requestArgs); } logMessage.append(' '); // Rfc931 logMessage.append("-"); logMessage.append(' '); // Peerstatus/Peerhost final String host = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HOST); logMessage.append("DIRECT/"); logMessage.append(host); logMessage.append(' '); // Type String mime = "-"; if (conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) { final HeaderFramework proxyRespondHeader = (HeaderFramework) conProp.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER); mime = proxyRespondHeader.mime(); if (mime.indexOf(';') != -1) { mime = mime.substring(0,mime.indexOf(';')); } } logMessage.append(mime); // sending the logging message to the logger if (proxyLog.isFine()) proxyLog.logFine(logMessage.toString()); } } /* proxy test: http://www.chipchapin.com/WebTools/cookietest.php? http://xlists.aza.org/moderator/cookietest/cookietest1.php http://vancouver-webpages.com/proxy/cache-test.html */