From 499723891d91436f7510c1a916d2badb6ecdb8cc Mon Sep 17 00:00:00 2001 From: orbiter Date: Wed, 8 Jul 2009 22:24:34 +0000 Subject: [PATCH] removed all non-http daemons; they had not been used and may be a potential security risk. git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6185 6c8d7289-2bf4-0310-a012-ef5d649a1542 --- htroot/Connections_p.java | 8 +- source/de/anomic/icap/icapHeader.java | 286 ------------- source/de/anomic/icap/icapd.java | 433 -------------------- source/de/anomic/server/serverCore.java | 21 +- source/de/anomic/server/urlRedirector.pl | 142 ------- source/de/anomic/server/urlRedirectord.java | 232 ----------- 6 files changed, 5 insertions(+), 1117 deletions(-) delete mode 100644 source/de/anomic/icap/icapHeader.java delete mode 100644 source/de/anomic/icap/icapd.java delete mode 100644 source/de/anomic/server/urlRedirector.pl delete mode 100644 source/de/anomic/server/urlRedirectord.java diff --git a/htroot/Connections_p.java b/htroot/Connections_p.java index 66f871c02..1a50bb477 100644 --- a/htroot/Connections_p.java +++ b/htroot/Connections_p.java @@ -45,7 +45,6 @@ import de.anomic.server.serverHandler; import de.anomic.server.serverObjects; import de.anomic.server.serverSwitch; import de.anomic.server.serverThread; -import de.anomic.server.urlRedirectord; import de.anomic.server.serverCore.Session; import de.anomic.yacy.yacySeed; @@ -154,12 +153,7 @@ public final class Connections_p { // getting the destination host dest = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST); if (dest==null)continue; - } else if (cmdObj instanceof urlRedirectord) { - prot = "urlRedirector"; - - final urlRedirectord urlRedir = (urlRedirectord)cmdObj; - commandLine = urlRedir.getURL(); - } + } if ((dest != null) && (dest.equals(virtualHost))) dest = sb.peers.mySeed().getName() + ".yacy"; diff --git a/source/de/anomic/icap/icapHeader.java b/source/de/anomic/icap/icapHeader.java deleted file mode 100644 index fecbb7b35..000000000 --- a/source/de/anomic/icap/icapHeader.java +++ /dev/null @@ -1,286 +0,0 @@ -//icapHeader.java -//----------------------- -//(C) by Michael Peter Christen; mc@yacy.net -//first published on http://www.anomic.de -//Frankfurt, Germany, 2004 -// -//This file is contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.icap; - -import java.text.Collator; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import de.anomic.server.serverCore; - -public class icapHeader extends TreeMap implements Map { - - private static final long serialVersionUID = 1L; - - /* ============================================================= - * Constants defining icap methods - * ============================================================= */ - public static final String METHOD_REQMOD = "REQMOD"; - public static final String METHOD_RESPMOD = "RESPMOD"; - public static final String METHOD_OPTIONS = "OPTIONS"; - - /* ============================================================= - * Constants defining http header names - * ============================================================= */ - public static final String HOST = "Host"; - public static final String USER_AGENT = "User-Agent"; - public static final String CONNECTION = "Connection"; - public static final String DATE = "Date"; - public static final String SERVER = "Server"; - public static final String ISTAG = "ISTAG"; - public static final String METHODS = "Methods"; - public static final String ALLOW = "Allow"; - public static final String ENCAPSULATED = "Encapsulated"; - public static final String MAX_CONNECTIONS = "Max-Connections"; - public static final String OPTIONS_TTL = "Options-TTL"; - public static final String SERVICE = "Service"; - public static final String SERVICE_ID = "Service-ID"; - public static final String PREVIEW = "Preview"; - public static final String TRANSFER_PREVIEW = "Transfer-Preview"; - public static final String TRANSFER_IGNORE = "Transfer-Ignore"; - public static final String TRANSFER_COMPLETE = "Transfer-Complete"; - - public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count"; - - /* ============================================================= - * defining default icap status messages - * ============================================================= */ - public static final HashMap icap1_0 = new HashMap(); - static { - // (1yz) Informational codes - icap1_0.put("100","Continue after ICAP preview"); - - // (2yz) Success codes: - icap1_0.put("200","OK"); - icap1_0.put("204","No modifications needed"); - - // (4yz) Client error codes: - icap1_0.put("400","Bad request"); - icap1_0.put("404","ICAP Service not found"); - icap1_0.put("405","Method not allowed for service"); - icap1_0.put("408","Request timeout"); - - // (5yz) Server error codes: - icap1_0.put("500","Server error"); - icap1_0.put("501","Method not implemented"); - icap1_0.put("502","Bad Gateway"); - icap1_0.put("503","Service overloaded"); - icap1_0.put("505","ICAP version not supported by server"); - } - - /* PROPERTIES: General properties */ - public static final String CONNECTION_PROP_ICAP_VER = "ICAP"; - public static final String CONNECTION_PROP_HOST = "HOST"; - public static final String CONNECTION_PROP_PATH = "PATH"; - public static final String CONNECTION_PROP_EXT = "EXT"; - public static final String CONNECTION_PROP_METHOD = "METHOD"; - public static final String CONNECTION_PROP_REQUESTLINE = "REQUESTLINE"; - public static final String CONNECTION_PROP_CLIENTIP = "CLIENTIP"; - public static final String CONNECTION_PROP_URL = "URL"; - public static final String CONNECTION_PROP_ARGS = "ARGS"; - public static final String CONNECTION_PROP_PERSISTENT = "PERSISTENT"; - public static final String CONNECTION_PROP_KEEP_ALIVE_COUNT = "KEEP-ALIVE_COUNT"; - - private static final Collator insensitiveCollator = Collator.getInstance(Locale.US); - static { - insensitiveCollator.setStrength(Collator.SECONDARY); - insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION); - } - - public icapHeader() { - super(insensitiveCollator); - } - - public boolean allow(final int statusCode) { - if (!super.containsKey("Allow")) return false; - - final String allow = get("Allow"); - return (allow.indexOf(Integer.toString(statusCode))!=-1); - } - - // to make the occurrence of multiple keys possible, we add them using a counter - public String add(final String key, final String value) { - final int c = keyCount(key); - if (c == 0) return put(key, value); - return put("*" + key + "-" + c, value); - } - - public int keyCount(final String key) { - if (!(containsKey(key))) return 0; - int c = 1; - while (containsKey("*" + key + "-" + c)) c++; - return c; - } - - // a convenience method to access the map with fail-over defaults - public Object get(final Object key, final Object dflt) { - final Object result = get(key); - if (result == null) return dflt; - return result; - } - - // return multiple results - public Object getSingle(final Object key, final int count) { - if (count == 0) return get(key, null); - return get("*" + key + "-" + count, null); - } - - public StringBuilder toHeaderString(final String icapVersion, final int icapStatusCode, String icapStatusText) { - - if ((icapStatusText == null)||(icapStatusText.length()==0)) { - if (icapVersion.equals("ICAP/1.0") && icapHeader.icap1_0.containsKey(Integer.toString(icapStatusCode))) - icapStatusText = icapHeader.icap1_0.get(Integer.toString(icapStatusCode)); - } - - final StringBuilder theHeader = new StringBuilder(); - - // write status line - theHeader.append(icapVersion).append(" ") - .append(Integer.toString(icapStatusCode)).append(" ") - .append(icapStatusText).append("\r\n"); - - // write header - final Iterator i = keySet().iterator(); - String key; - char tag; - int count; - while (i.hasNext()) { - key = i.next(); - tag = key.charAt(0); - if ((tag != '*') && (tag != '#')) { // '#' in key is reserved for proxy attributes as artificial header values - count = keyCount(key); - for (int j = 0; j < count; j++) { - theHeader.append(key).append(": ").append((String) getSingle(key, j)).append("\r\n"); - } - } - } - // end header - theHeader.append("\r\n"); - - - return theHeader; - } - - public static Properties parseRequestLine(final String cmd, String s, final Properties prop, final String virtualHost) { - - // reset property from previous run - prop.clear(); - - // storing informations about the request - prop.setProperty(CONNECTION_PROP_METHOD, cmd); - prop.setProperty(CONNECTION_PROP_REQUESTLINE,cmd + " " + s); - - - // this parses a whole URL - if (s.length() == 0) { - prop.setProperty(CONNECTION_PROP_HOST, virtualHost); - prop.setProperty(CONNECTION_PROP_PATH, "/"); - prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0"); - prop.setProperty(CONNECTION_PROP_EXT, ""); - return prop; - } - - // store the version propery "ICAP" and cut the query at both ends - int sep = s.indexOf(" "); - if (sep >= 0) { - // ICAP version is given - prop.setProperty(CONNECTION_PROP_ICAP_VER, s.substring(sep + 1).trim()); - s = s.substring(0, sep).trim(); // cut off ICAP version mark - } else { - // ICAP version is not given, it will be treated as ver 0.9 - prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0"); - } - - - String argsString = ""; - sep = s.indexOf("?"); - if (sep >= 0) { - // there are values attached to the query string - argsString = s.substring(sep + 1); // cut haed from tail of query - s = s.substring(0, sep); - } - prop.setProperty(CONNECTION_PROP_URL, s); // store URL - if (argsString.length() != 0) prop.setProperty(CONNECTION_PROP_ARGS, argsString); // store arguments in original form - - // finally find host string - if (s.toUpperCase().startsWith("ICAP://")) { - // a host was given. extract it and set path - s = s.substring(7); - sep = s.indexOf("/"); - if (sep < 0) { - // this is a malformed url, something like - // http://index.html - // we are lazy and guess that it means - // /index.html - // which is a localhost access to the file servlet - prop.setProperty(CONNECTION_PROP_HOST, virtualHost); - prop.setProperty(CONNECTION_PROP_PATH, "/" + s); - } else { - // THIS IS THE "GOOD" CASE - // a perfect formulated url - prop.setProperty(CONNECTION_PROP_HOST, s.substring(0, sep)); - prop.setProperty(CONNECTION_PROP_PATH, s.substring(sep)); // yes, including beginning "/" - } - } else { - // no host in url. set path - if (s.startsWith("/")) { - // thats also fine, its a perfect localhost access - // in this case, we simulate a - // http://localhost/s - // access by setting a virtual host - prop.setProperty(CONNECTION_PROP_HOST, virtualHost); - prop.setProperty(CONNECTION_PROP_PATH, s); - } else { - // the client 'forgot' to set a leading '/' - // this is the same case as above, with some lazyness - prop.setProperty(CONNECTION_PROP_HOST, virtualHost); - prop.setProperty(CONNECTION_PROP_PATH, "/" + s); - } - } - return prop; - - } - - public static icapHeader readHeader(final Properties prop, final serverCore.Session theSession) { - // reading all headers - final icapHeader header = new icapHeader(); - int p; - String line; - while ((line = theSession.readLineAsString()) != null) { - if (line.length() == 0) break; // this seperates the header of the HTTP request from the body - // parse the header line: a property seperated with the ':' sign - if ((p = line.indexOf(":")) >= 0) { - // store a property - header.add(line.substring(0, p).trim(), line.substring(p + 1).trim()); - } - } - - return header; - } -} diff --git a/source/de/anomic/icap/icapd.java b/source/de/anomic/icap/icapd.java deleted file mode 100644 index 6a0038782..000000000 --- a/source/de/anomic/icap/icapd.java +++ /dev/null @@ -1,433 +0,0 @@ -//icapd.java -//----------------------- -//(C) by Michael Peter Christen; mc@yacy.net -//first published on http://www.anomic.de -//Frankfurt, Germany, 2004 -// -//This file is contributed by Martin Thelian -//last major change: $LastChangedDate$ by $LastChangedBy$ -//Revision: $LastChangedRevision$ -// -//This program is free software; you can redistribute it and/or modify -//it under the terms of the GNU General Public License as published by -//the Free Software Foundation; either version 2 of the License, or -//(at your option) any later version. -// -//This program is distributed in the hope that it will be useful, -//but WITHOUT ANY WARRANTY; without even the implied warranty of -//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//GNU General Public License for more details. -// -//You should have received a copy of the GNU General Public License -//along with this program; if not, write to the Free Software -//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -package de.anomic.icap; - -import java.io.BufferedOutputStream; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.net.InetAddress; -import java.util.Date; -import java.util.Properties; - -import de.anomic.document.ParserDispatcher; -import de.anomic.http.httpChunkedInputStream; -import de.anomic.http.httpHeader; -import de.anomic.http.httpRequestHeader; -import de.anomic.http.httpResponseHeader; -import de.anomic.http.httpDocument; -import de.anomic.kelondro.util.DateFormatter; -import de.anomic.kelondro.util.FileUtils; -import de.anomic.plasma.plasmaHTCache; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore; -import de.anomic.server.serverHandler; -import de.anomic.server.serverCore.Session; -import de.anomic.yacy.yacyURL; -import de.anomic.yacy.logging.Log; - -/** - * @author theli - */ -public class icapd implements serverHandler, Cloneable { - - - private serverCore.Session session; // holds the session object of the calling class - - // the connection properties - private final Properties prop = new Properties(); - - // the address of the client - private InetAddress userAddress; - private String clientIP; - private int keepAliveRequestCount = 0; - - // needed for logging - private static final Log log = new Log("ICAPD"); - - private static plasmaSwitchboard sb = null; - private static String virtualHost = null; - private static boolean keepAliveSupport = true; - - - - public icapd() { - if (sb == null) { - sb = plasmaSwitchboard.getSwitchboard(); - virtualHost = sb.getConfig("fileHost","localhost"); - } - - } - - public icapd clone(){ - return new icapd(); - } - - public void initSession(final Session aSession) throws IOException { - this.session = aSession; - this.userAddress = aSession.userAddress; // client InetAddress - this.clientIP = this.userAddress.getHostAddress(); - if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost"; - if (this.clientIP.startsWith("0:0:0:0:0:0:0:1")) this.clientIP = "localhost"; - if (this.clientIP.startsWith("127.")) this.clientIP = "localhost"; - } - - public String greeting() { - // TODO Auto-generated method stub - return null; - } - - public String error(final Throwable e) { - // TODO Auto-generated method stub - return null; - } - - public void reset() { - } - - public Boolean EMPTY(final String arg) throws IOException { - // TODO Auto-generated method stub - return serverCore.TERMINATE_CONNECTION; - } - - public Boolean UNKNOWN(final String requestLine) throws IOException { - // TODO Auto-generated method stub - return serverCore.TERMINATE_CONNECTION; - } - - public icapHeader getDefaultHeaders() { - final icapHeader newHeaders = new icapHeader(); - - newHeaders.put(icapHeader.SERVER,"YaCy/" + sb.getConfig("vString","")); - newHeaders.put(icapHeader.DATE, DateFormatter.formatRFC1123(new Date())); - newHeaders.put(icapHeader.ISTAG, "\"" + sb.getConfig("vString","") + "\""); - - return newHeaders; - } - - public Boolean OPTIONS(final String arg) throws IOException { - - final BufferedOutputStream out = new BufferedOutputStream(this.session.out); - - // parsing the http request line - parseRequestLine(icapHeader.METHOD_OPTIONS,arg); - - // reading the headers - final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session); - - // determines if the connection should be kept alive - final boolean persistent = handlePersistentConnection(icapReqHeader); - - // setting the icap response headers - final icapHeader resHeader = getDefaultHeaders(); - resHeader.put(icapHeader.ALLOW,"204"); - resHeader.put(icapHeader.ENCAPSULATED,"null-body=0"); - resHeader.put(icapHeader.MAX_CONNECTIONS,"1000"); - resHeader.put(icapHeader.OPTIONS_TTL,"300"); - resHeader.put(icapHeader.SERVICE_ID, "???"); - resHeader.put(icapHeader.PREVIEW, "30"); - resHeader.put(icapHeader.TRANSFER_COMPLETE, "*"); - //resHeader.put(icapHeader.TRANSFER_PREVIEW, "*"); - if (!persistent) resHeader.put(icapHeader.CONNECTION, "close"); - - - // determining the requested service and call it or send back an error message - final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,""); - if (reqService.equalsIgnoreCase("/resIndexing")) { - resHeader.put(icapHeader.SERVICE, "YaCy ICAP Indexing Service 1.0"); - resHeader.put(icapHeader.METHODS,icapHeader.METHOD_RESPMOD); - - String transferIgnoreList = ParserDispatcher.getMediaExtList(); - transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1); - resHeader.put(icapHeader.TRANSFER_IGNORE, transferIgnoreList); - } else { - resHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0"); - } - - - final StringBuilder header = resHeader.toHeaderString("ICAP/1.0",200,null); - out.write(header.toString().getBytes()); - out.flush(); - - return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION; - } - - public Boolean REQMOD() { - return serverCore.TERMINATE_CONNECTION; - } - - public Boolean RESPMOD(final String arg) { - try { - final InputStream in = this.session.in; - final OutputStream out = this.session.out; - - // parsing the icap request line - parseRequestLine(icapHeader.METHOD_RESPMOD,arg); - - // reading the icap request header - final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session); - - // determines if the connection should be kept alive - handlePersistentConnection(icapReqHeader); - - // determining the requested service and call it or send back an error message - final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,""); - if (reqService.equalsIgnoreCase("/resIndexing")) { - indexingService(icapReqHeader,in,out); - } else { - final icapHeader icapResHeader = getDefaultHeaders(); - icapResHeader.put(icapHeader.ENCAPSULATED,icapReqHeader.get(icapHeader.ENCAPSULATED)); - icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0"); - // icapResHeader.put(icapHeader.CONNECTION, "close"); - - final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",404,null); - out.write((new String(header)).getBytes()); - out.flush(); - } - - - - } catch (final Exception e) { - e.printStackTrace(); - } finally { - - } - return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION; - } - - /* - private void blacklistService(icapHeader reqHeader, InputStream in, OutputStream out) { - try { - - } catch (Exception e) { - e.printStackTrace(); - } - } - */ - - private void indexingService(final icapHeader reqHeader, final InputStream in, final OutputStream out) { - try { - - /* ========================================================================= - * Reading the various message parts into buffers - * ========================================================================= */ - ByteArrayInputStream reqHdrStream = null, resHdrStream = null, resBodyStream = null; - final String[] encapsulated = (reqHeader.get(icapHeader.ENCAPSULATED)).split(","); - int prevLength = 0, currLength=0; - for (int i=0; i < encapsulated.length; i++) { - // reading the request header - if (encapsulated[i].indexOf("req-hdr")>=0) { - prevLength = currLength; - currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]); - - final byte[] buffer = new byte[currLength-prevLength]; - final int bytesRead = in.read(buffer, 0, buffer.length); - assert bytesRead == buffer.length; - - reqHdrStream = new ByteArrayInputStream(buffer); - - // reading the response header - } else if (encapsulated[i].indexOf("res-hdr")>=0) { - prevLength = currLength; - currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]); - - final byte[] buffer = new byte[currLength-prevLength]; - final int bytesRead = in.read(buffer, 0, buffer.length); - assert bytesRead == buffer.length; - - resHdrStream = new ByteArrayInputStream(buffer); - - // reading the response body - } else if (encapsulated[i].indexOf("res-body")>=0) { - final httpChunkedInputStream chunkedIn = new httpChunkedInputStream(in); - final ByteArrayOutputStream bout = new ByteArrayOutputStream(); - int l = 0,len = 0; - final byte[] buffer = new byte[2048]; - while ((l = chunkedIn.read(buffer)) >= 0) { - len += l; - bout.write(buffer,0,l); - } - resBodyStream = new ByteArrayInputStream(bout.toByteArray()); - } - } - - /* ========================================================================= - * sending back the icap status - * ========================================================================= */ - final icapHeader icapResHeader = getDefaultHeaders(); - if (reqHeader.allow(204)) { - icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED)); - icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0"); - // resHeader.put(icapHeader.CONNECTION, "close"); - - final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",204,null); - out.write((new String(header)).getBytes()); - out.flush(); - } else { - icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED)); - icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0"); - // icapResHeader.put(icapHeader.CONNECTION, "close"); - - final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",503,null); - out.write((new String(header)).getBytes()); - out.flush(); - } - - /* ========================================================================= - * Parsing request data - * ========================================================================= */ - // reading the requestline - BufferedReader reader = new BufferedReader(new InputStreamReader(reqHdrStream)); - final String httpRequestLine = reader.readLine(); - - // parsing the requestline - final Properties httpReqProps = new Properties(); - httpRequestHeader.parseRequestLine(httpRequestLine,httpReqProps,virtualHost); - - if (!httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD).equals(httpHeader.METHOD_GET)) { - log.logInfo("Wrong http request method for indexing:" + - "\nRequest Method: " + httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD) + - "\nRequest Line: " + httpRequestLine); - reader.close(); - if(reqHdrStream != null) { - reqHdrStream.close(); - } - return; - } - - // reading all request headers - final httpRequestHeader httpReqHeader = new httpRequestHeader(); - httpReqHeader.readHttpHeader(reader); - reader.close(); - if(reqHdrStream != null) { - reqHdrStream.close(); - } - - // handle transparent proxy support: this function call is needed to set the host property properly - httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true); - - // getting the request URL - final yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps); - - /* ========================================================================= - * Parsing response data - * ========================================================================= */ - // getting the response status - reader = new BufferedReader(new InputStreamReader(resHdrStream)); - final String httpRespStatusLine = reader.readLine(); - - final Object[] httpRespStatus = httpResponseHeader.parseResponseLine(httpRespStatusLine); - - if (!(httpRespStatus[1].equals(Integer.valueOf(200)) || httpRespStatus[1].equals(Integer.valueOf(203)))) { - log.logInfo("Wrong status code for indexing:" + - "\nStatus Code: " + httpRespStatus[1] + - "\nRequest Line: " + httpRequestLine + - "\nResponse Line: " + httpRespStatusLine); - reader.close(); - if(resHdrStream != null) { - resHdrStream.close(); - } - return; - } - - // reading all response headers - final httpResponseHeader httpResHeader = new httpResponseHeader(); - httpResHeader.readHttpHeader(reader); - reader.close(); - if(resHdrStream != null) { - resHdrStream.close(); - } - - if (!ParserDispatcher.supportedContent(httpRequestURL, httpResHeader.mime())) { - log.logInfo("Wrong mimeType or fileExtension for indexing:" + - "\nMimeType: " + httpResHeader.mime() + - "\nRequest Line:" + httpRequestLine); - return ; - } - - - /* ========================================================================= - * Prepare data for indexing - * ========================================================================= */ - - // generating a htcache entry object - final httpDocument cacheEntry = new httpDocument( - 0, - httpRequestURL, - "", - httpRespStatusLine, - httpReqHeader, httpResHeader, - null, - sb.crawler.defaultProxyProfile - ); - - // copy the response body into the file - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - FileUtils.copy(resBodyStream, baos); - if(resBodyStream != null) { - resBodyStream.close(); resBodyStream = null; - } - cacheEntry.setCacheArray(baos.toByteArray()); - plasmaHTCache.storeMetadata(httpResHeader, cacheEntry); - - // indexing the response - sb.htEntryStoreProcess(cacheEntry); - } catch (final Exception e) { - e.printStackTrace(); - } - } - - private final void parseRequestLine(final String cmd, final String s) { - // parsing the requestlin - icapHeader.parseRequestLine(cmd,s, this.prop,virtualHost); - - // adding the client ip prop - this.prop.setProperty(icapHeader.CONNECTION_PROP_CLIENTIP, this.clientIP); - - // counting the amount of received requests within this permanent conneciton - this.prop.setProperty(icapHeader.CONNECTION_PROP_KEEP_ALIVE_COUNT, Integer.toString(++this.keepAliveRequestCount)); - } - - private boolean handlePersistentConnection(final icapHeader header) { - - if (!keepAliveSupport) { - this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,"close"); - return false; - } - - boolean persistent = true; - if (((String)header.get(icapHeader.CONNECTION, "keep-alive")).toLowerCase().equals("close")) { - persistent = false; - } - - this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,persistent?"keep-alive":"close"); - return persistent; - } - -} diff --git a/source/de/anomic/server/serverCore.java b/source/de/anomic/server/serverCore.java index 8b00f26a8..5f2c5bc09 100644 --- a/source/de/anomic/server/serverCore.java +++ b/source/de/anomic/server/serverCore.java @@ -56,7 +56,6 @@ import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSocket; import javax.net.ssl.SSLSocketFactory; -import de.anomic.icap.icapd; import de.anomic.kelondro.util.ByteBuffer; import de.anomic.tools.PKCS12Tool; import de.anomic.yacy.logging.Log; @@ -696,23 +695,11 @@ public final class serverCore extends serverAbstractBusyThread implements server // now we need to initialize the session if (this.commandCounter == 0) { // first we need to determine the proper protocol handler - if (this.request.indexOf("ICAP") >= 0) reqProtocol = "ICAP"; - else if (this.request.startsWith("REDIRECTOR")) reqProtocol = "REDIRECTOR"; - else reqProtocol = "HTTP"; + if (this.request.indexOf("HTTP") >= 0) reqProtocol = "HTTP"; + else reqProtocol = null; - // next we need to get the proper protocol handler - if (reqProtocol.equals("ICAP")) { - this.commandObj = new icapd(); - } else if (reqProtocol.equals("REDIRECTOR")) { - this.commandObj = new urlRedirectord(); - } else { -// if ((this.commandObj != null) && -// (this.commandObj.getClass().getName().equals(serverCore.this.handlerPrototype.getClass().getName()))) { -// this.commandObj.reset(); -// } else { -// this.commandObj = (serverHandler) serverCore.this.handlerPrototype.clone(); -// } - + if (this.request == null) break; + if (reqProtocol.equals("HTTP")) { this.commandObj = serverCore.this.handlerPrototype.clone(); } diff --git a/source/de/anomic/server/urlRedirector.pl b/source/de/anomic/server/urlRedirector.pl deleted file mode 100644 index 7c6918e41..000000000 --- a/source/de/anomic/server/urlRedirector.pl +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/perl -w -# -# This is an URL Redirector Script for squid that can be -# used to bundle YaCy and Squid together via the squid -# redirector support. -# See: http://www.squid-cache.org/Doc/FAQ/FAQ-15.html -# -# This scripts forwards URLs from squid to YaCy where the -# URLs are used to download and index the content of the URLs. -use strict; -use Socket qw(:DEFAULT :crlf); -use IO::Handle; -use Digest::MD5; - -# setting administrator username + pwd, hostname + port -my $user = "user"; -my $pwd = ""; -my $host = "localhost"; -my $port = "8080"; - -my $allowCgi = 0; -my $allowPost = 0; - -my @mediaExt; -my @requestData; - -$|=1; - -sub isCGI { - my $url = lc shift; - return ((rindex $url, ".cgi") != -1) || - ((rindex $url, ".exe") != -1) || - ((rindex $url, ";jsessionid=") != -1) || - ((rindex $url, "sessionid/") != -1) || - ((rindex $url, "phpsessid=") != -1); -} - -sub isPOST { - my $url = lc shift; - return ((rindex $url, "?") != -1) || - ((rindex $url, "&") != -1); -} - -sub isMediaExt { - my $url = $_[0]; - my @extList = @{$_[1]}; - my $pos = rindex $url, "."; - - if ($pos != -1) { - my $ext = substr($url,$pos+1,length($url)); - my @match = grep(/$ext/,@extList); - return scalar(@match); - } - return 0; -} - -my ($bytes_out,$bytes_in) = (0,0); -my ($msg_in,$msg_out); - -my $protocol = getprotobyname('tcp'); -$host = inet_aton($host) or die "$host: unknown host"; - -socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die "socket() failed: $!"; -my $dest_addr = sockaddr_in($port,$host); -connect(SOCK,$dest_addr) or die("connect() failed: $!"); - -# enabling autoflush -SOCK->autoflush(1); - -# sending the REDIRECTOR command to yacy to enable the proper -# command handler -print SOCK "REDIRECTOR".CRLF; - -# Doing authentication -my $ctx = Digest::MD5->new; -$ctx->add($user.":".$pwd); -my $md5Pwd = $ctx->hexdigest; - -print SOCK "USER ".$user.CRLF; -print SOCK "PWD ".$md5Pwd.CRLF; - -# Getting a list of file extensions that should be ignored -print SOCK "MEDIAEXT".CRLF; -$msg_in = lc ; -chomp $msg_in; -@mediaExt = split(/,\s*/, $msg_in); - -# 1) Reading URLs from stdIn -# 2) Send it to Yacy -# 3) Receive response from YaCy -# 4) Print response to StdOut -while (defined($msg_out = <>)) { - chomp $msg_out; - - # splitting request into it's various parts - # - # One squid redirector request line typically looks like this: - # http://www.pageresource.com/styles/tuts.css 192.168.0.5/- - GET - @requestData = split(/\s+/, $msg_out); - - # testing if the URL is CGI - if (!$allowCgi && isCGI($requestData[0])) { - print STDOUT CRLF; - print STDERR "URL is cgi: ".$msg_out.CRLF; - next; - } - - # testing if the URL is a POST request - if (!$allowPost && isPOST($requestData[0])){ - print STDOUT CRLF; - print STDERR "URL is post: ".$msg_out.CRLF; - next; - } - - # testing if the requested content is a media content - if (isMediaExt($requestData[0],\@mediaExt)) { - print STDOUT CRLF; - print STDERR "URL has media extension: ".$msg_out.CRLF; - next; - } - - # sending the whole request line to YaCy - $msg_out .= CRLF; - print SOCK $msg_out; - - # reading the response - if (defined($msg_in = )) { - print STDOUT $msg_in; - } else { - print STDERR "Socket closed".CRLF; - close SOCK; - exit(1); - } - - $bytes_out += length($msg_out); - $bytes_in += length($msg_in); -} -print SOCK "EXIT".CRLF; - -close SOCK; -print STDERR "bytes_sent = $bytes_out, bytes_received = $bytes_in\n"; - diff --git a/source/de/anomic/server/urlRedirectord.java b/source/de/anomic/server/urlRedirectord.java deleted file mode 100644 index f1b3c8a89..000000000 --- a/source/de/anomic/server/urlRedirectord.java +++ /dev/null @@ -1,232 +0,0 @@ -package de.anomic.server; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.PrintWriter; -import java.net.MalformedURLException; -import java.util.Date; - -import de.anomic.crawler.CrawlEntry; -import de.anomic.crawler.CrawlProfile; -import de.anomic.data.userDB; -import de.anomic.document.ParserDispatcher; -import de.anomic.http.httpClient; -import de.anomic.http.httpResponseHeader; -import de.anomic.plasma.plasmaSwitchboard; -import de.anomic.server.serverCore.Session; -import de.anomic.yacy.yacyURL; -import de.anomic.yacy.logging.Log; - -public class urlRedirectord implements serverHandler, Cloneable { - - private serverCore.Session session; - private static plasmaSwitchboard sb = null; - private static final Log theLogger = new Log("URL-REDIRECTOR"); - private static CrawlProfile.entry profile = null; - private String nextURL; - - public urlRedirectord() { - if (sb == null) { - sb = plasmaSwitchboard.getSwitchboard(); - } - - if (profile == null) { - profile = sb.crawler.profilesActiveCrawls.newEntry( - // name - "URL Redirector", - // start URL - null, - // crawling filter - CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER, - // depth - 0, - // recrawlIfOlder (minutes), if negative: do not re-crawl - -1, - // domFilterDepth, if negative: no auto-filter - -1, - // domMaxPages, if negative: no count restriction - -1, - // crawlDynamic - false, - // indexText - true, - // indexMedia - true, - // storeHTCache - false, - // storeTxCache - true, - // remoteIndexing - false, - // xsstopw - true, - // xdstopw - true, - // xpstopw - true - ); - } - } - - public String getURL() { - return this.nextURL; - } - - public void initSession(final Session theSession){ - // getting current session - this.session = theSession; - } - - public String greeting() { - return null; - } - - public String error(final Throwable e) { - return null; - } - - public urlRedirectord clone() { - return null; - } - - public void reset() { - this.session = null; - } - - public Boolean EMPTY(final String arg) throws IOException { - return null; - } - - public Boolean UNKNOWN(final String requestLine) throws IOException { - return null; - } - - public Boolean REDIRECTOR(final String requestLine) { - try { - - boolean authenticated = false; - String userName = null; - String md5Pwd = null; - - // setting timeout - this.session.controlSocket.setSoTimeout(0); - - String line = null; - final BufferedReader inputReader = new BufferedReader(new InputStreamReader(this.session.in)); - final PrintWriter outputWriter = new PrintWriter(this.session.out); - - while ((line = inputReader.readLine()) != null) { - if (line.equals("EXIT")) { - break; - } else if (line.startsWith("#")) { - outputWriter.print("\r\n"); - outputWriter.flush(); - continue; - } else if (line.startsWith("USER")) { - userName = line.substring(line.indexOf(" ")).trim(); - } else if (line.startsWith("PWD")) { - if (userName != null) { - final userDB.Entry userEntry = sb.userDB.getEntry(userName); - if (userEntry != null) { - md5Pwd = line.substring(line.indexOf(" ")).trim(); - if (userEntry.getMD5EncodedUserPwd().equals(md5Pwd)) { - authenticated = true; - } - } - } - } else if (line.startsWith("MEDIAEXT")) { - String transferIgnoreList = ParserDispatcher.getMediaExtList(); - transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1); - - outputWriter.print(transferIgnoreList); - outputWriter.print("\r\n"); - outputWriter.flush(); - } else if (line.startsWith("DEPTH")) { - final int pos = line.indexOf(" "); - if (pos != -1) { - final String newDepth = line.substring(pos).trim(); - theLogger.logFine("Changing crawling depth to '" + newDepth + "'."); - sb.crawler.profilesActiveCrawls.changeEntry(profile, "generalDepth",newDepth); - } - outputWriter.print("\r\n"); - outputWriter.flush(); - } else if (line.startsWith("CRAWLDYNAMIC")) { - final int pos = line.indexOf(" "); - if (pos != -1) { - final String newValue = line.substring(pos).trim(); - theLogger.logFine("Changing crawl dynamic setting to '" + newValue + "'"); - sb.crawler.profilesActiveCrawls.changeEntry(profile, "crawlingQ",newValue); - } - outputWriter.print("\r\n"); - outputWriter.flush(); - } else { - if (!authenticated) { - return Boolean.FALSE; - } - - final int pos = line.indexOf(" "); - this.nextURL = (pos != -1) ? line.substring(0,pos):line; - - theLogger.logFine("Receiving request " + line); - outputWriter.print("\r\n"); - outputWriter.flush(); - - String reasonString = null; - try { - // generating URL Object - final yacyURL reqURL = new yacyURL(this.nextURL, null); - - // getting URL mimeType - final httpResponseHeader header = httpClient.whead(reqURL.toString()); - - if (ParserDispatcher.supportedContent( - reqURL, - header.mime()) - ) { - // first delete old entry, if exists - final String urlhash = reqURL.hash(); - sb.indexSegment.urlMetadata().remove(urlhash); - sb.crawlQueues.noticeURL.removeByURLHash(urlhash); - sb.crawlQueues.errorURL.remove(urlhash); - - // enqueuing URL for crawling - sb.crawlStacker.enqueueEntry(new CrawlEntry( - sb.peers.mySeed().hash, - reqURL, - null, - "URL Redirector", - new Date(), - null, - profile.handle(), - 0, - 0, - 0 - )); - } else { - reasonString = "Unsupporte file extension"; - } - } catch (final MalformedURLException badUrlEx) { - reasonString = "Malformed URL"; - } - - if (reasonString != null) { - theLogger.logFine("URL " + nextURL + " rejected. Reason: " + reasonString); - } - nextURL = null; - } - } - - theLogger.logFine("Connection terminated"); - - // Terminating connection - return serverCore.TERMINATE_CONNECTION; - } catch (final Exception e) { - theLogger.logSevere("Unexpected Error: " + e.getMessage(),e); - return serverCore.TERMINATE_CONNECTION; - } - } - - - -}