removed all non-http daemons; they had not been used and may be a potential security risk.

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6185 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
orbiter 16 years ago
parent 0e8647d62f
commit 499723891d

@ -45,7 +45,6 @@ import de.anomic.server.serverHandler;
import de.anomic.server.serverObjects;
import de.anomic.server.serverSwitch;
import de.anomic.server.serverThread;
import de.anomic.server.urlRedirectord;
import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacySeed;
@ -154,12 +153,7 @@ public final class Connections_p {
// getting the destination host
dest = conProp.getProperty(httpHeader.CONNECTION_PROP_HOST);
if (dest==null)continue;
} else if (cmdObj instanceof urlRedirectord) {
prot = "urlRedirector";
final urlRedirectord urlRedir = (urlRedirectord)cmdObj;
commandLine = urlRedir.getURL();
}
}
if ((dest != null) && (dest.equals(virtualHost))) dest = sb.peers.mySeed().getName() + ".yacy";

@ -1,286 +0,0 @@
//icapHeader.java
//-----------------------
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//This file is contributed by Martin Thelian
//last major change: $LastChangedDate$ by $LastChangedBy$
//Revision: $LastChangedRevision$
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.icap;
import java.text.Collator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import de.anomic.server.serverCore;
public class icapHeader extends TreeMap<String, String> implements Map<String, String> {
private static final long serialVersionUID = 1L;
/* =============================================================
* Constants defining icap methods
* ============================================================= */
public static final String METHOD_REQMOD = "REQMOD";
public static final String METHOD_RESPMOD = "RESPMOD";
public static final String METHOD_OPTIONS = "OPTIONS";
/* =============================================================
* Constants defining ICAP header names
* ============================================================= */
public static final String HOST = "Host";
public static final String USER_AGENT = "User-Agent";
public static final String CONNECTION = "Connection";
public static final String DATE = "Date";
public static final String SERVER = "Server";
public static final String ISTAG = "ISTAG";
public static final String METHODS = "Methods";
public static final String ALLOW = "Allow";
public static final String ENCAPSULATED = "Encapsulated";
public static final String MAX_CONNECTIONS = "Max-Connections";
public static final String OPTIONS_TTL = "Options-TTL";
public static final String SERVICE = "Service";
public static final String SERVICE_ID = "Service-ID";
public static final String PREVIEW = "Preview";
public static final String TRANSFER_PREVIEW = "Transfer-Preview";
public static final String TRANSFER_IGNORE = "Transfer-Ignore";
public static final String TRANSFER_COMPLETE = "Transfer-Complete";
public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count";
/* =============================================================
* defining default icap status messages
* ============================================================= */
public static final HashMap<String, String> icap1_0 = new HashMap<String, String>();
static {
// (1yz) Informational codes
icap1_0.put("100","Continue after ICAP preview");
// (2yz) Success codes:
icap1_0.put("200","OK");
icap1_0.put("204","No modifications needed");
// (4yz) Client error codes:
icap1_0.put("400","Bad request");
icap1_0.put("404","ICAP Service not found");
icap1_0.put("405","Method not allowed for service");
icap1_0.put("408","Request timeout");
// (5yz) Server error codes:
icap1_0.put("500","Server error");
icap1_0.put("501","Method not implemented");
icap1_0.put("502","Bad Gateway");
icap1_0.put("503","Service overloaded");
icap1_0.put("505","ICAP version not supported by server");
}
/* PROPERTIES: General properties */
public static final String CONNECTION_PROP_ICAP_VER = "ICAP";
public static final String CONNECTION_PROP_HOST = "HOST";
public static final String CONNECTION_PROP_PATH = "PATH";
public static final String CONNECTION_PROP_EXT = "EXT";
public static final String CONNECTION_PROP_METHOD = "METHOD";
public static final String CONNECTION_PROP_REQUESTLINE = "REQUESTLINE";
public static final String CONNECTION_PROP_CLIENTIP = "CLIENTIP";
public static final String CONNECTION_PROP_URL = "URL";
public static final String CONNECTION_PROP_ARGS = "ARGS";
public static final String CONNECTION_PROP_PERSISTENT = "PERSISTENT";
public static final String CONNECTION_PROP_KEEP_ALIVE_COUNT = "KEEP-ALIVE_COUNT";
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
static {
insensitiveCollator.setStrength(Collator.SECONDARY);
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
}
/**
 * Creates an empty header map.
 * Keys are ordered and compared with the shared {@code insensitiveCollator}
 * (US locale, secondary strength, no decomposition) rather than by plain
 * string equality.
 */
public icapHeader() {
super(insensitiveCollator);
}
/**
 * Tells whether the "Allow" header of this message lists the given status code.
 * The header value is treated as a comma separated list and every entry is
 * compared as a whole, so that e.g. "2040" is not mistaken for 204.
 *
 * @param statusCode the ICAP status code to look for, e.g. 204
 * @return {@code true} if an "Allow" header exists and one of its entries equals
 *         the status code, {@code false} otherwise (also when the header is missing)
 */
public boolean allow(final int statusCode) {
    final String allowed = get(ALLOW);
    if (allowed == null) return false;
    final String wanted = Integer.toString(statusCode);
    for (final String entry : allowed.split(",")) {
        if (entry.trim().equals(wanted)) return true;
    }
    return false;
}
/**
 * Stores a header value without overwriting earlier values of the same name.
 * The first value is stored under the plain name; every further value is
 * stored under the synthetic key {@code "*" + key + "-" + n}, where n is the
 * number of values already present.
 *
 * @param key   the header name
 * @param value the header value
 * @return whatever {@code put} returns for the key that was actually used
 */
public String add(final String key, final String value) {
    final int occurrences = keyCount(key);
    final String storageKey = (occurrences == 0) ? key : "*" + key + "-" + occurrences;
    return put(storageKey, value);
}
/**
 * Counts how many values are stored for a header name.
 *
 * @param key the header name
 * @return 0 if the plain name is not present; otherwise 1 plus the number of
 *         consecutive synthetic keys {@code "*" + key + "-" + n} starting at n = 1
 */
public int keyCount(final String key) {
    if (!containsKey(key)) {
        return 0;
    }
    int total = 1;
    for (; containsKey("*" + key + "-" + total); total++) {
        // the loop header does all the work
    }
    return total;
}
/**
 * Looks up a value and falls back to a default when nothing is stored.
 *
 * @param key  the key to look up
 * @param dflt the value to return when the lookup yields {@code null}
 * @return the stored value, or {@code dflt} if the lookup returned {@code null}
 */
public Object get(final Object key, final Object dflt) {
    final Object found = get(key);
    return (found != null) ? found : dflt;
}
/**
 * Returns one of possibly several values stored for a header name.
 *
 * @param key   the header name
 * @param count index of the value; 0 addresses the plain key, any other value
 *              addresses the synthetic key {@code "*" + key + "-" + count}
 * @return the stored value or {@code null} if there is none
 */
public Object getSingle(final Object key, final int count) {
    final Object lookupKey = (count == 0) ? key : "*" + key + "-" + count;
    return get(lookupKey, null);
}
/**
 * Renders the status line and all header fields of this map as text.
 * Every line is terminated with CRLF and the header is closed by an empty line.
 * Keys starting with '*' (additional values of a repeated header) are not
 * written as headers of their own; they are emitted together with their plain
 * key. Keys starting with '#' and empty keys are skipped.
 *
 * @param icapVersion    the protocol version written at the start of the status line
 * @param icapStatusCode the numeric status code
 * @param icapStatusText the reason phrase; if {@code null} or empty and the version is
 *                       "ICAP/1.0", the default text from {@code icap1_0} is used when
 *                       one exists, otherwise the reason phrase is left empty
 * @return the rendered header
 */
public StringBuilder toHeaderString(final String icapVersion, final int icapStatusCode, String icapStatusText) {
    if ((icapStatusText == null) || (icapStatusText.length() == 0)) {
        final String code = Integer.toString(icapStatusCode);
        if (icapVersion.equals("ICAP/1.0") && icapHeader.icap1_0.containsKey(code)) {
            icapStatusText = icapHeader.icap1_0.get(code);
        }
    }
    // never write the literal text "null" as reason phrase
    if (icapStatusText == null) icapStatusText = "";

    final StringBuilder theHeader = new StringBuilder();

    // write status line
    theHeader.append(icapVersion).append(" ")
             .append(Integer.toString(icapStatusCode)).append(" ")
             .append(icapStatusText).append("\r\n");

    // write header
    for (final String key : keySet()) {
        // an empty name cannot form a header line and would break charAt(0)
        if (key.length() == 0) continue;
        final char tag = key.charAt(0);
        // '#' in key is reserved for proxy attributes as artificial header values
        if ((tag == '*') || (tag == '#')) continue;
        final int count = keyCount(key);
        for (int j = 0; j < count; j++) {
            theHeader.append(key).append(": ").append((String) getSingle(key, j)).append("\r\n");
        }
    }

    // end header
    theHeader.append("\r\n");
    return theHeader;
}
/**
 * Parses an ICAP request line (without the leading method token) into connection properties.
 * The given properties are cleared first. Afterwards METHOD, REQUESTLINE, ICAP (version),
 * HOST and PATH are always set; URL and ARGS are set when the request line is not empty
 * (ARGS only if a query string is present); EXT is set only for an empty request line.
 *
 * @param cmd         the ICAP method, stored as METHOD
 * @param s           the rest of the request line: URL, optionally followed by a blank and the version
 * @param prop        the properties to fill; cleared before use
 * @param virtualHost host name stored as HOST whenever the URL does not name a host
 * @return {@code prop}
 */
public static Properties parseRequestLine(final String cmd, String s, final Properties prop, final String virtualHost) {
    // reset property from previous run
    prop.clear();

    // storing information about the request
    prop.setProperty(CONNECTION_PROP_METHOD, cmd);
    prop.setProperty(CONNECTION_PROP_REQUESTLINE, cmd + " " + s);

    // nothing to parse: fall back to defaults
    if (s.length() == 0) {
        prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
        prop.setProperty(CONNECTION_PROP_PATH, "/");
        prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
        prop.setProperty(CONNECTION_PROP_EXT, "");
        return prop;
    }

    // store the version property "ICAP" and cut the version off the URL
    int sep = s.indexOf(" ");
    if (sep >= 0) {
        // ICAP version is given
        prop.setProperty(CONNECTION_PROP_ICAP_VER, s.substring(sep + 1).trim());
        s = s.substring(0, sep).trim();
    } else {
        // ICAP version is not given, it is assumed to be ICAP/1.0
        prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
    }

    // separate the query string from the URL
    String argsString = "";
    sep = s.indexOf("?");
    if (sep >= 0) {
        argsString = s.substring(sep + 1);
        s = s.substring(0, sep);
    }
    prop.setProperty(CONNECTION_PROP_URL, s); // store URL
    if (argsString.length() != 0) prop.setProperty(CONNECTION_PROP_ARGS, argsString); // store arguments in original form

    // finally find host string; regionMatches compares case-insensitively without
    // depending on the default locale (toUpperCase() breaks "icap" in a Turkish locale)
    if (s.regionMatches(true, 0, "ICAP://", 0, 7)) {
        // a host was given. extract it and set path
        s = s.substring(7);
        sep = s.indexOf("/");
        if (sep < 0) {
            // malformed URL without a path, something like icap://index.html;
            // treat the remainder as a path on the virtual host
            prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
            prop.setProperty(CONNECTION_PROP_PATH, "/" + s);
        } else {
            // well-formed URL: host up to the first '/', path from there (including the '/')
            prop.setProperty(CONNECTION_PROP_HOST, s.substring(0, sep));
            prop.setProperty(CONNECTION_PROP_PATH, s.substring(sep));
        }
    } else {
        // no host in URL: access to the virtual host; add the leading '/' if the client omitted it
        prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
        prop.setProperty(CONNECTION_PROP_PATH, s.startsWith("/") ? s : "/" + s);
    }
    return prop;
}
/**
 * Reads header lines from the session until an empty line or the end of input.
 * Every line containing a ':' is split at the first ':' into name and value,
 * both trimmed, and stored with {@code add}; lines without ':' are ignored.
 *
 * @param prop       not used by this method
 * @param theSession the session to read the lines from
 * @return a new header object holding all fields that were read
 */
public static icapHeader readHeader(final Properties prop, final serverCore.Session theSession) {
    final icapHeader result = new icapHeader();
    String current = theSession.readLineAsString();
    // an empty line separates the header from the body
    while ((current != null) && (current.length() != 0)) {
        final int colon = current.indexOf(":");
        if (colon >= 0) {
            final String name = current.substring(0, colon).trim();
            final String value = current.substring(colon + 1).trim();
            result.add(name, value);
        }
        current = theSession.readLineAsString();
    }
    return result;
}
}

@ -1,433 +0,0 @@
//icapd.java
//-----------------------
//(C) by Michael Peter Christen; mc@yacy.net
//first published on http://www.anomic.de
//Frankfurt, Germany, 2004
//
//This file is contributed by Martin Thelian
//last major change: $LastChangedDate$ by $LastChangedBy$
//Revision: $LastChangedRevision$
//
//This program is free software; you can redistribute it and/or modify
//it under the terms of the GNU General Public License as published by
//the Free Software Foundation; either version 2 of the License, or
//(at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//GNU General Public License for more details.
//
//You should have received a copy of the GNU General Public License
//along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package de.anomic.icap;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.InetAddress;
import java.util.Date;
import java.util.Properties;
import de.anomic.document.ParserDispatcher;
import de.anomic.http.httpChunkedInputStream;
import de.anomic.http.httpHeader;
import de.anomic.http.httpRequestHeader;
import de.anomic.http.httpResponseHeader;
import de.anomic.http.httpDocument;
import de.anomic.kelondro.util.DateFormatter;
import de.anomic.kelondro.util.FileUtils;
import de.anomic.plasma.plasmaHTCache;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore;
import de.anomic.server.serverHandler;
import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.logging.Log;
/**
* @author theli
*/
public class icapd implements serverHandler, Cloneable {
private serverCore.Session session; // holds the session object of the calling class
// the connection properties
private final Properties prop = new Properties();
// the address of the client
private InetAddress userAddress;
private String clientIP;
private int keepAliveRequestCount = 0;
// needed for logging
private static final Log log = new Log("ICAPD");
private static plasmaSwitchboard sb = null;
private static String virtualHost = null;
private static boolean keepAliveSupport = true;
/**
 * Creates a handler. The first instance also initializes the static
 * switchboard reference and reads the virtual host name from the
 * configuration key "fileHost" (default "localhost").
 */
public icapd() {
    if (sb != null) {
        return;
    }
    sb = plasmaSwitchboard.getSwitchboard();
    virtualHost = sb.getConfig("fileHost","localhost");
}
/**
 * Returns a fresh handler instead of a field-by-field copy;
 * no state of this instance is carried over.
 */
public icapd clone(){
return new icapd();
}
/**
 * Binds this handler to a session and derives the client IP from the
 * session's user address. Wildcard addresses and addresses whose textual form
 * starts with "0:0:0:0:0:0:0:1" or "127." are reported as "localhost".
 *
 * @param aSession the session this handler works for
 * @throws IOException declared by the handler interface; not thrown by the code shown here
 */
public void initSession(final Session aSession) throws IOException {
    this.session = aSession;
    this.userAddress = aSession.userAddress; // client InetAddress

    final String address = this.userAddress.getHostAddress();
    final boolean local = this.userAddress.isAnyLocalAddress()
            || address.startsWith("0:0:0:0:0:0:0:1")
            || address.startsWith("127.");
    this.clientIP = local ? "localhost" : address;
}
/**
 * @return always {@code null}; this handler provides no greeting text
 */
public String greeting() {
// not implemented: no greeting is produced
return null;
}
/**
 * @param e the error that occurred; ignored
 * @return always {@code null}; no error text is produced
 */
public String error(final Throwable e) {
// not implemented: the throwable is neither logged nor reported here
return null;
}
/**
 * Does nothing; no per-session state is cleared here.
 */
public void reset() {
}
/**
 * Handler for the EMPTY command (presumably dispatched by serverCore for an
 * empty request line - confirm against the dispatcher).
 *
 * @param arg ignored
 * @return always {@code serverCore.TERMINATE_CONNECTION}
 */
public Boolean EMPTY(final String arg) throws IOException {
// no special handling: the connection is simply terminated
return serverCore.TERMINATE_CONNECTION;
}
/**
 * Handler for the UNKNOWN command (presumably dispatched by serverCore for
 * commands without a matching method - confirm against the dispatcher).
 *
 * @param requestLine ignored
 * @return always {@code serverCore.TERMINATE_CONNECTION}
 */
public Boolean UNKNOWN(final String requestLine) throws IOException {
// no special handling: the connection is simply terminated
return serverCore.TERMINATE_CONNECTION;
}
/**
 * Builds the headers that every response starts with: Server, Date and ISTAG.
 * Server and ISTAG are derived from the configuration value "vString",
 * Date is the current time in RFC 1123 format.
 *
 * @return a new header object containing the three fields
 */
public icapHeader getDefaultHeaders() {
    final icapHeader defaults = new icapHeader();

    final String serverToken = "YaCy/" + sb.getConfig("vString","");
    defaults.put(icapHeader.SERVER, serverToken);

    final String now = DateFormatter.formatRFC1123(new Date());
    defaults.put(icapHeader.DATE, now);

    final String serviceTag = "\"" + sb.getConfig("vString","") + "\"";
    defaults.put(icapHeader.ISTAG, serviceTag);

    return defaults;
}
/**
 * Answers an ICAP OPTIONS request with status 200 and a description of the
 * requested service. For the path "/resIndexing" (compared case-insensitively)
 * the indexing service is announced together with the RESPMOD method and a
 * Transfer-Ignore list taken from the parser dispatcher; any other path gets
 * the generic service name.
 *
 * @param arg the request line without the method token
 * @return {@code serverCore.RESUME_CONNECTION} if the connection was negotiated as
 *         "keep-alive", otherwise {@code serverCore.TERMINATE_CONNECTION}
 * @throws IOException if writing the response fails
 */
public Boolean OPTIONS(final String arg) throws IOException {
    final BufferedOutputStream out = new BufferedOutputStream(this.session.out);

    // request line, request headers, keep-alive negotiation
    parseRequestLine(icapHeader.METHOD_OPTIONS, arg);
    final icapHeader requestHeader = icapHeader.readHeader(this.prop, this.session);
    final boolean keepOpen = handlePersistentConnection(requestHeader);

    // headers common to all services
    final icapHeader response = getDefaultHeaders();
    response.put(icapHeader.ALLOW, "204");
    response.put(icapHeader.ENCAPSULATED, "null-body=0");
    response.put(icapHeader.MAX_CONNECTIONS, "1000");
    response.put(icapHeader.OPTIONS_TTL, "300");
    response.put(icapHeader.SERVICE_ID, "???");
    response.put(icapHeader.PREVIEW, "30");
    response.put(icapHeader.TRANSFER_COMPLETE, "*");
    if (!keepOpen) {
        response.put(icapHeader.CONNECTION, "close");
    }

    // service specific headers
    final String service = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH, "");
    if (!service.equalsIgnoreCase("/resIndexing")) {
        response.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
    } else {
        response.put(icapHeader.SERVICE, "YaCy ICAP Indexing Service 1.0");
        response.put(icapHeader.METHODS, icapHeader.METHOD_RESPMOD);
        // the list is delivered with one enclosing character on each side, which is cut off
        final String rawList = ParserDispatcher.getMediaExtList();
        response.put(icapHeader.TRANSFER_IGNORE, rawList.substring(1, rawList.length() - 1));
    }

    final String rendered = response.toHeaderString("ICAP/1.0", 200, null).toString();
    out.write(rendered.getBytes());
    out.flush();

    final String persistence = this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT);
    if (persistence.equals("keep-alive")) {
        return serverCore.RESUME_CONNECTION;
    }
    return serverCore.TERMINATE_CONNECTION;
}
/**
 * Request modification is not implemented.
 *
 * @return always {@code serverCore.TERMINATE_CONNECTION}
 */
public Boolean REQMOD() {
return serverCore.TERMINATE_CONNECTION;
}
/**
 * Handles an ICAP RESPMOD request. Requests for the path "/resIndexing"
 * (compared case-insensitively) are passed to the indexing service; every
 * other path is answered with status 404.
 * Errors are logged and never propagate to the caller.
 *
 * @param arg the request line without the method token
 * @return {@code serverCore.RESUME_CONNECTION} if the connection was negotiated as
 *         "keep-alive"; {@code serverCore.TERMINATE_CONNECTION} otherwise, including the
 *         case that processing failed before the negotiation took place
 */
public Boolean RESPMOD(final String arg) {
    try {
        final InputStream in = this.session.in;
        final OutputStream out = this.session.out;

        // parsing the icap request line
        parseRequestLine(icapHeader.METHOD_RESPMOD, arg);

        // reading the icap request header
        final icapHeader icapReqHeader = icapHeader.readHeader(this.prop, this.session);

        // determines if the connection should be kept alive
        handlePersistentConnection(icapReqHeader);

        // determining the requested service and call it or send back an error message
        final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH, "");
        if (reqService.equalsIgnoreCase("/resIndexing")) {
            indexingService(icapReqHeader, in, out);
        } else {
            final icapHeader icapResHeader = getDefaultHeaders();
            icapResHeader.put(icapHeader.ENCAPSULATED, icapReqHeader.get(icapHeader.ENCAPSULATED));
            icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
            final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0", 404, null);
            out.write((new String(header)).getBytes());
            out.flush();
        }
    } catch (final Exception e) {
        log.logSevere("Unexpected error while processing RESPMOD request: " + e.getMessage(), e);
    }

    // the property is missing if an error occurred before the keep-alive negotiation;
    // comparing from the constant side avoids a NullPointerException in that case
    final String persistence = this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT);
    return "keep-alive".equals(persistence) ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION;
}
/*
private void blacklistService(icapHeader reqHeader, InputStream in, OutputStream out) {
try {
} catch (Exception e) {
e.printStackTrace();
}
}
*/
/**
 * Implements the "/resIndexing" service: reads the encapsulated HTTP request
 * header, HTTP response header and chunked response body from the ICAP
 * request, answers the ICAP client (204 if the request allows it, 503
 * otherwise) and then hands the document to the switchboard for indexing.
 * Only GET requests with response status 200 or 203 and a content type
 * supported by the parser dispatcher are indexed.
 * Errors are logged and never propagate to the caller.
 *
 * @param reqHeader the ICAP request header; its "Encapsulated" field describes the message layout
 * @param in        stream positioned at the start of the encapsulated message
 * @param out       stream the ICAP response is written to
 */
private void indexingService(final icapHeader reqHeader, final InputStream in, final OutputStream out) {
    try {
        /* =========================================================================
         * Reading the various message parts into buffers
         * ========================================================================= */
        ByteArrayInputStream reqHdrStream = null, resHdrStream = null, resBodyStream = null;
        final String[] encapsulated = (reqHeader.get(icapHeader.ENCAPSULATED)).split(",");
        int prevLength = 0, currLength = 0;
        for (int i = 0; i < encapsulated.length; i++) {
            if (encapsulated[i].indexOf("req-hdr") >= 0) {
                // reading the request header; it ends at the offset of the next listed part
                prevLength = currLength;
                currLength = Integer.parseInt(encapsulated[i + 1].split("=")[1].trim());
                reqHdrStream = new ByteArrayInputStream(readFully(in, currLength - prevLength));
            } else if (encapsulated[i].indexOf("res-hdr") >= 0) {
                // reading the response header; it ends at the offset of the next listed part
                prevLength = currLength;
                currLength = Integer.parseInt(encapsulated[i + 1].split("=")[1].trim());
                resHdrStream = new ByteArrayInputStream(readFully(in, currLength - prevLength));
            } else if (encapsulated[i].indexOf("res-body") >= 0) {
                // reading the response body, which is transferred chunked
                final httpChunkedInputStream chunkedIn = new httpChunkedInputStream(in);
                final ByteArrayOutputStream bout = new ByteArrayOutputStream();
                final byte[] buffer = new byte[2048];
                int l;
                while ((l = chunkedIn.read(buffer)) >= 0) {
                    bout.write(buffer, 0, l);
                }
                resBodyStream = new ByteArrayInputStream(bout.toByteArray());
            }
        }

        /* =========================================================================
         * sending back the icap status
         * ========================================================================= */
        final icapHeader icapResHeader = getDefaultHeaders();
        icapResHeader.put(icapHeader.ENCAPSULATED, reqHeader.get(icapHeader.ENCAPSULATED));
        icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
        // 204 may only be sent if the client allows it; otherwise the request is refused with 503
        final int icapStatus = reqHeader.allow(204) ? 204 : 503;
        final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0", icapStatus, null);
        out.write((new String(header)).getBytes());
        out.flush();

        // without both encapsulated headers there is nothing that could be indexed
        if ((reqHdrStream == null) || (resHdrStream == null)) {
            log.logInfo("Missing encapsulated http header for indexing:" +
                        "\nEncapsulated: " + reqHeader.get(icapHeader.ENCAPSULATED));
            return;
        }

        /* =========================================================================
         * Parsing request data
         * ========================================================================= */
        // reading the requestline
        BufferedReader reader = new BufferedReader(new InputStreamReader(reqHdrStream));
        final String httpRequestLine = reader.readLine();
        if (httpRequestLine == null) {
            log.logInfo("Empty encapsulated http request header, nothing to index");
            reader.close();
            return;
        }

        // parsing the requestline
        final Properties httpReqProps = new Properties();
        httpRequestHeader.parseRequestLine(httpRequestLine, httpReqProps, virtualHost);

        if (!httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD).equals(httpHeader.METHOD_GET)) {
            log.logInfo("Wrong http request method for indexing:" +
                        "\nRequest Method: " + httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD) +
                        "\nRequest Line: " + httpRequestLine);
            reader.close();
            return;
        }

        // reading all request headers
        final httpRequestHeader httpReqHeader = new httpRequestHeader();
        httpReqHeader.readHttpHeader(reader);
        reader.close();

        // handle transparent proxy support: this function call is needed to set the host property properly
        httpHeader.handleTransparentProxySupport(httpReqHeader, httpReqProps, virtualHost, true);

        // getting the request URL
        final yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps);

        /* =========================================================================
         * Parsing response data
         * ========================================================================= */
        // getting the response status
        reader = new BufferedReader(new InputStreamReader(resHdrStream));
        final String httpRespStatusLine = reader.readLine();
        if (httpRespStatusLine == null) {
            log.logInfo("Empty encapsulated http response header, nothing to index:" +
                        "\nRequest Line: " + httpRequestLine);
            reader.close();
            return;
        }

        final Object[] httpRespStatus = httpResponseHeader.parseResponseLine(httpRespStatusLine);
        if (!(httpRespStatus[1].equals(Integer.valueOf(200)) || httpRespStatus[1].equals(Integer.valueOf(203)))) {
            log.logInfo("Wrong status code for indexing:" +
                        "\nStatus Code: " + httpRespStatus[1] +
                        "\nRequest Line: " + httpRequestLine +
                        "\nResponse Line: " + httpRespStatusLine);
            reader.close();
            return;
        }

        // reading all response headers
        final httpResponseHeader httpResHeader = new httpResponseHeader();
        httpResHeader.readHttpHeader(reader);
        reader.close();

        if (!ParserDispatcher.supportedContent(httpRequestURL, httpResHeader.mime())) {
            log.logInfo("Wrong mimeType or fileExtension for indexing:" +
                        "\nMimeType: " + httpResHeader.mime() +
                        "\nRequest Line:" + httpRequestLine);
            return;
        }

        /* =========================================================================
         * Prepare data for indexing
         * ========================================================================= */
        // generating a htcache entry object
        final httpDocument cacheEntry = new httpDocument(
                0,
                httpRequestURL,
                "",
                httpRespStatusLine,
                httpReqHeader, httpResHeader,
                null,
                sb.crawler.defaultProxyProfile
        );

        // copy the response body into the cache entry
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        FileUtils.copy(resBodyStream, baos);
        if (resBodyStream != null) {
            resBodyStream.close();
            resBodyStream = null;
        }
        cacheEntry.setCacheArray(baos.toByteArray());
        plasmaHTCache.storeMetadata(httpResHeader, cacheEntry);

        // indexing the response
        sb.htEntryStoreProcess(cacheEntry);
    } catch (final Exception e) {
        log.logSevere("Unexpected error in indexing service: " + e.getMessage(), e);
    }
}

/**
 * Reads exactly {@code length} bytes from a stream. A single call to
 * {@code InputStream.read} may deliver fewer bytes than requested, so the
 * read is repeated until the buffer is full.
 *
 * @param in     the stream to read from
 * @param length the number of bytes to read
 * @return a buffer of exactly {@code length} bytes
 * @throws IOException if the stream ends before {@code length} bytes were read
 */
private static byte[] readFully(final InputStream in, final int length) throws IOException {
    final byte[] buffer = new byte[length];
    int offset = 0;
    while (offset < length) {
        final int n = in.read(buffer, offset, length - offset);
        if (n < 0) {
            throw new IOException("unexpected end of stream: expected " + length + " bytes, got " + offset);
        }
        offset += n;
    }
    return buffer;
}
/**
 * Parses the request line into the connection properties of this handler and
 * adds the client IP and the running number of the request on this connection.
 *
 * @param cmd the ICAP method
 * @param s   the request line without the method token
 */
private final void parseRequestLine(final String cmd, final String s) {
    // clears and refills this.prop, so it has to run before anything else is stored
    icapHeader.parseRequestLine(cmd, s, this.prop, virtualHost);

    // who is talking to us
    this.prop.setProperty(icapHeader.CONNECTION_PROP_CLIENTIP, this.clientIP);

    // number of requests received on this persistent connection, including this one
    this.keepAliveRequestCount += 1;
    final String requestNumber = Integer.toString(this.keepAliveRequestCount);
    this.prop.setProperty(icapHeader.CONNECTION_PROP_KEEP_ALIVE_COUNT, requestNumber);
}
/**
 * Decides whether the connection stays open after the current request and
 * records the decision as "keep-alive" or "close" in the PERSISTENT property.
 * The connection is closed if keep-alive support is switched off or if the
 * request's Connection header (default "keep-alive") is "close" in any letter case.
 *
 * @param header the ICAP request header
 * @return {@code true} if the connection is kept alive
 */
private boolean handlePersistentConnection(final icapHeader header) {
    final boolean keepOpen;
    if (keepAliveSupport) {
        final String requested = (String) header.get(icapHeader.CONNECTION, "keep-alive");
        keepOpen = !requested.toLowerCase().equals("close");
    } else {
        keepOpen = false;
    }
    this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT, keepOpen ? "keep-alive" : "close");
    return keepOpen;
}
}

@ -56,7 +56,6 @@ import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocket;
import javax.net.ssl.SSLSocketFactory;
import de.anomic.icap.icapd;
import de.anomic.kelondro.util.ByteBuffer;
import de.anomic.tools.PKCS12Tool;
import de.anomic.yacy.logging.Log;
@ -696,23 +695,11 @@ public final class serverCore extends serverAbstractBusyThread implements server
// now we need to initialize the session
if (this.commandCounter == 0) {
// first we need to determine the proper protocol handler
if (this.request.indexOf("ICAP") >= 0) reqProtocol = "ICAP";
else if (this.request.startsWith("REDIRECTOR")) reqProtocol = "REDIRECTOR";
else reqProtocol = "HTTP";
if (this.request.indexOf("HTTP") >= 0) reqProtocol = "HTTP";
else reqProtocol = null;
// next we need to get the proper protocol handler
if (reqProtocol.equals("ICAP")) {
this.commandObj = new icapd();
} else if (reqProtocol.equals("REDIRECTOR")) {
this.commandObj = new urlRedirectord();
} else {
// if ((this.commandObj != null) &&
// (this.commandObj.getClass().getName().equals(serverCore.this.handlerPrototype.getClass().getName()))) {
// this.commandObj.reset();
// } else {
// this.commandObj = (serverHandler) serverCore.this.handlerPrototype.clone();
// }
if (this.request == null) break;
if (reqProtocol.equals("HTTP")) {
this.commandObj = serverCore.this.handlerPrototype.clone();
}

@ -1,142 +0,0 @@
#!/usr/bin/perl -w
#
# This is an URL Redirector Script for squid that can be
# used to bundle YaCy and Squid together via the squid
# redirector support.
# See: http://www.squid-cache.org/Doc/FAQ/FAQ-15.html
#
# This scripts forwards URLs from squid to YaCy where the
# URLs are used to download and index the content of the URLs.
use strict;
use Socket qw(:DEFAULT :crlf);
use IO::Handle;
use Digest::MD5;
# setting administrator username + pwd, hostname + port
my $user = "user";
my $pwd = "";
my $host = "localhost";
my $port = "8080";
my $allowCgi = 0;
my $allowPost = 0;
my @mediaExt;
my @requestData;
$|=1;
# Returns true if the lower-cased URL contains one of the markers that
# identify CGI or session bound content, false otherwise.
sub isCGI {
    my $url = lc shift;
    foreach my $marker (".cgi", ".exe", ";jsessionid=", "sessionid/", "phpsessid=") {
        return 1 if index($url, $marker) != -1;
    }
    # Perl's canonical false value, the same value a failed comparison yields
    return !1;
}
# Returns true if the URL carries parameters, i.e. contains '?' or '&',
# false otherwise.
sub isPOST {
    my $url = lc shift;
    foreach my $marker ("?", "&") {
        return 1 if index($url, $marker) != -1;
    }
    # Perl's canonical false value, the same value a failed comparison yields
    return !1;
}
# Tests whether the file extension of a URL is contained in a list of
# extensions.
#   $_[0]  the URL
#   $_[1]  reference to the list of extensions (without leading dot)
# Returns the number of list entries equal to the extension, 0 if the URL
# contains no '.'.
# The extension is compared as a plain string, ignoring letter case and
# surrounding whitespace of the list entries. It is deliberately not used as a
# regular expression: it comes from the URL and may contain metacharacters
# that would abort the script or match unrelated entries.
sub isMediaExt {
    my ($url, $extListRef) = @_;
    my $pos = rindex $url, ".";
    return 0 if $pos == -1;

    my $ext = lc substr($url, $pos + 1);
    my $matches = 0;
    foreach my $candidate (@{$extListRef}) {
        my $known = lc $candidate;
        $known =~ s/^\s+|\s+$//g;    # e.g. a "\r" left over from a CRLF terminated list
        $matches++ if $known eq $ext;
    }
    return $matches;
}
# traffic counters and line buffers used by the main loop
my ($bytes_out,$bytes_in) = (0,0);
my ($msg_in,$msg_out);

# open a TCP connection to YaCy
my $protocol = getprotobyname('tcp');
# keep the host name intact so that the error message can still show it
my $host_addr = inet_aton($host) or die "$host: unknown host";
socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die "socket() failed: $!";
my $dest_addr = sockaddr_in($port,$host_addr);
connect(SOCK,$dest_addr) or die("connect() failed: $!");

# enabling autoflush
SOCK->autoflush(1);
# sending the REDIRECTOR command to yacy to enable the proper
# command handler
print SOCK "REDIRECTOR".CRLF;
# Doing authentication: the password is sent as the MD5 hex digest
# of the string "<user>:<pwd>"
my $ctx = Digest::MD5->new;
$ctx->add($user.":".$pwd);
my $md5Pwd = $ctx->hexdigest;
print SOCK "USER ".$user.CRLF;
print SOCK "PWD ".$md5Pwd.CRLF;
# Getting a list of file extensions that should be ignored;
# the answer is a single line of comma separated extensions
# NOTE(review): chomp only strips "\n" by default, so a "\r" of a CRLF
# terminated answer may stay attached to the last entry - confirm
print SOCK "MEDIAEXT".CRLF;
$msg_in = lc <SOCK>;
chomp $msg_in;
@mediaExt = split(/,\s*/, $msg_in);
# 1) Reading URLs from stdIn
# 2) Send it to Yacy
# 3) Receive response from YaCy
# 4) Print response to StdOut
while (defined($msg_out = <>)) {
chomp $msg_out;
# splitting request into its various parts
#
# One squid redirector request line typically looks like this:
# http://www.pageresource.com/styles/tuts.css 192.168.0.5/- - GET
@requestData = split(/\s+/, $msg_out);
# testing if the URL is CGI; rejected URLs are answered with an empty
# line on STDOUT and are not forwarded to YaCy
if (!$allowCgi && isCGI($requestData[0])) {
print STDOUT CRLF;
print STDERR "URL is cgi: ".$msg_out.CRLF;
next;
}
# testing if the URL is a POST request
if (!$allowPost && isPOST($requestData[0])){
print STDOUT CRLF;
print STDERR "URL is post: ".$msg_out.CRLF;
next;
}
# testing if the requested content is a media content
if (isMediaExt($requestData[0],\@mediaExt)) {
print STDOUT CRLF;
print STDERR "URL has media extension: ".$msg_out.CRLF;
next;
}
# sending the whole request line to YaCy
$msg_out .= CRLF;
print SOCK $msg_out;
# reading the response and passing it on unchanged; a closed socket
# ends the script with exit code 1
if (defined($msg_in = <SOCK>)) {
print STDOUT $msg_in;
} else {
print STDERR "Socket closed".CRLF;
close SOCK;
exit(1);
}
# traffic statistics, only forwarded requests are counted
$bytes_out += length($msg_out);
$bytes_in += length($msg_in);
}
# end of input: close the session and report the statistics
print SOCK "EXIT".CRLF;
close SOCK;
print STDERR "bytes_sent = $bytes_out, bytes_received = $bytes_in\n";

@ -1,232 +0,0 @@
package de.anomic.server;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.util.Date;
import de.anomic.crawler.CrawlEntry;
import de.anomic.crawler.CrawlProfile;
import de.anomic.data.userDB;
import de.anomic.document.ParserDispatcher;
import de.anomic.http.httpClient;
import de.anomic.http.httpResponseHeader;
import de.anomic.plasma.plasmaSwitchboard;
import de.anomic.server.serverCore.Session;
import de.anomic.yacy.yacyURL;
import de.anomic.yacy.logging.Log;
public class urlRedirectord implements serverHandler, Cloneable {
private serverCore.Session session;
private static plasmaSwitchboard sb = null;
private static final Log theLogger = new Log("URL-REDIRECTOR");
private static CrawlProfile.entry profile = null;
private String nextURL;
/**
 * Creates a handler. The first instance also initializes the static
 * switchboard reference and registers the crawl profile "URL Redirector"
 * that is shared by all instances.
 */
public urlRedirectord() {
    if (sb == null) sb = plasmaSwitchboard.getSwitchboard();
    if (profile != null) return;

    profile = sb.crawler.profilesActiveCrawls.newEntry(
            "URL Redirector",          // name
            null,                      // start URL
            CrawlProfile.MATCH_ALL,    // crawling filter
            CrawlProfile.MATCH_NEVER,  // crawling filter
            0,                         // depth
            -1,                        // recrawlIfOlder (minutes), if negative: do not re-crawl
            -1,                        // domFilterDepth, if negative: no auto-filter
            -1,                        // domMaxPages, if negative: no count restriction
            false,                     // crawlDynamic
            true,                      // indexText
            true,                      // indexMedia
            false,                     // storeHTCache
            true,                      // storeTxCache
            false,                     // remoteIndexing
            true,                      // xsstopw
            true,                      // xdstopw
            true                       // xpstopw
    );
}
/**
 * @return the URL of the request currently being processed by REDIRECTOR,
 *         or {@code null} while no URL is being processed
 */
public String getURL() {
return this.nextURL;
}
/**
 * Binds this handler to a session.
 *
 * @param theSession the session whose streams and socket are used by REDIRECTOR
 */
public void initSession(final Session theSession){
// getting current session
this.session = theSession;
}
/**
 * @return always {@code null}; this handler provides no greeting text
 */
public String greeting() {
return null;
}
/**
 * @param e the error that occurred; ignored
 * @return always {@code null}; no error text is produced
 */
public String error(final Throwable e) {
return null;
}
/**
 * Returns a fresh handler instead of {@code null}, so that callers which
 * obtain their handler by cloning a prototype receive a usable object.
 * No state of this instance is carried over.
 *
 * @return a new handler instance
 */
public urlRedirectord clone() {
    return new urlRedirectord();
}
/**
 * Detaches this handler from its session.
 */
public void reset() {
this.session = null;
}
/**
 * Handler for the EMPTY command; not implemented.
 *
 * @param arg ignored
 * @return always {@code null}
 *         (NOTE(review): a null Boolean may break callers that unbox the result - confirm)
 */
public Boolean EMPTY(final String arg) throws IOException {
return null;
}
/**
 * Handler for the UNKNOWN command; not implemented.
 *
 * @param requestLine ignored
 * @return always {@code null}
 *         (NOTE(review): a null Boolean may break callers that unbox the result - confirm)
 */
public Boolean UNKNOWN(final String requestLine) throws IOException {
return null;
}
/**
 * Runs a redirector session: reads commands line by line from the session
 * until "EXIT" or the end of input.
 * <ul>
 * <li>lines starting with "#" are answered with an empty line</li>
 * <li>"USER name" and "PWD md5" authenticate against the user database; they are not answered</li>
 * <li>"MEDIAEXT" is answered with the media extension list of the parser dispatcher</li>
 * <li>"DEPTH n" and "CRAWLDYNAMIC v" change the shared crawl profile and are answered with an empty line</li>
 * <li>every other line is taken as a request whose first blank separated token is a URL;
 *     it is answered with an empty line and the URL is enqueued for crawling if its
 *     content type is supported. Such a line from an unauthenticated client ends the session.</li>
 * </ul>
 * A USER or PWD line without an argument is treated as a missing value instead of
 * aborting the session with an exception.
 *
 * @param requestLine the rest of the line that carried the REDIRECTOR command; not used
 * @return {@code Boolean.FALSE} if an unauthenticated client sent a URL,
 *         otherwise {@code serverCore.TERMINATE_CONNECTION}
 */
public Boolean REDIRECTOR(final String requestLine) {
    try {
        boolean authenticated = false;
        String userName = null;
        String md5Pwd = null;

        // setting timeout
        this.session.controlSocket.setSoTimeout(0);

        String line = null;
        final BufferedReader inputReader = new BufferedReader(new InputStreamReader(this.session.in));
        final PrintWriter outputWriter = new PrintWriter(this.session.out);

        while ((line = inputReader.readLine()) != null) {
            if (line.equals("EXIT")) {
                break;
            } else if (line.startsWith("#")) {
                outputWriter.print("\r\n");
                outputWriter.flush();
                continue;
            } else if (line.startsWith("USER")) {
                userName = argumentOf(line);
            } else if (line.startsWith("PWD")) {
                md5Pwd = argumentOf(line);
                if ((userName != null) && (md5Pwd != null)) {
                    final userDB.Entry userEntry = sb.userDB.getEntry(userName);
                    // compare from the received value, the stored hash may be missing
                    if ((userEntry != null) && md5Pwd.equals(userEntry.getMD5EncodedUserPwd())) {
                        authenticated = true;
                    }
                }
            } else if (line.startsWith("MEDIAEXT")) {
                // the list is delivered with one enclosing character on each side, which is cut off
                String transferIgnoreList = ParserDispatcher.getMediaExtList();
                transferIgnoreList = transferIgnoreList.substring(1, transferIgnoreList.length() - 1);
                outputWriter.print(transferIgnoreList);
                outputWriter.print("\r\n");
                outputWriter.flush();
            } else if (line.startsWith("DEPTH")) {
                // NOTE(review): the crawl profile can be changed without authentication - confirm that this is intended
                final String newDepth = argumentOf(line);
                if (newDepth != null) {
                    theLogger.logFine("Changing crawling depth to '" + newDepth + "'.");
                    sb.crawler.profilesActiveCrawls.changeEntry(profile, "generalDepth", newDepth);
                }
                outputWriter.print("\r\n");
                outputWriter.flush();
            } else if (line.startsWith("CRAWLDYNAMIC")) {
                // NOTE(review): the crawl profile can be changed without authentication - confirm that this is intended
                final String newValue = argumentOf(line);
                if (newValue != null) {
                    theLogger.logFine("Changing crawl dynamic setting to '" + newValue + "'");
                    sb.crawler.profilesActiveCrawls.changeEntry(profile, "crawlingQ", newValue);
                }
                outputWriter.print("\r\n");
                outputWriter.flush();
            } else {
                if (!authenticated) {
                    return Boolean.FALSE;
                }

                // the URL is the first token of the request line
                final int pos = line.indexOf(" ");
                this.nextURL = (pos != -1) ? line.substring(0, pos) : line;

                theLogger.logFine("Receiving request " + line);
                outputWriter.print("\r\n");
                outputWriter.flush();

                String reasonString = null;
                try {
                    // generating URL Object
                    final yacyURL reqURL = new yacyURL(this.nextURL, null);

                    // getting URL mimeType
                    final httpResponseHeader header = httpClient.whead(reqURL.toString());

                    if (ParserDispatcher.supportedContent(reqURL, header.mime())) {
                        // first delete old entry, if exists
                        final String urlhash = reqURL.hash();
                        sb.indexSegment.urlMetadata().remove(urlhash);
                        sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
                        sb.crawlQueues.errorURL.remove(urlhash);

                        // enqueuing URL for crawling
                        sb.crawlStacker.enqueueEntry(new CrawlEntry(
                                sb.peers.mySeed().hash,
                                reqURL,
                                null,
                                "URL Redirector",
                                new Date(),
                                null,
                                profile.handle(),
                                0,
                                0,
                                0
                        ));
                    } else {
                        reasonString = "Unsupporte file extension";
                    }
                } catch (final MalformedURLException badUrlEx) {
                    reasonString = "Malformed URL";
                }

                if (reasonString != null) {
                    theLogger.logFine("URL " + nextURL + " rejected. Reason: " + reasonString);
                }
                nextURL = null;
            }
        }

        theLogger.logFine("Connection terminated");

        // Terminating connection
        return serverCore.TERMINATE_CONNECTION;
    } catch (final Exception e) {
        theLogger.logSevere("Unexpected Error: " + e.getMessage(), e);
        return serverCore.TERMINATE_CONNECTION;
    }
}

/**
 * Extracts the argument of a command line.
 *
 * @param line a line of the form "COMMAND argument"
 * @return the trimmed text after the first blank, or {@code null} if the line contains no blank
 */
private static String argumentOf(final String line) {
    final int pos = line.indexOf(" ");
    return (pos == -1) ? null : line.substring(pos).trim();
}
}
Loading…
Cancel
Save