git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6185 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
0e8647d62f
commit
499723891d
@ -1,286 +0,0 @@
|
||||
//icapHeader.java
|
||||
//-----------------------
|
||||
//(C) by Michael Peter Christen; mc@yacy.net
|
||||
//first published on http://www.anomic.de
|
||||
//Frankfurt, Germany, 2004
|
||||
//
|
||||
//This file is contributed by Martin Thelian
|
||||
//last major change: $LastChangedDate$ by $LastChangedBy$
|
||||
//Revision: $LastChangedRevision$
|
||||
//
|
||||
//This program is free software; you can redistribute it and/or modify
|
||||
//it under the terms of the GNU General Public License as published by
|
||||
//the Free Software Foundation; either version 2 of the License, or
|
||||
//(at your option) any later version.
|
||||
//
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU General Public License for more details.
|
||||
//
|
||||
//You should have received a copy of the GNU General Public License
|
||||
//along with this program; if not, write to the Free Software
|
||||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.icap;
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import de.anomic.server.serverCore;
|
||||
|
||||
public class icapHeader extends TreeMap<String, String> implements Map<String, String> {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/* =============================================================
|
||||
* Constants defining icap methods
|
||||
* ============================================================= */
|
||||
public static final String METHOD_REQMOD = "REQMOD";
|
||||
public static final String METHOD_RESPMOD = "RESPMOD";
|
||||
public static final String METHOD_OPTIONS = "OPTIONS";
|
||||
|
||||
/* =============================================================
|
||||
* Constants defining http header names
|
||||
* ============================================================= */
|
||||
public static final String HOST = "Host";
|
||||
public static final String USER_AGENT = "User-Agent";
|
||||
public static final String CONNECTION = "Connection";
|
||||
public static final String DATE = "Date";
|
||||
public static final String SERVER = "Server";
|
||||
public static final String ISTAG = "ISTAG";
|
||||
public static final String METHODS = "Methods";
|
||||
public static final String ALLOW = "Allow";
|
||||
public static final String ENCAPSULATED = "Encapsulated";
|
||||
public static final String MAX_CONNECTIONS = "Max-Connections";
|
||||
public static final String OPTIONS_TTL = "Options-TTL";
|
||||
public static final String SERVICE = "Service";
|
||||
public static final String SERVICE_ID = "Service-ID";
|
||||
public static final String PREVIEW = "Preview";
|
||||
public static final String TRANSFER_PREVIEW = "Transfer-Preview";
|
||||
public static final String TRANSFER_IGNORE = "Transfer-Ignore";
|
||||
public static final String TRANSFER_COMPLETE = "Transfer-Complete";
|
||||
|
||||
public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count";
|
||||
|
||||
/* =============================================================
|
||||
* defining default icap status messages
|
||||
* ============================================================= */
|
||||
public static final HashMap<String, String> icap1_0 = new HashMap<String, String>();
|
||||
static {
|
||||
// (1yz) Informational codes
|
||||
icap1_0.put("100","Continue after ICAP preview");
|
||||
|
||||
// (2yz) Success codes:
|
||||
icap1_0.put("200","OK");
|
||||
icap1_0.put("204","No modifications needed");
|
||||
|
||||
// (4yz) Client error codes:
|
||||
icap1_0.put("400","Bad request");
|
||||
icap1_0.put("404","ICAP Service not found");
|
||||
icap1_0.put("405","Method not allowed for service");
|
||||
icap1_0.put("408","Request timeout");
|
||||
|
||||
// (5yz) Server error codes:
|
||||
icap1_0.put("500","Server error");
|
||||
icap1_0.put("501","Method not implemented");
|
||||
icap1_0.put("502","Bad Gateway");
|
||||
icap1_0.put("503","Service overloaded");
|
||||
icap1_0.put("505","ICAP version not supported by server");
|
||||
}
|
||||
|
||||
/* PROPERTIES: General properties */
|
||||
public static final String CONNECTION_PROP_ICAP_VER = "ICAP";
|
||||
public static final String CONNECTION_PROP_HOST = "HOST";
|
||||
public static final String CONNECTION_PROP_PATH = "PATH";
|
||||
public static final String CONNECTION_PROP_EXT = "EXT";
|
||||
public static final String CONNECTION_PROP_METHOD = "METHOD";
|
||||
public static final String CONNECTION_PROP_REQUESTLINE = "REQUESTLINE";
|
||||
public static final String CONNECTION_PROP_CLIENTIP = "CLIENTIP";
|
||||
public static final String CONNECTION_PROP_URL = "URL";
|
||||
public static final String CONNECTION_PROP_ARGS = "ARGS";
|
||||
public static final String CONNECTION_PROP_PERSISTENT = "PERSISTENT";
|
||||
public static final String CONNECTION_PROP_KEEP_ALIVE_COUNT = "KEEP-ALIVE_COUNT";
|
||||
|
||||
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
|
||||
static {
|
||||
insensitiveCollator.setStrength(Collator.SECONDARY);
|
||||
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||
}
|
||||
|
||||
public icapHeader() {
|
||||
super(insensitiveCollator);
|
||||
}
|
||||
|
||||
public boolean allow(final int statusCode) {
|
||||
if (!super.containsKey("Allow")) return false;
|
||||
|
||||
final String allow = get("Allow");
|
||||
return (allow.indexOf(Integer.toString(statusCode))!=-1);
|
||||
}
|
||||
|
||||
// to make the occurrence of multiple keys possible, we add them using a counter
|
||||
public String add(final String key, final String value) {
|
||||
final int c = keyCount(key);
|
||||
if (c == 0) return put(key, value);
|
||||
return put("*" + key + "-" + c, value);
|
||||
}
|
||||
|
||||
public int keyCount(final String key) {
|
||||
if (!(containsKey(key))) return 0;
|
||||
int c = 1;
|
||||
while (containsKey("*" + key + "-" + c)) c++;
|
||||
return c;
|
||||
}
|
||||
|
||||
// a convenience method to access the map with fail-over defaults
|
||||
public Object get(final Object key, final Object dflt) {
|
||||
final Object result = get(key);
|
||||
if (result == null) return dflt;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return multiple results
|
||||
public Object getSingle(final Object key, final int count) {
|
||||
if (count == 0) return get(key, null);
|
||||
return get("*" + key + "-" + count, null);
|
||||
}
|
||||
|
||||
public StringBuilder toHeaderString(final String icapVersion, final int icapStatusCode, String icapStatusText) {
|
||||
|
||||
if ((icapStatusText == null)||(icapStatusText.length()==0)) {
|
||||
if (icapVersion.equals("ICAP/1.0") && icapHeader.icap1_0.containsKey(Integer.toString(icapStatusCode)))
|
||||
icapStatusText = icapHeader.icap1_0.get(Integer.toString(icapStatusCode));
|
||||
}
|
||||
|
||||
final StringBuilder theHeader = new StringBuilder();
|
||||
|
||||
// write status line
|
||||
theHeader.append(icapVersion).append(" ")
|
||||
.append(Integer.toString(icapStatusCode)).append(" ")
|
||||
.append(icapStatusText).append("\r\n");
|
||||
|
||||
// write header
|
||||
final Iterator<String> i = keySet().iterator();
|
||||
String key;
|
||||
char tag;
|
||||
int count;
|
||||
while (i.hasNext()) {
|
||||
key = i.next();
|
||||
tag = key.charAt(0);
|
||||
if ((tag != '*') && (tag != '#')) { // '#' in key is reserved for proxy attributes as artificial header values
|
||||
count = keyCount(key);
|
||||
for (int j = 0; j < count; j++) {
|
||||
theHeader.append(key).append(": ").append((String) getSingle(key, j)).append("\r\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
// end header
|
||||
theHeader.append("\r\n");
|
||||
|
||||
|
||||
return theHeader;
|
||||
}
|
||||
|
||||
public static Properties parseRequestLine(final String cmd, String s, final Properties prop, final String virtualHost) {
|
||||
|
||||
// reset property from previous run
|
||||
prop.clear();
|
||||
|
||||
// storing informations about the request
|
||||
prop.setProperty(CONNECTION_PROP_METHOD, cmd);
|
||||
prop.setProperty(CONNECTION_PROP_REQUESTLINE,cmd + " " + s);
|
||||
|
||||
|
||||
// this parses a whole URL
|
||||
if (s.length() == 0) {
|
||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
||||
prop.setProperty(CONNECTION_PROP_PATH, "/");
|
||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
|
||||
prop.setProperty(CONNECTION_PROP_EXT, "");
|
||||
return prop;
|
||||
}
|
||||
|
||||
// store the version propery "ICAP" and cut the query at both ends
|
||||
int sep = s.indexOf(" ");
|
||||
if (sep >= 0) {
|
||||
// ICAP version is given
|
||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, s.substring(sep + 1).trim());
|
||||
s = s.substring(0, sep).trim(); // cut off ICAP version mark
|
||||
} else {
|
||||
// ICAP version is not given, it will be treated as ver 0.9
|
||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
|
||||
}
|
||||
|
||||
|
||||
String argsString = "";
|
||||
sep = s.indexOf("?");
|
||||
if (sep >= 0) {
|
||||
// there are values attached to the query string
|
||||
argsString = s.substring(sep + 1); // cut haed from tail of query
|
||||
s = s.substring(0, sep);
|
||||
}
|
||||
prop.setProperty(CONNECTION_PROP_URL, s); // store URL
|
||||
if (argsString.length() != 0) prop.setProperty(CONNECTION_PROP_ARGS, argsString); // store arguments in original form
|
||||
|
||||
// finally find host string
|
||||
if (s.toUpperCase().startsWith("ICAP://")) {
|
||||
// a host was given. extract it and set path
|
||||
s = s.substring(7);
|
||||
sep = s.indexOf("/");
|
||||
if (sep < 0) {
|
||||
// this is a malformed url, something like
|
||||
// http://index.html
|
||||
// we are lazy and guess that it means
|
||||
// /index.html
|
||||
// which is a localhost access to the file servlet
|
||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
||||
prop.setProperty(CONNECTION_PROP_PATH, "/" + s);
|
||||
} else {
|
||||
// THIS IS THE "GOOD" CASE
|
||||
// a perfect formulated url
|
||||
prop.setProperty(CONNECTION_PROP_HOST, s.substring(0, sep));
|
||||
prop.setProperty(CONNECTION_PROP_PATH, s.substring(sep)); // yes, including beginning "/"
|
||||
}
|
||||
} else {
|
||||
// no host in url. set path
|
||||
if (s.startsWith("/")) {
|
||||
// thats also fine, its a perfect localhost access
|
||||
// in this case, we simulate a
|
||||
// http://localhost/s
|
||||
// access by setting a virtual host
|
||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
||||
prop.setProperty(CONNECTION_PROP_PATH, s);
|
||||
} else {
|
||||
// the client 'forgot' to set a leading '/'
|
||||
// this is the same case as above, with some lazyness
|
||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
||||
prop.setProperty(CONNECTION_PROP_PATH, "/" + s);
|
||||
}
|
||||
}
|
||||
return prop;
|
||||
|
||||
}
|
||||
|
||||
public static icapHeader readHeader(final Properties prop, final serverCore.Session theSession) {
|
||||
// reading all headers
|
||||
final icapHeader header = new icapHeader();
|
||||
int p;
|
||||
String line;
|
||||
while ((line = theSession.readLineAsString()) != null) {
|
||||
if (line.length() == 0) break; // this seperates the header of the HTTP request from the body
|
||||
// parse the header line: a property seperated with the ':' sign
|
||||
if ((p = line.indexOf(":")) >= 0) {
|
||||
// store a property
|
||||
header.add(line.substring(0, p).trim(), line.substring(p + 1).trim());
|
||||
}
|
||||
}
|
||||
|
||||
return header;
|
||||
}
|
||||
}
|
@ -1,433 +0,0 @@
|
||||
//icapd.java
|
||||
//-----------------------
|
||||
//(C) by Michael Peter Christen; mc@yacy.net
|
||||
//first published on http://www.anomic.de
|
||||
//Frankfurt, Germany, 2004
|
||||
//
|
||||
//This file is contributed by Martin Thelian
|
||||
//last major change: $LastChangedDate$ by $LastChangedBy$
|
||||
//Revision: $LastChangedRevision$
|
||||
//
|
||||
//This program is free software; you can redistribute it and/or modify
|
||||
//it under the terms of the GNU General Public License as published by
|
||||
//the Free Software Foundation; either version 2 of the License, or
|
||||
//(at your option) any later version.
|
||||
//
|
||||
//This program is distributed in the hope that it will be useful,
|
||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
//GNU General Public License for more details.
|
||||
//
|
||||
//You should have received a copy of the GNU General Public License
|
||||
//along with this program; if not, write to the Free Software
|
||||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
package de.anomic.icap;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.OutputStream;
|
||||
import java.net.InetAddress;
|
||||
import java.util.Date;
|
||||
import java.util.Properties;
|
||||
|
||||
import de.anomic.document.ParserDispatcher;
|
||||
import de.anomic.http.httpChunkedInputStream;
|
||||
import de.anomic.http.httpHeader;
|
||||
import de.anomic.http.httpRequestHeader;
|
||||
import de.anomic.http.httpResponseHeader;
|
||||
import de.anomic.http.httpDocument;
|
||||
import de.anomic.kelondro.util.DateFormatter;
|
||||
import de.anomic.kelondro.util.FileUtils;
|
||||
import de.anomic.plasma.plasmaHTCache;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverCore;
|
||||
import de.anomic.server.serverHandler;
|
||||
import de.anomic.server.serverCore.Session;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
import de.anomic.yacy.logging.Log;
|
||||
|
||||
/**
|
||||
* @author theli
|
||||
*/
|
||||
public class icapd implements serverHandler, Cloneable {
|
||||
|
||||
|
||||
private serverCore.Session session; // holds the session object of the calling class
|
||||
|
||||
// the connection properties
|
||||
private final Properties prop = new Properties();
|
||||
|
||||
// the address of the client
|
||||
private InetAddress userAddress;
|
||||
private String clientIP;
|
||||
private int keepAliveRequestCount = 0;
|
||||
|
||||
// needed for logging
|
||||
private static final Log log = new Log("ICAPD");
|
||||
|
||||
private static plasmaSwitchboard sb = null;
|
||||
private static String virtualHost = null;
|
||||
private static boolean keepAliveSupport = true;
|
||||
|
||||
|
||||
|
||||
public icapd() {
|
||||
if (sb == null) {
|
||||
sb = plasmaSwitchboard.getSwitchboard();
|
||||
virtualHost = sb.getConfig("fileHost","localhost");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public icapd clone(){
|
||||
return new icapd();
|
||||
}
|
||||
|
||||
public void initSession(final Session aSession) throws IOException {
|
||||
this.session = aSession;
|
||||
this.userAddress = aSession.userAddress; // client InetAddress
|
||||
this.clientIP = this.userAddress.getHostAddress();
|
||||
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
|
||||
if (this.clientIP.startsWith("0:0:0:0:0:0:0:1")) this.clientIP = "localhost";
|
||||
if (this.clientIP.startsWith("127.")) this.clientIP = "localhost";
|
||||
}
|
||||
|
||||
public String greeting() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
public String error(final Throwable e) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
}
|
||||
|
||||
public Boolean EMPTY(final String arg) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
return serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
|
||||
public Boolean UNKNOWN(final String requestLine) throws IOException {
|
||||
// TODO Auto-generated method stub
|
||||
return serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
|
||||
public icapHeader getDefaultHeaders() {
|
||||
final icapHeader newHeaders = new icapHeader();
|
||||
|
||||
newHeaders.put(icapHeader.SERVER,"YaCy/" + sb.getConfig("vString",""));
|
||||
newHeaders.put(icapHeader.DATE, DateFormatter.formatRFC1123(new Date()));
|
||||
newHeaders.put(icapHeader.ISTAG, "\"" + sb.getConfig("vString","") + "\"");
|
||||
|
||||
return newHeaders;
|
||||
}
|
||||
|
||||
public Boolean OPTIONS(final String arg) throws IOException {
|
||||
|
||||
final BufferedOutputStream out = new BufferedOutputStream(this.session.out);
|
||||
|
||||
// parsing the http request line
|
||||
parseRequestLine(icapHeader.METHOD_OPTIONS,arg);
|
||||
|
||||
// reading the headers
|
||||
final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session);
|
||||
|
||||
// determines if the connection should be kept alive
|
||||
final boolean persistent = handlePersistentConnection(icapReqHeader);
|
||||
|
||||
// setting the icap response headers
|
||||
final icapHeader resHeader = getDefaultHeaders();
|
||||
resHeader.put(icapHeader.ALLOW,"204");
|
||||
resHeader.put(icapHeader.ENCAPSULATED,"null-body=0");
|
||||
resHeader.put(icapHeader.MAX_CONNECTIONS,"1000");
|
||||
resHeader.put(icapHeader.OPTIONS_TTL,"300");
|
||||
resHeader.put(icapHeader.SERVICE_ID, "???");
|
||||
resHeader.put(icapHeader.PREVIEW, "30");
|
||||
resHeader.put(icapHeader.TRANSFER_COMPLETE, "*");
|
||||
//resHeader.put(icapHeader.TRANSFER_PREVIEW, "*");
|
||||
if (!persistent) resHeader.put(icapHeader.CONNECTION, "close");
|
||||
|
||||
|
||||
// determining the requested service and call it or send back an error message
|
||||
final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,"");
|
||||
if (reqService.equalsIgnoreCase("/resIndexing")) {
|
||||
resHeader.put(icapHeader.SERVICE, "YaCy ICAP Indexing Service 1.0");
|
||||
resHeader.put(icapHeader.METHODS,icapHeader.METHOD_RESPMOD);
|
||||
|
||||
String transferIgnoreList = ParserDispatcher.getMediaExtList();
|
||||
transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1);
|
||||
resHeader.put(icapHeader.TRANSFER_IGNORE, transferIgnoreList);
|
||||
} else {
|
||||
resHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
||||
}
|
||||
|
||||
|
||||
final StringBuilder header = resHeader.toHeaderString("ICAP/1.0",200,null);
|
||||
out.write(header.toString().getBytes());
|
||||
out.flush();
|
||||
|
||||
return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
|
||||
public Boolean REQMOD() {
|
||||
return serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
|
||||
public Boolean RESPMOD(final String arg) {
|
||||
try {
|
||||
final InputStream in = this.session.in;
|
||||
final OutputStream out = this.session.out;
|
||||
|
||||
// parsing the icap request line
|
||||
parseRequestLine(icapHeader.METHOD_RESPMOD,arg);
|
||||
|
||||
// reading the icap request header
|
||||
final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session);
|
||||
|
||||
// determines if the connection should be kept alive
|
||||
handlePersistentConnection(icapReqHeader);
|
||||
|
||||
// determining the requested service and call it or send back an error message
|
||||
final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,"");
|
||||
if (reqService.equalsIgnoreCase("/resIndexing")) {
|
||||
indexingService(icapReqHeader,in,out);
|
||||
} else {
|
||||
final icapHeader icapResHeader = getDefaultHeaders();
|
||||
icapResHeader.put(icapHeader.ENCAPSULATED,icapReqHeader.get(icapHeader.ENCAPSULATED));
|
||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
||||
// icapResHeader.put(icapHeader.CONNECTION, "close");
|
||||
|
||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",404,null);
|
||||
out.write((new String(header)).getBytes());
|
||||
out.flush();
|
||||
}
|
||||
|
||||
|
||||
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
|
||||
}
|
||||
return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
|
||||
/*
|
||||
private void blacklistService(icapHeader reqHeader, InputStream in, OutputStream out) {
|
||||
try {
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
private void indexingService(final icapHeader reqHeader, final InputStream in, final OutputStream out) {
|
||||
try {
|
||||
|
||||
/* =========================================================================
|
||||
* Reading the various message parts into buffers
|
||||
* ========================================================================= */
|
||||
ByteArrayInputStream reqHdrStream = null, resHdrStream = null, resBodyStream = null;
|
||||
final String[] encapsulated = (reqHeader.get(icapHeader.ENCAPSULATED)).split(",");
|
||||
int prevLength = 0, currLength=0;
|
||||
for (int i=0; i < encapsulated.length; i++) {
|
||||
// reading the request header
|
||||
if (encapsulated[i].indexOf("req-hdr")>=0) {
|
||||
prevLength = currLength;
|
||||
currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]);
|
||||
|
||||
final byte[] buffer = new byte[currLength-prevLength];
|
||||
final int bytesRead = in.read(buffer, 0, buffer.length);
|
||||
assert bytesRead == buffer.length;
|
||||
|
||||
reqHdrStream = new ByteArrayInputStream(buffer);
|
||||
|
||||
// reading the response header
|
||||
} else if (encapsulated[i].indexOf("res-hdr")>=0) {
|
||||
prevLength = currLength;
|
||||
currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]);
|
||||
|
||||
final byte[] buffer = new byte[currLength-prevLength];
|
||||
final int bytesRead = in.read(buffer, 0, buffer.length);
|
||||
assert bytesRead == buffer.length;
|
||||
|
||||
resHdrStream = new ByteArrayInputStream(buffer);
|
||||
|
||||
// reading the response body
|
||||
} else if (encapsulated[i].indexOf("res-body")>=0) {
|
||||
final httpChunkedInputStream chunkedIn = new httpChunkedInputStream(in);
|
||||
final ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||
int l = 0,len = 0;
|
||||
final byte[] buffer = new byte[2048];
|
||||
while ((l = chunkedIn.read(buffer)) >= 0) {
|
||||
len += l;
|
||||
bout.write(buffer,0,l);
|
||||
}
|
||||
resBodyStream = new ByteArrayInputStream(bout.toByteArray());
|
||||
}
|
||||
}
|
||||
|
||||
/* =========================================================================
|
||||
* sending back the icap status
|
||||
* ========================================================================= */
|
||||
final icapHeader icapResHeader = getDefaultHeaders();
|
||||
if (reqHeader.allow(204)) {
|
||||
icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED));
|
||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
||||
// resHeader.put(icapHeader.CONNECTION, "close");
|
||||
|
||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",204,null);
|
||||
out.write((new String(header)).getBytes());
|
||||
out.flush();
|
||||
} else {
|
||||
icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED));
|
||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
||||
// icapResHeader.put(icapHeader.CONNECTION, "close");
|
||||
|
||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",503,null);
|
||||
out.write((new String(header)).getBytes());
|
||||
out.flush();
|
||||
}
|
||||
|
||||
/* =========================================================================
|
||||
* Parsing request data
|
||||
* ========================================================================= */
|
||||
// reading the requestline
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(reqHdrStream));
|
||||
final String httpRequestLine = reader.readLine();
|
||||
|
||||
// parsing the requestline
|
||||
final Properties httpReqProps = new Properties();
|
||||
httpRequestHeader.parseRequestLine(httpRequestLine,httpReqProps,virtualHost);
|
||||
|
||||
if (!httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD).equals(httpHeader.METHOD_GET)) {
|
||||
log.logInfo("Wrong http request method for indexing:" +
|
||||
"\nRequest Method: " + httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD) +
|
||||
"\nRequest Line: " + httpRequestLine);
|
||||
reader.close();
|
||||
if(reqHdrStream != null) {
|
||||
reqHdrStream.close();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// reading all request headers
|
||||
final httpRequestHeader httpReqHeader = new httpRequestHeader();
|
||||
httpReqHeader.readHttpHeader(reader);
|
||||
reader.close();
|
||||
if(reqHdrStream != null) {
|
||||
reqHdrStream.close();
|
||||
}
|
||||
|
||||
// handle transparent proxy support: this function call is needed to set the host property properly
|
||||
httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true);
|
||||
|
||||
// getting the request URL
|
||||
final yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps);
|
||||
|
||||
/* =========================================================================
|
||||
* Parsing response data
|
||||
* ========================================================================= */
|
||||
// getting the response status
|
||||
reader = new BufferedReader(new InputStreamReader(resHdrStream));
|
||||
final String httpRespStatusLine = reader.readLine();
|
||||
|
||||
final Object[] httpRespStatus = httpResponseHeader.parseResponseLine(httpRespStatusLine);
|
||||
|
||||
if (!(httpRespStatus[1].equals(Integer.valueOf(200)) || httpRespStatus[1].equals(Integer.valueOf(203)))) {
|
||||
log.logInfo("Wrong status code for indexing:" +
|
||||
"\nStatus Code: " + httpRespStatus[1] +
|
||||
"\nRequest Line: " + httpRequestLine +
|
||||
"\nResponse Line: " + httpRespStatusLine);
|
||||
reader.close();
|
||||
if(resHdrStream != null) {
|
||||
resHdrStream.close();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// reading all response headers
|
||||
final httpResponseHeader httpResHeader = new httpResponseHeader();
|
||||
httpResHeader.readHttpHeader(reader);
|
||||
reader.close();
|
||||
if(resHdrStream != null) {
|
||||
resHdrStream.close();
|
||||
}
|
||||
|
||||
if (!ParserDispatcher.supportedContent(httpRequestURL, httpResHeader.mime())) {
|
||||
log.logInfo("Wrong mimeType or fileExtension for indexing:" +
|
||||
"\nMimeType: " + httpResHeader.mime() +
|
||||
"\nRequest Line:" + httpRequestLine);
|
||||
return ;
|
||||
}
|
||||
|
||||
|
||||
/* =========================================================================
|
||||
* Prepare data for indexing
|
||||
* ========================================================================= */
|
||||
|
||||
// generating a htcache entry object
|
||||
final httpDocument cacheEntry = new httpDocument(
|
||||
0,
|
||||
httpRequestURL,
|
||||
"",
|
||||
httpRespStatusLine,
|
||||
httpReqHeader, httpResHeader,
|
||||
null,
|
||||
sb.crawler.defaultProxyProfile
|
||||
);
|
||||
|
||||
// copy the response body into the file
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
FileUtils.copy(resBodyStream, baos);
|
||||
if(resBodyStream != null) {
|
||||
resBodyStream.close(); resBodyStream = null;
|
||||
}
|
||||
cacheEntry.setCacheArray(baos.toByteArray());
|
||||
plasmaHTCache.storeMetadata(httpResHeader, cacheEntry);
|
||||
|
||||
// indexing the response
|
||||
sb.htEntryStoreProcess(cacheEntry);
|
||||
} catch (final Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private final void parseRequestLine(final String cmd, final String s) {
|
||||
// parsing the requestlin
|
||||
icapHeader.parseRequestLine(cmd,s, this.prop,virtualHost);
|
||||
|
||||
// adding the client ip prop
|
||||
this.prop.setProperty(icapHeader.CONNECTION_PROP_CLIENTIP, this.clientIP);
|
||||
|
||||
// counting the amount of received requests within this permanent conneciton
|
||||
this.prop.setProperty(icapHeader.CONNECTION_PROP_KEEP_ALIVE_COUNT, Integer.toString(++this.keepAliveRequestCount));
|
||||
}
|
||||
|
||||
private boolean handlePersistentConnection(final icapHeader header) {
|
||||
|
||||
if (!keepAliveSupport) {
|
||||
this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,"close");
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean persistent = true;
|
||||
if (((String)header.get(icapHeader.CONNECTION, "keep-alive")).toLowerCase().equals("close")) {
|
||||
persistent = false;
|
||||
}
|
||||
|
||||
this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,persistent?"keep-alive":"close");
|
||||
return persistent;
|
||||
}
|
||||
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
#!/usr/bin/perl -w
|
||||
#
|
||||
# This is a URL Redirector Script for squid that can be
|
||||
# used to bundle YaCy and Squid together via the squid
|
||||
# redirector support.
|
||||
# See: http://www.squid-cache.org/Doc/FAQ/FAQ-15.html
|
||||
#
|
||||
# This script forwards URLs from squid to YaCy where the
|
||||
# URLs are used to download and index the content of the URLs.
|
||||
use strict;
|
||||
use Socket qw(:DEFAULT :crlf);
|
||||
use IO::Handle;
|
||||
use Digest::MD5;
|
||||
|
||||
# administrator account used to authenticate against YaCy
my ($user, $pwd) = ("user", "");

# address of the YaCy peer
my ($host, $port) = ("localhost", "8080");

# set to 1 to also forward URLs recognised by isCGI / isPOST
my ($allowCgi, $allowPost) = (0, 0);

# file extensions to ignore (filled from YaCy) and the fields of the current request line
my (@mediaExt, @requestData);

# unbuffered output on the currently selected handle (STDOUT)
$| = 1;
|
||||
|
||||
# Returns 1 if the lower-cased URL contains one of the markers that identify
# CGI programs or session ids, and "" otherwise.
sub isCGI {
    my $url = lc shift;
    foreach my $marker (".cgi", ".exe", ";jsessionid=", "sessionid/", "phpsessid=") {
        return 1 if index($url, $marker) >= 0;
    }
    return "";
}
|
||||
|
||||
# Returns 1 if the URL carries request parameters, i.e. contains a '?' or
# a '&', and "" otherwise.
sub isPOST {
    my $url = lc shift;
    return 1 if $url =~ /[?&]/;
    return "";
}
|
||||
|
||||
# Tests whether the extension of a URL is contained in a list of extensions.
#
#   $_[0]  the URL
#   $_[1]  reference to the list of extensions (without the leading dot)
#
# The extension is everything after the last '.' of the URL. It is compared
# as a whole string and without regard to letter case; surrounding whitespace
# (e.g. a carriage return left over from the network) is ignored in the list
# entries. Returns the number of matching list entries, 0 if there is none or
# the URL has no extension.
sub isMediaExt {
    my ($url, $extListRef) = @_;
    return 0 unless defined($url) && ref($extListRef) eq 'ARRAY';

    my $pos = rindex $url, ".";
    return 0 if $pos == -1;

    my $ext = lc substr($url, $pos + 1);
    return 0 if $ext eq "";

    my $matches = 0;
    foreach my $entry (@{$extListRef}) {
        next unless defined $entry;
        my $known = lc $entry;
        $known =~ s/^\s+|\s+$//g;
        # string comparison: the extension comes from an arbitrary URL and
        # must not be interpreted as a regular expression
        $matches++ if $known eq $ext;
    }
    return $matches;
}
|
||||
|
||||
my ($bytes_out,$bytes_in) = (0,0);
my ($msg_in,$msg_out);

# connecting to YaCy
my $protocol  = getprotobyname('tcp');
# keep $host untouched so that the error message can still name it
my $host_addr = inet_aton($host) or die "$host: unknown host";

socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die "socket() failed: $!";
my $dest_addr = sockaddr_in($port,$host_addr);
connect(SOCK,$dest_addr) or die("connect() failed: $!");

# enabling autoflush
SOCK->autoflush(1);

# sending the REDIRECTOR command to yacy to enable the proper
# command handler
print SOCK "REDIRECTOR".CRLF;

# Doing authentication
my $ctx = Digest::MD5->new;
$ctx->add($user.":".$pwd);
my $md5Pwd = $ctx->hexdigest;

print SOCK "USER ".$user.CRLF;
print SOCK "PWD ".$md5Pwd.CRLF;

# Getting a list of file extensions that should be ignored
print SOCK "MEDIAEXT".CRLF;
$msg_in = <SOCK>;
defined($msg_in) or die "Socket closed before the media extension list was received";
$msg_in = lc $msg_in;
# remove the complete line ending; chomp alone would leave a carriage
# return attached to the last extension
$msg_in =~ s/[\r\n]+\z//;
@mediaExt = split(/,\s*/, $msg_in);

# 1) Reading URLs from stdIn
# 2) Send it to Yacy
# 3) Receive response from YaCy
# 4) Print response to StdOut
while (defined($msg_out = <>)) {
    chomp $msg_out;

    # splitting request into its various parts
    #
    # One squid redirector request line typically looks like this:
    # http://www.pageresource.com/styles/tuts.css 192.168.0.5/- - GET
    @requestData = split(/\s+/, $msg_out);

    # a blank line carries no URL: answer it with an empty reply
    if (!defined($requestData[0])) {
        print STDOUT CRLF;
        next;
    }

    # testing if the URL is CGI
    if (!$allowCgi && isCGI($requestData[0])) {
        print STDOUT CRLF;
        print STDERR "URL is cgi: ".$msg_out.CRLF;
        next;
    }

    # testing if the URL is a POST request
    if (!$allowPost && isPOST($requestData[0])){
        print STDOUT CRLF;
        print STDERR "URL is post: ".$msg_out.CRLF;
        next;
    }

    # testing if the requested content is a media content
    if (isMediaExt($requestData[0],\@mediaExt)) {
        print STDOUT CRLF;
        print STDERR "URL has media extension: ".$msg_out.CRLF;
        next;
    }

    # sending the whole request line to YaCy
    $msg_out .= CRLF;
    print SOCK $msg_out;

    # reading the response
    if (defined($msg_in = <SOCK>)) {
        print STDOUT $msg_in;
    } else {
        print STDERR "Socket closed".CRLF;
        close SOCK;
        exit(1);
    }

    $bytes_out += length($msg_out);
    $bytes_in  += length($msg_in);
}
print SOCK "EXIT".CRLF;

close SOCK;
print STDERR "bytes_sent = $bytes_out, bytes_received = $bytes_in\n";
|
||||
|
@ -1,232 +0,0 @@
|
||||
package de.anomic.server;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintWriter;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Date;
|
||||
|
||||
import de.anomic.crawler.CrawlEntry;
|
||||
import de.anomic.crawler.CrawlProfile;
|
||||
import de.anomic.data.userDB;
|
||||
import de.anomic.document.ParserDispatcher;
|
||||
import de.anomic.http.httpClient;
|
||||
import de.anomic.http.httpResponseHeader;
|
||||
import de.anomic.plasma.plasmaSwitchboard;
|
||||
import de.anomic.server.serverCore.Session;
|
||||
import de.anomic.yacy.yacyURL;
|
||||
import de.anomic.yacy.logging.Log;
|
||||
|
||||
public class urlRedirectord implements serverHandler, Cloneable {
|
||||
|
||||
private serverCore.Session session;
|
||||
private static plasmaSwitchboard sb = null;
|
||||
private static final Log theLogger = new Log("URL-REDIRECTOR");
|
||||
private static CrawlProfile.entry profile = null;
|
||||
private String nextURL;
|
||||
|
||||
public urlRedirectord() {
|
||||
if (sb == null) {
|
||||
sb = plasmaSwitchboard.getSwitchboard();
|
||||
}
|
||||
|
||||
if (profile == null) {
|
||||
profile = sb.crawler.profilesActiveCrawls.newEntry(
|
||||
// name
|
||||
"URL Redirector",
|
||||
// start URL
|
||||
null,
|
||||
// crawling filter
|
||||
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
|
||||
// depth
|
||||
0,
|
||||
// recrawlIfOlder (minutes), if negative: do not re-crawl
|
||||
-1,
|
||||
// domFilterDepth, if negative: no auto-filter
|
||||
-1,
|
||||
// domMaxPages, if negative: no count restriction
|
||||
-1,
|
||||
// crawlDynamic
|
||||
false,
|
||||
// indexText
|
||||
true,
|
||||
// indexMedia
|
||||
true,
|
||||
// storeHTCache
|
||||
false,
|
||||
// storeTxCache
|
||||
true,
|
||||
// remoteIndexing
|
||||
false,
|
||||
// xsstopw
|
||||
true,
|
||||
// xdstopw
|
||||
true,
|
||||
// xpstopw
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public String getURL() {
|
||||
return this.nextURL;
|
||||
}
|
||||
|
||||
public void initSession(final Session theSession){
|
||||
// getting current session
|
||||
this.session = theSession;
|
||||
}
|
||||
|
||||
public String greeting() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public String error(final Throwable e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public urlRedirectord clone() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
this.session = null;
|
||||
}
|
||||
|
||||
public Boolean EMPTY(final String arg) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Boolean UNKNOWN(final String requestLine) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Boolean REDIRECTOR(final String requestLine) {
|
||||
try {
|
||||
|
||||
boolean authenticated = false;
|
||||
String userName = null;
|
||||
String md5Pwd = null;
|
||||
|
||||
// setting timeout
|
||||
this.session.controlSocket.setSoTimeout(0);
|
||||
|
||||
String line = null;
|
||||
final BufferedReader inputReader = new BufferedReader(new InputStreamReader(this.session.in));
|
||||
final PrintWriter outputWriter = new PrintWriter(this.session.out);
|
||||
|
||||
while ((line = inputReader.readLine()) != null) {
|
||||
if (line.equals("EXIT")) {
|
||||
break;
|
||||
} else if (line.startsWith("#")) {
|
||||
outputWriter.print("\r\n");
|
||||
outputWriter.flush();
|
||||
continue;
|
||||
} else if (line.startsWith("USER")) {
|
||||
userName = line.substring(line.indexOf(" ")).trim();
|
||||
} else if (line.startsWith("PWD")) {
|
||||
if (userName != null) {
|
||||
final userDB.Entry userEntry = sb.userDB.getEntry(userName);
|
||||
if (userEntry != null) {
|
||||
md5Pwd = line.substring(line.indexOf(" ")).trim();
|
||||
if (userEntry.getMD5EncodedUserPwd().equals(md5Pwd)) {
|
||||
authenticated = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (line.startsWith("MEDIAEXT")) {
|
||||
String transferIgnoreList = ParserDispatcher.getMediaExtList();
|
||||
transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1);
|
||||
|
||||
outputWriter.print(transferIgnoreList);
|
||||
outputWriter.print("\r\n");
|
||||
outputWriter.flush();
|
||||
} else if (line.startsWith("DEPTH")) {
|
||||
final int pos = line.indexOf(" ");
|
||||
if (pos != -1) {
|
||||
final String newDepth = line.substring(pos).trim();
|
||||
theLogger.logFine("Changing crawling depth to '" + newDepth + "'.");
|
||||
sb.crawler.profilesActiveCrawls.changeEntry(profile, "generalDepth",newDepth);
|
||||
}
|
||||
outputWriter.print("\r\n");
|
||||
outputWriter.flush();
|
||||
} else if (line.startsWith("CRAWLDYNAMIC")) {
|
||||
final int pos = line.indexOf(" ");
|
||||
if (pos != -1) {
|
||||
final String newValue = line.substring(pos).trim();
|
||||
theLogger.logFine("Changing crawl dynamic setting to '" + newValue + "'");
|
||||
sb.crawler.profilesActiveCrawls.changeEntry(profile, "crawlingQ",newValue);
|
||||
}
|
||||
outputWriter.print("\r\n");
|
||||
outputWriter.flush();
|
||||
} else {
|
||||
if (!authenticated) {
|
||||
return Boolean.FALSE;
|
||||
}
|
||||
|
||||
final int pos = line.indexOf(" ");
|
||||
this.nextURL = (pos != -1) ? line.substring(0,pos):line;
|
||||
|
||||
theLogger.logFine("Receiving request " + line);
|
||||
outputWriter.print("\r\n");
|
||||
outputWriter.flush();
|
||||
|
||||
String reasonString = null;
|
||||
try {
|
||||
// generating URL Object
|
||||
final yacyURL reqURL = new yacyURL(this.nextURL, null);
|
||||
|
||||
// getting URL mimeType
|
||||
final httpResponseHeader header = httpClient.whead(reqURL.toString());
|
||||
|
||||
if (ParserDispatcher.supportedContent(
|
||||
reqURL,
|
||||
header.mime())
|
||||
) {
|
||||
// first delete old entry, if exists
|
||||
final String urlhash = reqURL.hash();
|
||||
sb.indexSegment.urlMetadata().remove(urlhash);
|
||||
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
|
||||
sb.crawlQueues.errorURL.remove(urlhash);
|
||||
|
||||
// enqueuing URL for crawling
|
||||
sb.crawlStacker.enqueueEntry(new CrawlEntry(
|
||||
sb.peers.mySeed().hash,
|
||||
reqURL,
|
||||
null,
|
||||
"URL Redirector",
|
||||
new Date(),
|
||||
null,
|
||||
profile.handle(),
|
||||
0,
|
||||
0,
|
||||
0
|
||||
));
|
||||
} else {
|
||||
reasonString = "Unsupporte file extension";
|
||||
}
|
||||
} catch (final MalformedURLException badUrlEx) {
|
||||
reasonString = "Malformed URL";
|
||||
}
|
||||
|
||||
if (reasonString != null) {
|
||||
theLogger.logFine("URL " + nextURL + " rejected. Reason: " + reasonString);
|
||||
}
|
||||
nextURL = null;
|
||||
}
|
||||
}
|
||||
|
||||
theLogger.logFine("Connection terminated");
|
||||
|
||||
// Terminating connection
|
||||
return serverCore.TERMINATE_CONNECTION;
|
||||
} catch (final Exception e) {
|
||||
theLogger.logSevere("Unexpected Error: " + e.getMessage(),e);
|
||||
return serverCore.TERMINATE_CONNECTION;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
Loading…
Reference in new issue