git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@6185 6c8d7289-2bf4-0310-a012-ef5d649a1542pull/1/head
parent
0e8647d62f
commit
499723891d
@ -1,286 +0,0 @@
|
|||||||
//icapHeader.java
|
|
||||||
//-----------------------
|
|
||||||
//(C) by Michael Peter Christen; mc@yacy.net
|
|
||||||
//first published on http://www.anomic.de
|
|
||||||
//Frankfurt, Germany, 2004
|
|
||||||
//
|
|
||||||
//This file is contributed by Martin Thelian
|
|
||||||
//last major change: $LastChangedDate$ by $LastChangedBy$
|
|
||||||
//Revision: $LastChangedRevision$
|
|
||||||
//
|
|
||||||
//This program is free software; you can redistribute it and/or modify
|
|
||||||
//it under the terms of the GNU General Public License as published by
|
|
||||||
//the Free Software Foundation; either version 2 of the License, or
|
|
||||||
//(at your option) any later version.
|
|
||||||
//
|
|
||||||
//This program is distributed in the hope that it will be useful,
|
|
||||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
//GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
//You should have received a copy of the GNU General Public License
|
|
||||||
//along with this program; if not, write to the Free Software
|
|
||||||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
package de.anomic.icap;
|
|
||||||
|
|
||||||
import java.text.Collator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
import de.anomic.server.serverCore;
|
|
||||||
|
|
||||||
public class icapHeader extends TreeMap<String, String> implements Map<String, String> {
|
|
||||||
|
|
||||||
private static final long serialVersionUID = 1L;
|
|
||||||
|
|
||||||
/* =============================================================
|
|
||||||
* Constants defining icap methods
|
|
||||||
* ============================================================= */
|
|
||||||
public static final String METHOD_REQMOD = "REQMOD";
|
|
||||||
public static final String METHOD_RESPMOD = "RESPMOD";
|
|
||||||
public static final String METHOD_OPTIONS = "OPTIONS";
|
|
||||||
|
|
||||||
/* =============================================================
|
|
||||||
* Constants defining http header names
|
|
||||||
* ============================================================= */
|
|
||||||
public static final String HOST = "Host";
|
|
||||||
public static final String USER_AGENT = "User-Agent";
|
|
||||||
public static final String CONNECTION = "Connection";
|
|
||||||
public static final String DATE = "Date";
|
|
||||||
public static final String SERVER = "Server";
|
|
||||||
public static final String ISTAG = "ISTAG";
|
|
||||||
public static final String METHODS = "Methods";
|
|
||||||
public static final String ALLOW = "Allow";
|
|
||||||
public static final String ENCAPSULATED = "Encapsulated";
|
|
||||||
public static final String MAX_CONNECTIONS = "Max-Connections";
|
|
||||||
public static final String OPTIONS_TTL = "Options-TTL";
|
|
||||||
public static final String SERVICE = "Service";
|
|
||||||
public static final String SERVICE_ID = "Service-ID";
|
|
||||||
public static final String PREVIEW = "Preview";
|
|
||||||
public static final String TRANSFER_PREVIEW = "Transfer-Preview";
|
|
||||||
public static final String TRANSFER_IGNORE = "Transfer-Ignore";
|
|
||||||
public static final String TRANSFER_COMPLETE = "Transfer-Complete";
|
|
||||||
|
|
||||||
public static final String X_YACY_KEEP_ALIVE_REQUEST_COUNT = "X-Keep-Alive-Request-Count";
|
|
||||||
|
|
||||||
/* =============================================================
|
|
||||||
* defining default icap status messages
|
|
||||||
* ============================================================= */
|
|
||||||
public static final HashMap<String, String> icap1_0 = new HashMap<String, String>();
|
|
||||||
static {
|
|
||||||
// (1yz) Informational codes
|
|
||||||
icap1_0.put("100","Continue after ICAP preview");
|
|
||||||
|
|
||||||
// (2yz) Success codes:
|
|
||||||
icap1_0.put("200","OK");
|
|
||||||
icap1_0.put("204","No modifications needed");
|
|
||||||
|
|
||||||
// (4yz) Client error codes:
|
|
||||||
icap1_0.put("400","Bad request");
|
|
||||||
icap1_0.put("404","ICAP Service not found");
|
|
||||||
icap1_0.put("405","Method not allowed for service");
|
|
||||||
icap1_0.put("408","Request timeout");
|
|
||||||
|
|
||||||
// (5yz) Server error codes:
|
|
||||||
icap1_0.put("500","Server error");
|
|
||||||
icap1_0.put("501","Method not implemented");
|
|
||||||
icap1_0.put("502","Bad Gateway");
|
|
||||||
icap1_0.put("503","Service overloaded");
|
|
||||||
icap1_0.put("505","ICAP version not supported by server");
|
|
||||||
}
|
|
||||||
|
|
||||||
/* PROPERTIES: General properties */
|
|
||||||
public static final String CONNECTION_PROP_ICAP_VER = "ICAP";
|
|
||||||
public static final String CONNECTION_PROP_HOST = "HOST";
|
|
||||||
public static final String CONNECTION_PROP_PATH = "PATH";
|
|
||||||
public static final String CONNECTION_PROP_EXT = "EXT";
|
|
||||||
public static final String CONNECTION_PROP_METHOD = "METHOD";
|
|
||||||
public static final String CONNECTION_PROP_REQUESTLINE = "REQUESTLINE";
|
|
||||||
public static final String CONNECTION_PROP_CLIENTIP = "CLIENTIP";
|
|
||||||
public static final String CONNECTION_PROP_URL = "URL";
|
|
||||||
public static final String CONNECTION_PROP_ARGS = "ARGS";
|
|
||||||
public static final String CONNECTION_PROP_PERSISTENT = "PERSISTENT";
|
|
||||||
public static final String CONNECTION_PROP_KEEP_ALIVE_COUNT = "KEEP-ALIVE_COUNT";
|
|
||||||
|
|
||||||
private static final Collator insensitiveCollator = Collator.getInstance(Locale.US);
|
|
||||||
static {
|
|
||||||
insensitiveCollator.setStrength(Collator.SECONDARY);
|
|
||||||
insensitiveCollator.setDecomposition(Collator.NO_DECOMPOSITION);
|
|
||||||
}
|
|
||||||
|
|
||||||
public icapHeader() {
|
|
||||||
super(insensitiveCollator);
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean allow(final int statusCode) {
|
|
||||||
if (!super.containsKey("Allow")) return false;
|
|
||||||
|
|
||||||
final String allow = get("Allow");
|
|
||||||
return (allow.indexOf(Integer.toString(statusCode))!=-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// to make the occurrence of multiple keys possible, we add them using a counter
|
|
||||||
public String add(final String key, final String value) {
|
|
||||||
final int c = keyCount(key);
|
|
||||||
if (c == 0) return put(key, value);
|
|
||||||
return put("*" + key + "-" + c, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int keyCount(final String key) {
|
|
||||||
if (!(containsKey(key))) return 0;
|
|
||||||
int c = 1;
|
|
||||||
while (containsKey("*" + key + "-" + c)) c++;
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
// a convenience method to access the map with fail-over defaults
|
|
||||||
public Object get(final Object key, final Object dflt) {
|
|
||||||
final Object result = get(key);
|
|
||||||
if (result == null) return dflt;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
// return multiple results
|
|
||||||
public Object getSingle(final Object key, final int count) {
|
|
||||||
if (count == 0) return get(key, null);
|
|
||||||
return get("*" + key + "-" + count, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
public StringBuilder toHeaderString(final String icapVersion, final int icapStatusCode, String icapStatusText) {
|
|
||||||
|
|
||||||
if ((icapStatusText == null)||(icapStatusText.length()==0)) {
|
|
||||||
if (icapVersion.equals("ICAP/1.0") && icapHeader.icap1_0.containsKey(Integer.toString(icapStatusCode)))
|
|
||||||
icapStatusText = icapHeader.icap1_0.get(Integer.toString(icapStatusCode));
|
|
||||||
}
|
|
||||||
|
|
||||||
final StringBuilder theHeader = new StringBuilder();
|
|
||||||
|
|
||||||
// write status line
|
|
||||||
theHeader.append(icapVersion).append(" ")
|
|
||||||
.append(Integer.toString(icapStatusCode)).append(" ")
|
|
||||||
.append(icapStatusText).append("\r\n");
|
|
||||||
|
|
||||||
// write header
|
|
||||||
final Iterator<String> i = keySet().iterator();
|
|
||||||
String key;
|
|
||||||
char tag;
|
|
||||||
int count;
|
|
||||||
while (i.hasNext()) {
|
|
||||||
key = i.next();
|
|
||||||
tag = key.charAt(0);
|
|
||||||
if ((tag != '*') && (tag != '#')) { // '#' in key is reserved for proxy attributes as artificial header values
|
|
||||||
count = keyCount(key);
|
|
||||||
for (int j = 0; j < count; j++) {
|
|
||||||
theHeader.append(key).append(": ").append((String) getSingle(key, j)).append("\r\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// end header
|
|
||||||
theHeader.append("\r\n");
|
|
||||||
|
|
||||||
|
|
||||||
return theHeader;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Properties parseRequestLine(final String cmd, String s, final Properties prop, final String virtualHost) {
|
|
||||||
|
|
||||||
// reset property from previous run
|
|
||||||
prop.clear();
|
|
||||||
|
|
||||||
// storing informations about the request
|
|
||||||
prop.setProperty(CONNECTION_PROP_METHOD, cmd);
|
|
||||||
prop.setProperty(CONNECTION_PROP_REQUESTLINE,cmd + " " + s);
|
|
||||||
|
|
||||||
|
|
||||||
// this parses a whole URL
|
|
||||||
if (s.length() == 0) {
|
|
||||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
|
||||||
prop.setProperty(CONNECTION_PROP_PATH, "/");
|
|
||||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
|
|
||||||
prop.setProperty(CONNECTION_PROP_EXT, "");
|
|
||||||
return prop;
|
|
||||||
}
|
|
||||||
|
|
||||||
// store the version propery "ICAP" and cut the query at both ends
|
|
||||||
int sep = s.indexOf(" ");
|
|
||||||
if (sep >= 0) {
|
|
||||||
// ICAP version is given
|
|
||||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, s.substring(sep + 1).trim());
|
|
||||||
s = s.substring(0, sep).trim(); // cut off ICAP version mark
|
|
||||||
} else {
|
|
||||||
// ICAP version is not given, it will be treated as ver 0.9
|
|
||||||
prop.setProperty(CONNECTION_PROP_ICAP_VER, "ICAP/1.0");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
String argsString = "";
|
|
||||||
sep = s.indexOf("?");
|
|
||||||
if (sep >= 0) {
|
|
||||||
// there are values attached to the query string
|
|
||||||
argsString = s.substring(sep + 1); // cut haed from tail of query
|
|
||||||
s = s.substring(0, sep);
|
|
||||||
}
|
|
||||||
prop.setProperty(CONNECTION_PROP_URL, s); // store URL
|
|
||||||
if (argsString.length() != 0) prop.setProperty(CONNECTION_PROP_ARGS, argsString); // store arguments in original form
|
|
||||||
|
|
||||||
// finally find host string
|
|
||||||
if (s.toUpperCase().startsWith("ICAP://")) {
|
|
||||||
// a host was given. extract it and set path
|
|
||||||
s = s.substring(7);
|
|
||||||
sep = s.indexOf("/");
|
|
||||||
if (sep < 0) {
|
|
||||||
// this is a malformed url, something like
|
|
||||||
// http://index.html
|
|
||||||
// we are lazy and guess that it means
|
|
||||||
// /index.html
|
|
||||||
// which is a localhost access to the file servlet
|
|
||||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
|
||||||
prop.setProperty(CONNECTION_PROP_PATH, "/" + s);
|
|
||||||
} else {
|
|
||||||
// THIS IS THE "GOOD" CASE
|
|
||||||
// a perfect formulated url
|
|
||||||
prop.setProperty(CONNECTION_PROP_HOST, s.substring(0, sep));
|
|
||||||
prop.setProperty(CONNECTION_PROP_PATH, s.substring(sep)); // yes, including beginning "/"
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// no host in url. set path
|
|
||||||
if (s.startsWith("/")) {
|
|
||||||
// thats also fine, its a perfect localhost access
|
|
||||||
// in this case, we simulate a
|
|
||||||
// http://localhost/s
|
|
||||||
// access by setting a virtual host
|
|
||||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
|
||||||
prop.setProperty(CONNECTION_PROP_PATH, s);
|
|
||||||
} else {
|
|
||||||
// the client 'forgot' to set a leading '/'
|
|
||||||
// this is the same case as above, with some lazyness
|
|
||||||
prop.setProperty(CONNECTION_PROP_HOST, virtualHost);
|
|
||||||
prop.setProperty(CONNECTION_PROP_PATH, "/" + s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return prop;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static icapHeader readHeader(final Properties prop, final serverCore.Session theSession) {
|
|
||||||
// reading all headers
|
|
||||||
final icapHeader header = new icapHeader();
|
|
||||||
int p;
|
|
||||||
String line;
|
|
||||||
while ((line = theSession.readLineAsString()) != null) {
|
|
||||||
if (line.length() == 0) break; // this seperates the header of the HTTP request from the body
|
|
||||||
// parse the header line: a property seperated with the ':' sign
|
|
||||||
if ((p = line.indexOf(":")) >= 0) {
|
|
||||||
// store a property
|
|
||||||
header.add(line.substring(0, p).trim(), line.substring(p + 1).trim());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,433 +0,0 @@
|
|||||||
//icapd.java
|
|
||||||
//-----------------------
|
|
||||||
//(C) by Michael Peter Christen; mc@yacy.net
|
|
||||||
//first published on http://www.anomic.de
|
|
||||||
//Frankfurt, Germany, 2004
|
|
||||||
//
|
|
||||||
//This file is contributed by Martin Thelian
|
|
||||||
//last major change: $LastChangedDate$ by $LastChangedBy$
|
|
||||||
//Revision: $LastChangedRevision$
|
|
||||||
//
|
|
||||||
//This program is free software; you can redistribute it and/or modify
|
|
||||||
//it under the terms of the GNU General Public License as published by
|
|
||||||
//the Free Software Foundation; either version 2 of the License, or
|
|
||||||
//(at your option) any later version.
|
|
||||||
//
|
|
||||||
//This program is distributed in the hope that it will be useful,
|
|
||||||
//but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
//GNU General Public License for more details.
|
|
||||||
//
|
|
||||||
//You should have received a copy of the GNU General Public License
|
|
||||||
//along with this program; if not, write to the Free Software
|
|
||||||
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
||||||
|
|
||||||
package de.anomic.icap;
|
|
||||||
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.ByteArrayInputStream;
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.OutputStream;
|
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Properties;
|
|
||||||
|
|
||||||
import de.anomic.document.ParserDispatcher;
|
|
||||||
import de.anomic.http.httpChunkedInputStream;
|
|
||||||
import de.anomic.http.httpHeader;
|
|
||||||
import de.anomic.http.httpRequestHeader;
|
|
||||||
import de.anomic.http.httpResponseHeader;
|
|
||||||
import de.anomic.http.httpDocument;
|
|
||||||
import de.anomic.kelondro.util.DateFormatter;
|
|
||||||
import de.anomic.kelondro.util.FileUtils;
|
|
||||||
import de.anomic.plasma.plasmaHTCache;
|
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
|
||||||
import de.anomic.server.serverCore;
|
|
||||||
import de.anomic.server.serverHandler;
|
|
||||||
import de.anomic.server.serverCore.Session;
|
|
||||||
import de.anomic.yacy.yacyURL;
|
|
||||||
import de.anomic.yacy.logging.Log;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author theli
|
|
||||||
*/
|
|
||||||
public class icapd implements serverHandler, Cloneable {
|
|
||||||
|
|
||||||
|
|
||||||
private serverCore.Session session; // holds the session object of the calling class
|
|
||||||
|
|
||||||
// the connection properties
|
|
||||||
private final Properties prop = new Properties();
|
|
||||||
|
|
||||||
// the address of the client
|
|
||||||
private InetAddress userAddress;
|
|
||||||
private String clientIP;
|
|
||||||
private int keepAliveRequestCount = 0;
|
|
||||||
|
|
||||||
// needed for logging
|
|
||||||
private static final Log log = new Log("ICAPD");
|
|
||||||
|
|
||||||
private static plasmaSwitchboard sb = null;
|
|
||||||
private static String virtualHost = null;
|
|
||||||
private static boolean keepAliveSupport = true;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public icapd() {
|
|
||||||
if (sb == null) {
|
|
||||||
sb = plasmaSwitchboard.getSwitchboard();
|
|
||||||
virtualHost = sb.getConfig("fileHost","localhost");
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public icapd clone(){
|
|
||||||
return new icapd();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initSession(final Session aSession) throws IOException {
|
|
||||||
this.session = aSession;
|
|
||||||
this.userAddress = aSession.userAddress; // client InetAddress
|
|
||||||
this.clientIP = this.userAddress.getHostAddress();
|
|
||||||
if (this.userAddress.isAnyLocalAddress()) this.clientIP = "localhost";
|
|
||||||
if (this.clientIP.startsWith("0:0:0:0:0:0:0:1")) this.clientIP = "localhost";
|
|
||||||
if (this.clientIP.startsWith("127.")) this.clientIP = "localhost";
|
|
||||||
}
|
|
||||||
|
|
||||||
public String greeting() {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String error(final Throwable e) {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reset() {
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean EMPTY(final String arg) throws IOException {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean UNKNOWN(final String requestLine) throws IOException {
|
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public icapHeader getDefaultHeaders() {
|
|
||||||
final icapHeader newHeaders = new icapHeader();
|
|
||||||
|
|
||||||
newHeaders.put(icapHeader.SERVER,"YaCy/" + sb.getConfig("vString",""));
|
|
||||||
newHeaders.put(icapHeader.DATE, DateFormatter.formatRFC1123(new Date()));
|
|
||||||
newHeaders.put(icapHeader.ISTAG, "\"" + sb.getConfig("vString","") + "\"");
|
|
||||||
|
|
||||||
return newHeaders;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean OPTIONS(final String arg) throws IOException {
|
|
||||||
|
|
||||||
final BufferedOutputStream out = new BufferedOutputStream(this.session.out);
|
|
||||||
|
|
||||||
// parsing the http request line
|
|
||||||
parseRequestLine(icapHeader.METHOD_OPTIONS,arg);
|
|
||||||
|
|
||||||
// reading the headers
|
|
||||||
final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session);
|
|
||||||
|
|
||||||
// determines if the connection should be kept alive
|
|
||||||
final boolean persistent = handlePersistentConnection(icapReqHeader);
|
|
||||||
|
|
||||||
// setting the icap response headers
|
|
||||||
final icapHeader resHeader = getDefaultHeaders();
|
|
||||||
resHeader.put(icapHeader.ALLOW,"204");
|
|
||||||
resHeader.put(icapHeader.ENCAPSULATED,"null-body=0");
|
|
||||||
resHeader.put(icapHeader.MAX_CONNECTIONS,"1000");
|
|
||||||
resHeader.put(icapHeader.OPTIONS_TTL,"300");
|
|
||||||
resHeader.put(icapHeader.SERVICE_ID, "???");
|
|
||||||
resHeader.put(icapHeader.PREVIEW, "30");
|
|
||||||
resHeader.put(icapHeader.TRANSFER_COMPLETE, "*");
|
|
||||||
//resHeader.put(icapHeader.TRANSFER_PREVIEW, "*");
|
|
||||||
if (!persistent) resHeader.put(icapHeader.CONNECTION, "close");
|
|
||||||
|
|
||||||
|
|
||||||
// determining the requested service and call it or send back an error message
|
|
||||||
final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,"");
|
|
||||||
if (reqService.equalsIgnoreCase("/resIndexing")) {
|
|
||||||
resHeader.put(icapHeader.SERVICE, "YaCy ICAP Indexing Service 1.0");
|
|
||||||
resHeader.put(icapHeader.METHODS,icapHeader.METHOD_RESPMOD);
|
|
||||||
|
|
||||||
String transferIgnoreList = ParserDispatcher.getMediaExtList();
|
|
||||||
transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1);
|
|
||||||
resHeader.put(icapHeader.TRANSFER_IGNORE, transferIgnoreList);
|
|
||||||
} else {
|
|
||||||
resHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
final StringBuilder header = resHeader.toHeaderString("ICAP/1.0",200,null);
|
|
||||||
out.write(header.toString().getBytes());
|
|
||||||
out.flush();
|
|
||||||
|
|
||||||
return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean REQMOD() {
|
|
||||||
return serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean RESPMOD(final String arg) {
|
|
||||||
try {
|
|
||||||
final InputStream in = this.session.in;
|
|
||||||
final OutputStream out = this.session.out;
|
|
||||||
|
|
||||||
// parsing the icap request line
|
|
||||||
parseRequestLine(icapHeader.METHOD_RESPMOD,arg);
|
|
||||||
|
|
||||||
// reading the icap request header
|
|
||||||
final icapHeader icapReqHeader = icapHeader.readHeader(this.prop,this.session);
|
|
||||||
|
|
||||||
// determines if the connection should be kept alive
|
|
||||||
handlePersistentConnection(icapReqHeader);
|
|
||||||
|
|
||||||
// determining the requested service and call it or send back an error message
|
|
||||||
final String reqService = this.prop.getProperty(icapHeader.CONNECTION_PROP_PATH,"");
|
|
||||||
if (reqService.equalsIgnoreCase("/resIndexing")) {
|
|
||||||
indexingService(icapReqHeader,in,out);
|
|
||||||
} else {
|
|
||||||
final icapHeader icapResHeader = getDefaultHeaders();
|
|
||||||
icapResHeader.put(icapHeader.ENCAPSULATED,icapReqHeader.get(icapHeader.ENCAPSULATED));
|
|
||||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
|
||||||
// icapResHeader.put(icapHeader.CONNECTION, "close");
|
|
||||||
|
|
||||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",404,null);
|
|
||||||
out.write((new String(header)).getBytes());
|
|
||||||
out.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} catch (final Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
|
|
||||||
}
|
|
||||||
return this.prop.getProperty(icapHeader.CONNECTION_PROP_PERSISTENT).equals("keep-alive") ? serverCore.RESUME_CONNECTION : serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
private void blacklistService(icapHeader reqHeader, InputStream in, OutputStream out) {
|
|
||||||
try {
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
private void indexingService(final icapHeader reqHeader, final InputStream in, final OutputStream out) {
|
|
||||||
try {
|
|
||||||
|
|
||||||
/* =========================================================================
|
|
||||||
* Reading the various message parts into buffers
|
|
||||||
* ========================================================================= */
|
|
||||||
ByteArrayInputStream reqHdrStream = null, resHdrStream = null, resBodyStream = null;
|
|
||||||
final String[] encapsulated = (reqHeader.get(icapHeader.ENCAPSULATED)).split(",");
|
|
||||||
int prevLength = 0, currLength=0;
|
|
||||||
for (int i=0; i < encapsulated.length; i++) {
|
|
||||||
// reading the request header
|
|
||||||
if (encapsulated[i].indexOf("req-hdr")>=0) {
|
|
||||||
prevLength = currLength;
|
|
||||||
currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]);
|
|
||||||
|
|
||||||
final byte[] buffer = new byte[currLength-prevLength];
|
|
||||||
final int bytesRead = in.read(buffer, 0, buffer.length);
|
|
||||||
assert bytesRead == buffer.length;
|
|
||||||
|
|
||||||
reqHdrStream = new ByteArrayInputStream(buffer);
|
|
||||||
|
|
||||||
// reading the response header
|
|
||||||
} else if (encapsulated[i].indexOf("res-hdr")>=0) {
|
|
||||||
prevLength = currLength;
|
|
||||||
currLength = Integer.parseInt(encapsulated[i+1].split("=")[1]);
|
|
||||||
|
|
||||||
final byte[] buffer = new byte[currLength-prevLength];
|
|
||||||
final int bytesRead = in.read(buffer, 0, buffer.length);
|
|
||||||
assert bytesRead == buffer.length;
|
|
||||||
|
|
||||||
resHdrStream = new ByteArrayInputStream(buffer);
|
|
||||||
|
|
||||||
// reading the response body
|
|
||||||
} else if (encapsulated[i].indexOf("res-body")>=0) {
|
|
||||||
final httpChunkedInputStream chunkedIn = new httpChunkedInputStream(in);
|
|
||||||
final ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
|
||||||
int l = 0,len = 0;
|
|
||||||
final byte[] buffer = new byte[2048];
|
|
||||||
while ((l = chunkedIn.read(buffer)) >= 0) {
|
|
||||||
len += l;
|
|
||||||
bout.write(buffer,0,l);
|
|
||||||
}
|
|
||||||
resBodyStream = new ByteArrayInputStream(bout.toByteArray());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* =========================================================================
|
|
||||||
* sending back the icap status
|
|
||||||
* ========================================================================= */
|
|
||||||
final icapHeader icapResHeader = getDefaultHeaders();
|
|
||||||
if (reqHeader.allow(204)) {
|
|
||||||
icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED));
|
|
||||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
|
||||||
// resHeader.put(icapHeader.CONNECTION, "close");
|
|
||||||
|
|
||||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",204,null);
|
|
||||||
out.write((new String(header)).getBytes());
|
|
||||||
out.flush();
|
|
||||||
} else {
|
|
||||||
icapResHeader.put(icapHeader.ENCAPSULATED,reqHeader.get(icapHeader.ENCAPSULATED));
|
|
||||||
icapResHeader.put(icapHeader.SERVICE, "YaCy ICAP Service 1.0");
|
|
||||||
// icapResHeader.put(icapHeader.CONNECTION, "close");
|
|
||||||
|
|
||||||
final StringBuilder header = icapResHeader.toHeaderString("ICAP/1.0",503,null);
|
|
||||||
out.write((new String(header)).getBytes());
|
|
||||||
out.flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* =========================================================================
|
|
||||||
* Parsing request data
|
|
||||||
* ========================================================================= */
|
|
||||||
// reading the requestline
|
|
||||||
BufferedReader reader = new BufferedReader(new InputStreamReader(reqHdrStream));
|
|
||||||
final String httpRequestLine = reader.readLine();
|
|
||||||
|
|
||||||
// parsing the requestline
|
|
||||||
final Properties httpReqProps = new Properties();
|
|
||||||
httpRequestHeader.parseRequestLine(httpRequestLine,httpReqProps,virtualHost);
|
|
||||||
|
|
||||||
if (!httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD).equals(httpHeader.METHOD_GET)) {
|
|
||||||
log.logInfo("Wrong http request method for indexing:" +
|
|
||||||
"\nRequest Method: " + httpReqProps.getProperty(httpHeader.CONNECTION_PROP_METHOD) +
|
|
||||||
"\nRequest Line: " + httpRequestLine);
|
|
||||||
reader.close();
|
|
||||||
if(reqHdrStream != null) {
|
|
||||||
reqHdrStream.close();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// reading all request headers
|
|
||||||
final httpRequestHeader httpReqHeader = new httpRequestHeader();
|
|
||||||
httpReqHeader.readHttpHeader(reader);
|
|
||||||
reader.close();
|
|
||||||
if(reqHdrStream != null) {
|
|
||||||
reqHdrStream.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle transparent proxy support: this function call is needed to set the host property properly
|
|
||||||
httpHeader.handleTransparentProxySupport(httpReqHeader,httpReqProps,virtualHost,true);
|
|
||||||
|
|
||||||
// getting the request URL
|
|
||||||
final yacyURL httpRequestURL = httpHeader.getRequestURL(httpReqProps);
|
|
||||||
|
|
||||||
/* =========================================================================
|
|
||||||
* Parsing response data
|
|
||||||
* ========================================================================= */
|
|
||||||
// getting the response status
|
|
||||||
reader = new BufferedReader(new InputStreamReader(resHdrStream));
|
|
||||||
final String httpRespStatusLine = reader.readLine();
|
|
||||||
|
|
||||||
final Object[] httpRespStatus = httpResponseHeader.parseResponseLine(httpRespStatusLine);
|
|
||||||
|
|
||||||
if (!(httpRespStatus[1].equals(Integer.valueOf(200)) || httpRespStatus[1].equals(Integer.valueOf(203)))) {
|
|
||||||
log.logInfo("Wrong status code for indexing:" +
|
|
||||||
"\nStatus Code: " + httpRespStatus[1] +
|
|
||||||
"\nRequest Line: " + httpRequestLine +
|
|
||||||
"\nResponse Line: " + httpRespStatusLine);
|
|
||||||
reader.close();
|
|
||||||
if(resHdrStream != null) {
|
|
||||||
resHdrStream.close();
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// reading all response headers
|
|
||||||
final httpResponseHeader httpResHeader = new httpResponseHeader();
|
|
||||||
httpResHeader.readHttpHeader(reader);
|
|
||||||
reader.close();
|
|
||||||
if(resHdrStream != null) {
|
|
||||||
resHdrStream.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ParserDispatcher.supportedContent(httpRequestURL, httpResHeader.mime())) {
|
|
||||||
log.logInfo("Wrong mimeType or fileExtension for indexing:" +
|
|
||||||
"\nMimeType: " + httpResHeader.mime() +
|
|
||||||
"\nRequest Line:" + httpRequestLine);
|
|
||||||
return ;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* =========================================================================
|
|
||||||
* Prepare data for indexing
|
|
||||||
* ========================================================================= */
|
|
||||||
|
|
||||||
// generating a htcache entry object
|
|
||||||
final httpDocument cacheEntry = new httpDocument(
|
|
||||||
0,
|
|
||||||
httpRequestURL,
|
|
||||||
"",
|
|
||||||
httpRespStatusLine,
|
|
||||||
httpReqHeader, httpResHeader,
|
|
||||||
null,
|
|
||||||
sb.crawler.defaultProxyProfile
|
|
||||||
);
|
|
||||||
|
|
||||||
// copy the response body into the file
|
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
||||||
FileUtils.copy(resBodyStream, baos);
|
|
||||||
if(resBodyStream != null) {
|
|
||||||
resBodyStream.close(); resBodyStream = null;
|
|
||||||
}
|
|
||||||
cacheEntry.setCacheArray(baos.toByteArray());
|
|
||||||
plasmaHTCache.storeMetadata(httpResHeader, cacheEntry);
|
|
||||||
|
|
||||||
// indexing the response
|
|
||||||
sb.htEntryStoreProcess(cacheEntry);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final void parseRequestLine(final String cmd, final String s) {
|
|
||||||
// parsing the requestlin
|
|
||||||
icapHeader.parseRequestLine(cmd,s, this.prop,virtualHost);
|
|
||||||
|
|
||||||
// adding the client ip prop
|
|
||||||
this.prop.setProperty(icapHeader.CONNECTION_PROP_CLIENTIP, this.clientIP);
|
|
||||||
|
|
||||||
// counting the amount of received requests within this permanent conneciton
|
|
||||||
this.prop.setProperty(icapHeader.CONNECTION_PROP_KEEP_ALIVE_COUNT, Integer.toString(++this.keepAliveRequestCount));
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean handlePersistentConnection(final icapHeader header) {
|
|
||||||
|
|
||||||
if (!keepAliveSupport) {
|
|
||||||
this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,"close");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean persistent = true;
|
|
||||||
if (((String)header.get(icapHeader.CONNECTION, "keep-alive")).toLowerCase().equals("close")) {
|
|
||||||
persistent = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
this.prop.put(icapHeader.CONNECTION_PROP_PERSISTENT,persistent?"keep-alive":"close");
|
|
||||||
return persistent;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,142 +0,0 @@
|
|||||||
#!/usr/bin/perl -w
|
|
||||||
#
|
|
||||||
# This is an URL Redirector Script for squid that can be
|
|
||||||
# used to bundle YaCy and Squid together via the squid
|
|
||||||
# redirector support.
|
|
||||||
# See: http://www.squid-cache.org/Doc/FAQ/FAQ-15.html
|
|
||||||
#
|
|
||||||
# This script forwards URLs from squid to YaCy where the
|
|
||||||
# URLs are used to download and index the content of the URLs.
|
|
||||||
use strict;
|
|
||||||
use Socket qw(:DEFAULT :crlf);
|
|
||||||
use IO::Handle;
|
|
||||||
use Digest::MD5;
|
|
||||||
|
|
||||||
# administrator user name + password, and host + port of the YaCy peer
my ($user, $pwd)  = ("user", "");
my ($host, $port) = ("localhost", "8080");

# when 0, URLs recognized by isCGI / isPOST are not forwarded to YaCy
my ($allowCgi, $allowPost) = (0, 0);

# media file extensions received from YaCy, and the fields of the current request line
my (@mediaExt, @requestData);

# turn on autoflush for the currently selected output handle
$| = 1;
|
|
||||||
|
|
||||||
# Returns a true value if the (lower-cased) URL contains one of the markers
# ".cgi", ".exe", ";jsessionid=", "sessionid/" or "phpsessid=",
# and a false value otherwise.
sub isCGI {
    my $url = lc shift;

    foreach my $marker (".cgi", ".exe", ";jsessionid=", "sessionid/", "phpsessid=") {
        return 1 if index($url, $marker) != -1;
    }
    return !1;
}
|
|
||||||
|
|
||||||
# Returns a true value if the URL contains a "?" or a "&" character,
# and a false value otherwise.
sub isPOST {
    my $url = lc shift;

    return 1 if index($url, "?") >= 0;
    return index($url, "&") >= 0;
}
|
|
||||||
|
|
||||||
# Tests whether the file extension of a URL is contained in a list of
# media file extensions.
#
# Parameters:
#   $_[0]  the URL to test
#   $_[1]  reference to an array of file extensions (without leading dot)
#
# Returns the number of list entries that equal the URL's extension
# (0 if the URL contains no "." or nothing matches).
#
# The extension is everything after the last "." of the URL. It is compared
# literally and case-insensitively against each list entry. The text taken
# from the URL is never used as a regular expression, so URLs containing
# regex metacharacters cannot make the script die, and short extensions no
# longer match as substrings of longer ones.
sub isMediaExt {
    my ($url, $extListRef) = @_;

    my $pos = rindex $url, ".";
    return 0 if $pos == -1;

    my $ext = lc substr($url, $pos + 1);

    my $matches = 0;
    foreach my $entry (@{$extListRef}) {
        # list entries may carry surrounding whitespace (e.g. a trailing "\r"
        # left over from a CRLF terminated line)
        (my $known = lc $entry) =~ s/^\s+|\s+$//g;
        $matches++ if $known ne "" && $known eq $ext;
    }
    return $matches;
}
|
|
||||||
|
|
||||||
# byte counters for the statistics line printed at the end
my ($bytes_out, $bytes_in) = (0, 0);
my ($msg_in, $msg_out);

my $protocol = getprotobyname('tcp');

# keep the host name in $host so that the error message can still show it
my $host_addr = inet_aton($host) or die "$host: unknown host";

socket(SOCK, AF_INET, SOCK_STREAM, $protocol) or die "socket() failed: $!";
my $dest_addr = sockaddr_in($port, $host_addr);
connect(SOCK, $dest_addr) or die("connect() failed: $!");

# enabling autoflush
SOCK->autoflush(1);

# sending the REDIRECTOR command to yacy to enable the proper
# command handler
print SOCK "REDIRECTOR".CRLF;

# Doing authentication: the password is sent as MD5 hex digest of "user:pwd"
my $ctx = Digest::MD5->new;
$ctx->add($user.":".$pwd);
my $md5Pwd = $ctx->hexdigest;

print SOCK "USER ".$user.CRLF;
print SOCK "PWD ".$md5Pwd.CRLF;

# Getting a list of file extensions that should be ignored
print SOCK "MEDIAEXT".CRLF;
$msg_in = <SOCK>;
# a closed socket yields undef; treat it as an empty extension list
$msg_in = "" unless defined $msg_in;
$msg_in = lc $msg_in;
# strip the line terminator; the line may end with CRLF, so chomp alone
# would leave a "\r" attached to the last extension
$msg_in =~ s/\015?\012\z//;
@mediaExt = split(/,\s*/, $msg_in);

# 1) Reading URLs from stdIn
# 2) Send it to Yacy
# 3) Receive response from YaCy
# 4) Print response to StdOut
while (defined($msg_out = <>)) {
    chomp $msg_out;

    # splitting request into its various parts
    #
    # One squid redirector request line typically looks like this:
    # http://www.pageresource.com/styles/tuts.css 192.168.0.5/- - GET
    @requestData = split(/\s+/, $msg_out);

    # testing if the URL is CGI
    if (!$allowCgi && isCGI($requestData[0])) {
        print STDOUT CRLF;
        print STDERR "URL is cgi: ".$msg_out.CRLF;
        next;
    }

    # testing if the URL is a POST request
    if (!$allowPost && isPOST($requestData[0])) {
        print STDOUT CRLF;
        print STDERR "URL is post: ".$msg_out.CRLF;
        next;
    }

    # testing if the requested content is a media content
    if (isMediaExt($requestData[0], \@mediaExt)) {
        print STDOUT CRLF;
        print STDERR "URL has media extension: ".$msg_out.CRLF;
        next;
    }

    # sending the whole request line to YaCy
    $msg_out .= CRLF;
    print SOCK $msg_out;

    # reading the response and handing it on unchanged
    if (defined($msg_in = <SOCK>)) {
        print STDOUT $msg_in;
    } else {
        print STDERR "Socket closed".CRLF;
        close SOCK;
        exit(1);
    }

    $bytes_out += length($msg_out);
    $bytes_in  += length($msg_in);
}

# end of input: tell YaCy that the session is finished
print SOCK "EXIT".CRLF;

close SOCK;
print STDERR "bytes_sent = $bytes_out, bytes_received = $bytes_in\n";
|
|
||||||
|
|
@ -1,232 +0,0 @@
|
|||||||
package de.anomic.server;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.PrintWriter;
|
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.util.Date;
|
|
||||||
|
|
||||||
import de.anomic.crawler.CrawlEntry;
|
|
||||||
import de.anomic.crawler.CrawlProfile;
|
|
||||||
import de.anomic.data.userDB;
|
|
||||||
import de.anomic.document.ParserDispatcher;
|
|
||||||
import de.anomic.http.httpClient;
|
|
||||||
import de.anomic.http.httpResponseHeader;
|
|
||||||
import de.anomic.plasma.plasmaSwitchboard;
|
|
||||||
import de.anomic.server.serverCore.Session;
|
|
||||||
import de.anomic.yacy.yacyURL;
|
|
||||||
import de.anomic.yacy.logging.Log;
|
|
||||||
|
|
||||||
public class urlRedirectord implements serverHandler, Cloneable {
|
|
||||||
|
|
||||||
private serverCore.Session session;
|
|
||||||
private static plasmaSwitchboard sb = null;
|
|
||||||
private static final Log theLogger = new Log("URL-REDIRECTOR");
|
|
||||||
private static CrawlProfile.entry profile = null;
|
|
||||||
private String nextURL;
|
|
||||||
|
|
||||||
public urlRedirectord() {
|
|
||||||
if (sb == null) {
|
|
||||||
sb = plasmaSwitchboard.getSwitchboard();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (profile == null) {
|
|
||||||
profile = sb.crawler.profilesActiveCrawls.newEntry(
|
|
||||||
// name
|
|
||||||
"URL Redirector",
|
|
||||||
// start URL
|
|
||||||
null,
|
|
||||||
// crawling filter
|
|
||||||
CrawlProfile.MATCH_ALL, CrawlProfile.MATCH_NEVER,
|
|
||||||
// depth
|
|
||||||
0,
|
|
||||||
// recrawlIfOlder (minutes), if negative: do not re-crawl
|
|
||||||
-1,
|
|
||||||
// domFilterDepth, if negative: no auto-filter
|
|
||||||
-1,
|
|
||||||
// domMaxPages, if negative: no count restriction
|
|
||||||
-1,
|
|
||||||
// crawlDynamic
|
|
||||||
false,
|
|
||||||
// indexText
|
|
||||||
true,
|
|
||||||
// indexMedia
|
|
||||||
true,
|
|
||||||
// storeHTCache
|
|
||||||
false,
|
|
||||||
// storeTxCache
|
|
||||||
true,
|
|
||||||
// remoteIndexing
|
|
||||||
false,
|
|
||||||
// xsstopw
|
|
||||||
true,
|
|
||||||
// xdstopw
|
|
||||||
true,
|
|
||||||
// xpstopw
|
|
||||||
true
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getURL() {
|
|
||||||
return this.nextURL;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void initSession(final Session theSession){
|
|
||||||
// getting current session
|
|
||||||
this.session = theSession;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String greeting() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String error(final Throwable e) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public urlRedirectord clone() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void reset() {
|
|
||||||
this.session = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean EMPTY(final String arg) throws IOException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean UNKNOWN(final String requestLine) throws IOException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Boolean REDIRECTOR(final String requestLine) {
|
|
||||||
try {
|
|
||||||
|
|
||||||
boolean authenticated = false;
|
|
||||||
String userName = null;
|
|
||||||
String md5Pwd = null;
|
|
||||||
|
|
||||||
// setting timeout
|
|
||||||
this.session.controlSocket.setSoTimeout(0);
|
|
||||||
|
|
||||||
String line = null;
|
|
||||||
final BufferedReader inputReader = new BufferedReader(new InputStreamReader(this.session.in));
|
|
||||||
final PrintWriter outputWriter = new PrintWriter(this.session.out);
|
|
||||||
|
|
||||||
while ((line = inputReader.readLine()) != null) {
|
|
||||||
if (line.equals("EXIT")) {
|
|
||||||
break;
|
|
||||||
} else if (line.startsWith("#")) {
|
|
||||||
outputWriter.print("\r\n");
|
|
||||||
outputWriter.flush();
|
|
||||||
continue;
|
|
||||||
} else if (line.startsWith("USER")) {
|
|
||||||
userName = line.substring(line.indexOf(" ")).trim();
|
|
||||||
} else if (line.startsWith("PWD")) {
|
|
||||||
if (userName != null) {
|
|
||||||
final userDB.Entry userEntry = sb.userDB.getEntry(userName);
|
|
||||||
if (userEntry != null) {
|
|
||||||
md5Pwd = line.substring(line.indexOf(" ")).trim();
|
|
||||||
if (userEntry.getMD5EncodedUserPwd().equals(md5Pwd)) {
|
|
||||||
authenticated = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (line.startsWith("MEDIAEXT")) {
|
|
||||||
String transferIgnoreList = ParserDispatcher.getMediaExtList();
|
|
||||||
transferIgnoreList = transferIgnoreList.substring(1,transferIgnoreList.length()-1);
|
|
||||||
|
|
||||||
outputWriter.print(transferIgnoreList);
|
|
||||||
outputWriter.print("\r\n");
|
|
||||||
outputWriter.flush();
|
|
||||||
} else if (line.startsWith("DEPTH")) {
|
|
||||||
final int pos = line.indexOf(" ");
|
|
||||||
if (pos != -1) {
|
|
||||||
final String newDepth = line.substring(pos).trim();
|
|
||||||
theLogger.logFine("Changing crawling depth to '" + newDepth + "'.");
|
|
||||||
sb.crawler.profilesActiveCrawls.changeEntry(profile, "generalDepth",newDepth);
|
|
||||||
}
|
|
||||||
outputWriter.print("\r\n");
|
|
||||||
outputWriter.flush();
|
|
||||||
} else if (line.startsWith("CRAWLDYNAMIC")) {
|
|
||||||
final int pos = line.indexOf(" ");
|
|
||||||
if (pos != -1) {
|
|
||||||
final String newValue = line.substring(pos).trim();
|
|
||||||
theLogger.logFine("Changing crawl dynamic setting to '" + newValue + "'");
|
|
||||||
sb.crawler.profilesActiveCrawls.changeEntry(profile, "crawlingQ",newValue);
|
|
||||||
}
|
|
||||||
outputWriter.print("\r\n");
|
|
||||||
outputWriter.flush();
|
|
||||||
} else {
|
|
||||||
if (!authenticated) {
|
|
||||||
return Boolean.FALSE;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int pos = line.indexOf(" ");
|
|
||||||
this.nextURL = (pos != -1) ? line.substring(0,pos):line;
|
|
||||||
|
|
||||||
theLogger.logFine("Receiving request " + line);
|
|
||||||
outputWriter.print("\r\n");
|
|
||||||
outputWriter.flush();
|
|
||||||
|
|
||||||
String reasonString = null;
|
|
||||||
try {
|
|
||||||
// generating URL Object
|
|
||||||
final yacyURL reqURL = new yacyURL(this.nextURL, null);
|
|
||||||
|
|
||||||
// getting URL mimeType
|
|
||||||
final httpResponseHeader header = httpClient.whead(reqURL.toString());
|
|
||||||
|
|
||||||
if (ParserDispatcher.supportedContent(
|
|
||||||
reqURL,
|
|
||||||
header.mime())
|
|
||||||
) {
|
|
||||||
// first delete old entry, if exists
|
|
||||||
final String urlhash = reqURL.hash();
|
|
||||||
sb.indexSegment.urlMetadata().remove(urlhash);
|
|
||||||
sb.crawlQueues.noticeURL.removeByURLHash(urlhash);
|
|
||||||
sb.crawlQueues.errorURL.remove(urlhash);
|
|
||||||
|
|
||||||
// enqueuing URL for crawling
|
|
||||||
sb.crawlStacker.enqueueEntry(new CrawlEntry(
|
|
||||||
sb.peers.mySeed().hash,
|
|
||||||
reqURL,
|
|
||||||
null,
|
|
||||||
"URL Redirector",
|
|
||||||
new Date(),
|
|
||||||
null,
|
|
||||||
profile.handle(),
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
0
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
reasonString = "Unsupporte file extension";
|
|
||||||
}
|
|
||||||
} catch (final MalformedURLException badUrlEx) {
|
|
||||||
reasonString = "Malformed URL";
|
|
||||||
}
|
|
||||||
|
|
||||||
if (reasonString != null) {
|
|
||||||
theLogger.logFine("URL " + nextURL + " rejected. Reason: " + reasonString);
|
|
||||||
}
|
|
||||||
nextURL = null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
theLogger.logFine("Connection terminated");
|
|
||||||
|
|
||||||
// Terminating connection
|
|
||||||
return serverCore.TERMINATE_CONNECTION;
|
|
||||||
} catch (final Exception e) {
|
|
||||||
theLogger.logSevere("Unexpected Error: " + e.getMessage(),e);
|
|
||||||
return serverCore.TERMINATE_CONNECTION;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
Loading…
Reference in new issue