You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1675 lines
83 KiB
1675 lines
83 KiB
// HTTPDFileHandler.java
|
|
// -----------------------
|
|
// (C) by Michael Peter Christen; mc@yacy.net
|
|
// first published on http://www.anomic.de
|
|
// Frankfurt, Germany, 2004, 2005
|
|
//
|
|
// $LastChangedDate$
|
|
// $LastChangedRevision$
|
|
// $LastChangedBy$
|
|
//
|
|
// This program is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
/*
|
|
Class documentation:
|
|
this class provides a file servlet and CGI interface
|
|
for the httpd server.
|
|
Whenever this server is addressed to load a local file,
|
|
this class searches for the file in the local path as
|
|
configured in the setting property 'rootPath'
|
|
The servlet loads the file and returns it to the client.
|
|
Every file can also act as a template for the built-in
|
|
CGI interface. There is no specific path for CGI functions.
|
|
CGI functionality is triggered, if for the file to-be-served
|
|
'template.html' also a file 'template.class' exists. Then,
|
|
the class file is called with the GET/POST properties that
|
|
are attached to the http call.
|
|
Possible variable hand-over are:
|
|
- form method GET
|
|
- form method POST, enctype text/plain
|
|
- form method POST, enctype multipart/form-data
|
|
The class that creates the CGI response must have at least one
|
|
static method of the form
|
|
public static java.util.Hashtable respond(java.util.HashMap, serverSwitch)
|
|
In the HashMap, the GET/POST variables are handed over.
|
|
The return value is a Property object that contains replacement
|
|
key/value pairs for the patterns in the template file.
|
|
The templates must have the form
|
|
either '#['<name>']#' for single attributes, or
|
|
'#{'<enumname>'}#' and '#{/'<enumname>'}#' for enumerations of
|
|
values '#['<value>']#'.
|
|
A single value in repetitions/enumerations in the template has
|
|
the property key '_'<enumname><count>'_'<value>
|
|
Please see also the example files 'test.html' and 'test.java'
|
|
*/
|
|
|
|
package net.yacy.server.http;
|
|
|
|
import java.awt.Image;
|
|
import java.awt.image.BufferedImage;
|
|
import java.io.BufferedInputStream;
|
|
import java.io.BufferedOutputStream;
|
|
import java.io.BufferedReader;
|
|
import java.io.ByteArrayInputStream;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.FileNotFoundException;
|
|
import java.io.FileReader;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
import java.io.StringWriter;
|
|
import java.lang.ref.SoftReference;
|
|
import java.lang.reflect.InvocationTargetException;
|
|
import java.lang.reflect.Method;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.net.URLDecoder;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Collection;
|
|
import java.util.Date;
|
|
import java.util.HashMap;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
import java.util.zip.GZIPOutputStream;
|
|
|
|
import net.yacy.cora.date.GenericFormatter;
|
|
import net.yacy.cora.document.ASCII;
|
|
import net.yacy.cora.document.Classification;
|
|
import net.yacy.cora.document.MultiProtocolURI;
|
|
import net.yacy.cora.document.UTF8;
|
|
import net.yacy.cora.order.Digest;
|
|
import net.yacy.cora.protocol.Domains;
|
|
import net.yacy.cora.protocol.HeaderFramework;
|
|
import net.yacy.cora.protocol.RequestHeader;
|
|
import net.yacy.cora.protocol.ResponseHeader;
|
|
import net.yacy.cora.util.NumberTools;
|
|
import net.yacy.data.UserDB;
|
|
import net.yacy.document.parser.htmlParser;
|
|
import net.yacy.document.parser.html.ContentScraper;
|
|
import net.yacy.document.parser.html.ScraperInputStream;
|
|
import net.yacy.kelondro.data.meta.DigestURI;
|
|
import net.yacy.kelondro.logging.Log;
|
|
import net.yacy.kelondro.util.ByteBuffer;
|
|
import net.yacy.kelondro.util.FileUtils;
|
|
import net.yacy.kelondro.util.MemoryControl;
|
|
import net.yacy.peers.Seed;
|
|
import net.yacy.peers.graphics.EncodedImage;
|
|
import net.yacy.peers.operation.yacyBuildProperties;
|
|
import net.yacy.search.Switchboard;
|
|
import net.yacy.search.SwitchboardConstants;
|
|
import net.yacy.server.serverClassLoader;
|
|
import net.yacy.server.serverCore;
|
|
import net.yacy.server.serverObjects;
|
|
import net.yacy.server.serverSwitch;
|
|
import net.yacy.server.servletProperties;
|
|
import net.yacy.visualization.RasterPlotter;
|
|
|
|
public final class HTTPDFileHandler {
|
|
|
|
// create a class loader
|
|
private static final serverClassLoader provider = new serverClassLoader(/*this.getClass().getClassLoader()*/);
|
|
private static serverSwitch switchboard = null;
|
|
private static Switchboard sb = Switchboard.getSwitchboard();
|
|
private final static byte[] UNRESOLVED_PATTERN = ASCII.getBytes("-UNRESOLVED_PATTERN-");
|
|
|
|
|
|
private static File htRootPath = null;
|
|
private static File htDocsPath = null;
|
|
private static String[] defaultFiles = null;
|
|
private static File htDefaultPath = null;
|
|
private static File htLocalePath = null;
|
|
public static String indexForward = "";
|
|
|
|
protected static final class TemplateCacheEntry {
|
|
Date lastModified;
|
|
byte[] content;
|
|
}
|
|
private static final ConcurrentHashMap<File, SoftReference<TemplateCacheEntry>> templateCache;
|
|
private static final ConcurrentHashMap<File, SoftReference<Method>> templateMethodCache;
|
|
|
|
public static final boolean useTemplateCache;
|
|
|
|
//private Properties connectionProperties = null;
|
|
// creating a logger
|
|
private static final Log theLogger = new Log("FILEHANDLER");
|
|
|
|
// One-time class initialisation:
//  - reads the template cache switch and creates the caches,
//  - loads the mime table if none is loaded yet,
//  - resolves (and if necessary creates) the htroot, htdocs and repository directories,
//  - resolves the default and locale template directories.
static {
    final serverSwitch theSwitchboard = Switchboard.getSwitchboard();

    useTemplateCache = theSwitchboard.getConfig("enableTemplateCache", "true").equalsIgnoreCase("true");
    // with caching disabled the map is still created, only with an initial capacity of 0
    templateCache = useTemplateCache
            ? new ConcurrentHashMap<File, SoftReference<TemplateCacheEntry>>()
            : new ConcurrentHashMap<File, SoftReference<TemplateCacheEntry>>(0);
    templateMethodCache = new ConcurrentHashMap<File, SoftReference<Method>>();

    if (switchboard == null) {
        switchboard = theSwitchboard;

        if (Classification.countMimes() == 0) {
            // load the mime table
            final String mimeTablePath = theSwitchboard.getConfig("mimeTable", "");
            Log.logConfig("HTTPDFiles", "Loading mime mapping file " + mimeTablePath);
            Classification.init(new File(theSwitchboard.getAppPath(), mimeTablePath));
        }

        // create default files array
        initDefaultPath();

        // create a htRootPath: system pages
        if (htRootPath == null) {
            htRootPath = new File(theSwitchboard.getAppPath(),
                    theSwitchboard.getConfig(SwitchboardConstants.HTROOT_PATH, SwitchboardConstants.HTROOT_PATH_DEFAULT));
            // mkdirs() instead of mkdir(): the configured path may have missing parent directories
            if (!htRootPath.exists() && !htRootPath.mkdirs()) {
                Log.logWarning("HTTPDFiles", "could not create directory " + htRootPath);
            }
        }

        // create a htDocsPath: user defined pages
        if (htDocsPath == null) {
            htDocsPath = theSwitchboard.getDataPath(SwitchboardConstants.HTDOCS_PATH, SwitchboardConstants.HTDOCS_PATH_DEFAULT);
            if (!htDocsPath.exists() && !htDocsPath.mkdirs()) {
                Log.logWarning("HTTPDFiles", "could not create directory " + htDocsPath);
            }
        }

        // create a repository path below htDocsPath
        final File repository = new File(htDocsPath, "repository");
        if (!repository.exists() && !repository.mkdirs()) {
            Log.logWarning("HTTPDFiles", "could not create directory " + repository);
        }

        // create htLocaleDefault, htLocalePath
        if (htDefaultPath == null) {
            htDefaultPath = theSwitchboard.getAppPath("htDefaultPath", "htroot");
        }
        if (htLocalePath == null) {
            htLocalePath = theSwitchboard.getDataPath("locale.translated_html", "DATA/LOCALE/htroot");
        }
    }
}
|
|
|
|
public static final void initDefaultPath() {
|
|
// create default files array
|
|
defaultFiles = switchboard.getConfig(SwitchboardConstants.BROWSER_DEFAULT,"index.html").split(",");
|
|
if (defaultFiles.length == 0) defaultFiles = new String[] {"index.html"};
|
|
indexForward = switchboard.getConfig(SwitchboardConstants.INDEX_FORWARD, "");
|
|
if (indexForward.startsWith("/")) indexForward = indexForward.substring(1);
|
|
}
|
|
|
|
/** Returns a path to the localized or default file according to the locale.language (from he switchboard)
|
|
* @param path relative from htroot */
|
|
public static File getLocalizedFile(final String path){
|
|
return getLocalizedFile(path, switchboard.getConfig("locale.language","default"));
|
|
}
|
|
|
|
/** Returns a path to the localized or default file according to the parameter localeSelection
|
|
* @param path relative from htroot
|
|
* @param localeSelection language of localized file; locale.language from switchboard is used if localeSelection.equals("") */
|
|
public static File getLocalizedFile(final String path, final String localeSelection){
|
|
//if (htDefaultPath == null) htDefaultPath = switchboard.getConfigPath("htDefaultPath", "htroot");
|
|
//if (htLocalePath == null) htLocalePath = switchboard.getConfigPath("locale.translated_html", "DATA/LOCALE/htroot");
|
|
//if (htDocsPath == null) htDocsPath = switchboard.getConfigPath(plasmaSwitchboardConstants.HTDOCS_PATH, plasmaSwitchboardConstants.HTDOCS_PATH_DEFAULT);
|
|
|
|
if (path.startsWith("/repository/"))
|
|
return new File(switchboard.getConfig("repositoryPath", "DATA/HTDOCS/repository"), path.substring(11));
|
|
if (!(localeSelection.equals("default"))) {
|
|
final File localePath = new File(htLocalePath, localeSelection + '/' + path);
|
|
if (localePath.exists()) return localePath; // avoid "NoSuchFile" troubles if the "localeSelection" is misspelled
|
|
}
|
|
|
|
final File docsPath = new File(htDocsPath, path);
|
|
if (docsPath.exists()) return docsPath;
|
|
return new File(htDefaultPath, path);
|
|
}
|
|
|
|
private static final ResponseHeader getDefaultHeaders(final String path) {
|
|
final ResponseHeader headers = new ResponseHeader(200);
|
|
String ext;
|
|
int pos;
|
|
if ((pos = path.lastIndexOf('.')) < 0) {
|
|
ext = "";
|
|
} else {
|
|
ext = path.substring(pos + 1).toLowerCase();
|
|
}
|
|
headers.put(HeaderFramework.SERVER, "AnomicHTTPD (www.anomic.de)");
|
|
headers.put(HeaderFramework.DATE, HeaderFramework.formatRFC1123(new Date()));
|
|
if(!(Classification.isMediaExtension(ext))){
|
|
headers.put(HeaderFramework.PRAGMA, "no-cache");
|
|
}
|
|
return headers;
|
|
}
|
|
|
|
public static void doGet(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream response) {
|
|
doResponse(conProp, requestHeader, response, null);
|
|
}
|
|
|
|
public static void doHead(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream response) {
|
|
doResponse(conProp, requestHeader, response, null);
|
|
}
|
|
|
|
public static void doPost(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream response, final InputStream body) {
|
|
doResponse(conProp, requestHeader, response, body);
|
|
}
|
|
|
|
public static void doResponse(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream out, final InputStream body) {
|
|
|
|
String path = null;
|
|
try {
|
|
// getting some connection properties
|
|
final String method = (String) conProp.get(HeaderFramework.CONNECTION_PROP_METHOD);
|
|
path = (String) conProp.get(HeaderFramework.CONNECTION_PROP_PATH);
|
|
String argsString = (String) conProp.get(HeaderFramework.CONNECTION_PROP_ARGS); // is null if no args were given
|
|
final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
String clientIP = (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP); if (clientIP == null) clientIP = "unknown-host";
|
|
|
|
// check hack attacks in path
|
|
if (path.indexOf("..",0) >= 0) {
|
|
HTTPDemon.sendRespondError(conProp,out,4,403,null,"Access not allowed",null);
|
|
return;
|
|
}
|
|
|
|
path = UTF8.decodeURL(path);
|
|
|
|
// check against hack attacks in path
|
|
if (path.indexOf("..",0) >= 0) {
|
|
HTTPDemon.sendRespondError(conProp,out,4,403,null,"Access not allowed",null);
|
|
return;
|
|
}
|
|
|
|
// allow proper access to current peer via virtual directory
|
|
if (path.startsWith("/currentyacypeer/")) {
|
|
path = path.substring(16);
|
|
}
|
|
|
|
// cache settings
|
|
boolean nocache = path.contains("?") || body != null;
|
|
|
|
// a bad patch to map the /xml/ path to /api/
|
|
if (path.startsWith("/xml/")) {
|
|
path = "/api/" + path.substring(5);
|
|
}
|
|
// another bad patch to map the /util/ path to /api/util/ to support old yacybars
|
|
if (path.startsWith("/util/")) {
|
|
path = "/api/util/" + path.substring(6);
|
|
}
|
|
// one more for bookmarks
|
|
if (path.startsWith("/bookmarks/")) {
|
|
path = "/api/bookmarks/" + path.substring(11);
|
|
}
|
|
// another patch for the gsa interface
|
|
if (path.startsWith("/gsa/search")) {
|
|
path = "/gsa/searchresult" + path.substring(11);
|
|
}
|
|
|
|
// these are the 5 cases where an access granted:
|
|
// (the alternative is that we deliver a 401 to request authorization)
|
|
|
|
// -1- the page is not protected; or
|
|
final boolean protectedPage = path.indexOf("_p.",0) > 0;
|
|
boolean accessGranted = !protectedPage;
|
|
|
|
// -2- a password is not configured; or
|
|
final String adminAccountBase64MD5 = switchboard.getConfig(SwitchboardConstants.ADMIN_ACCOUNT_B64MD5, "");
|
|
if (!accessGranted) {
|
|
accessGranted = adminAccountBase64MD5.isEmpty();
|
|
}
|
|
|
|
// -3- access from localhost is granted and access comes from localhost; or
|
|
final String refererHost = requestHeader.refererHost();
|
|
if (!accessGranted) {
|
|
final boolean adminAccountForLocalhost = sb.getConfigBool("adminAccountForLocalhost", false);
|
|
final boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.isEmpty() || Domains.isLocalhost(refererHost));
|
|
accessGranted = adminAccountForLocalhost && accessFromLocalhost;
|
|
}
|
|
|
|
// -4- a password is configured and access comes from localhost
|
|
// and the realm-value of a http-authentify String is equal to the stored base64MD5; or
|
|
String realmProp = requestHeader.get(RequestHeader.AUTHORIZATION);
|
|
if (realmProp != null && realmProp.isEmpty()) realmProp = null;
|
|
final String realmValue = realmProp == null ? null : realmProp.substring(6);
|
|
if (!accessGranted) {
|
|
final boolean accessFromLocalhost = Domains.isLocalhost(clientIP) && (refererHost == null || refererHost.isEmpty() || Domains.isLocalhost(refererHost));
|
|
accessGranted = accessFromLocalhost && realmValue != null && realmProp.length() > 6 && (adminAccountBase64MD5.equals(realmValue));
|
|
if (!accessGranted) Log.logInfo("HTTPDFileHandler", "access blocked, clientIP=" + clientIP);
|
|
}
|
|
|
|
// -5- a password is configured and access comes with matching http-authentify
|
|
if (!accessGranted) {
|
|
accessGranted = realmProp != null && realmValue != null && (sb.userDB.hasAdminRight(realmProp, requestHeader.getHeaderCookies()) || adminAccountBase64MD5.equals(Digest.encodeMD5Hex(realmValue)));
|
|
}
|
|
|
|
// in case that we are still not granted we ask for a password
|
|
if (!accessGranted) {
|
|
Log.logInfo("HTTPD", "Wrong log-in for path '" + path + "' from host '" + clientIP + "'");
|
|
final Integer attempts = serverCore.bfHost.get(clientIP);
|
|
if (attempts == null)
|
|
serverCore.bfHost.put(clientIP, Integer.valueOf(1));
|
|
else
|
|
serverCore.bfHost.put(clientIP, Integer.valueOf(attempts.intValue() + 1));
|
|
|
|
final ResponseHeader responseHeader = getDefaultHeaders(path);
|
|
responseHeader.put(RequestHeader.WWW_AUTHENTICATE, "Basic realm=\"" + serverObjects.ADMIN_AUTHENTICATE_MSG + "\"");
|
|
final servletProperties tp=new servletProperties();
|
|
tp.put("returnto", path);
|
|
HTTPDemon.sendRespondError(conProp, out, 5, 401, "Wrong Authentication", "", new File("proxymsg/authfail.inc"), tp, null, responseHeader);
|
|
return;
|
|
}
|
|
|
|
// Authentication successful. remove brute-force flag
|
|
serverCore.bfHost.remove(conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
|
|
|
|
// parse arguments
|
|
serverObjects args = new serverObjects();
|
|
int argc = 0;
|
|
if (argsString == null) {
|
|
// no args here, maybe a POST with multipart extension
|
|
final int length = requestHeader.getContentLength();
|
|
//System.out.println("HEADER: " + requestHeader.toString()); // DEBUG
|
|
|
|
/* don't parse body in case of a POST CGI call since it has to be
|
|
* handed over to the CGI script unaltered and parsed by the script
|
|
*/
|
|
if (method.equals(HeaderFramework.METHOD_POST) &&
|
|
!(switchboard.getConfigBool("cgi.allow", false) &&
|
|
matchesSuffix(path, switchboard.getConfig("cgi.suffixes", null)))
|
|
) {
|
|
|
|
// if its a POST, it can be either multipart or as args in the body
|
|
if ((requestHeader.containsKey(HeaderFramework.CONTENT_TYPE)) &&
|
|
(requestHeader.get(HeaderFramework.CONTENT_TYPE).toLowerCase().startsWith("multipart"))) {
|
|
// parse multipart
|
|
final Map<String, byte[]> files = HTTPDemon.parseMultipart(requestHeader, args, body);
|
|
// integrate these files into the args
|
|
if (files != null) {
|
|
final Iterator<Map.Entry<String, byte[]>> fit = files.entrySet().iterator();
|
|
Map.Entry<String, byte[]> entry;
|
|
while (fit.hasNext()) {
|
|
entry = fit.next();
|
|
args.put(entry.getKey() + "$file", entry.getValue());
|
|
}
|
|
}
|
|
argc = Integer.parseInt(requestHeader.get("ARGC"));
|
|
} else {
|
|
// parse args in body
|
|
argc = HTTPDemon.parseArgs(args, body, length);
|
|
}
|
|
} else {
|
|
// no args
|
|
argsString = null;
|
|
args = null;
|
|
argc = 0;
|
|
}
|
|
} else {
|
|
// simple args in URL (stuff after the "?")
|
|
argc = HTTPDemon.parseArgs(args, argsString);
|
|
}
|
|
|
|
// check for cross site scripting - attacks in request arguments
|
|
if (args != null && argc > 0) {
|
|
// check all values for occurrences of script values
|
|
final Iterator<String> e = args.values().iterator(); // enumeration of values
|
|
String val;
|
|
while (e.hasNext()) {
|
|
val = e.next();
|
|
if ((val != null) && (val.indexOf("<script",0) >= 0) && !path.equals("/Crawler_p.html")) {
|
|
// deny request
|
|
HTTPDemon.sendRespondError(conProp,out,4,403,null,"bad post values",null);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (args != null) nocache = true;
|
|
|
|
// we are finished with parsing
|
|
// the result of value hand-over is in args and argc
|
|
if (path.isEmpty()) {
|
|
HTTPDemon.sendRespondError(conProp,out,4,400,null,"Bad Request",null);
|
|
out.flush();
|
|
return;
|
|
}
|
|
File targetClass = null;
|
|
|
|
// locate the file
|
|
if (!path.isEmpty() && path.charAt(0) != '/' && path.charAt(0) != '\\') {
|
|
path = "/" + path; // attach leading slash
|
|
}
|
|
if (path.endsWith("index.html")) {
|
|
path = path.substring(0, path.length() - 10);
|
|
}
|
|
|
|
// a different language can be desired (by i.e. ConfigBasic.html) than the one stored in the locale.language
|
|
String localeSelection = switchboard.getConfig("locale.language","default");
|
|
if (args != null && (args.containsKey("language"))) {
|
|
// TODO 9.11.06 Bost: a class with information about available languages is needed.
|
|
// the indexOf(".") is just a workaround because there from ConfigLanguage.html commes "de.lng" and
|
|
// from ConfigBasic.html comes just "de" in the "language" parameter
|
|
localeSelection = args.get("language", localeSelection);
|
|
if (localeSelection.indexOf('.') != -1)
|
|
localeSelection = localeSelection.substring(0, localeSelection.indexOf('.'));
|
|
}
|
|
|
|
File targetFile = getLocalizedFile(path, localeSelection);
|
|
String targetExt = (String) conProp.get("EXT"); if (targetExt == null) targetExt = "";
|
|
targetClass = rewriteClassFile(new File(htDefaultPath, path));
|
|
if (path.endsWith("/") || path.endsWith("\\")) {
|
|
String testpath;
|
|
// look for indexForward setting
|
|
if (indexForward.length() > 0 && (targetFile = getOverlayedFile(path + indexForward)).exists()) {
|
|
testpath = path + indexForward;
|
|
targetClass = getOverlayedClass(testpath);
|
|
path = testpath;
|
|
} else {
|
|
// attach default file name(s)
|
|
for (final String defaultFile : defaultFiles) {
|
|
testpath = path + defaultFile;
|
|
targetFile = getOverlayedFile(testpath);
|
|
targetClass = getOverlayedClass(testpath);
|
|
if (targetFile.exists()) {
|
|
path = testpath;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
targetFile = getLocalizedFile(path, localeSelection);
|
|
|
|
//no defaultfile, send a dirlisting
|
|
if (targetFile == null || !targetFile.exists() || (targetFile.exists() && targetFile.isDirectory())) {
|
|
final StringBuilder aBuffer = new StringBuilder();
|
|
aBuffer.append("<html>\n<head>\n</head>\n<body>\n<h1>Index of " + path + "</h1>\n <ul>\n");
|
|
String[] list = targetFile.list();
|
|
if (list == null) list = new String[0]; // should not occur!
|
|
File f;
|
|
String size;
|
|
long sz;
|
|
String headline, author, description, publisher;
|
|
int images, links;
|
|
ContentScraper scraper;
|
|
for (final String element : list) {
|
|
f = new File(targetFile, element);
|
|
if (f.isDirectory()) {
|
|
aBuffer.append(" <li><a href=\"" + path + element + "/\">" + element + "/</a><br/></li>\n");
|
|
} else {
|
|
if (element.endsWith("html") || (element.endsWith("htm"))) {
|
|
scraper = ContentScraper.parseResource(f, 10000);
|
|
Collection<String> t = scraper.getTitles();
|
|
headline = t.size() > 0 ? t.iterator().next() : "";
|
|
author = scraper.getAuthor();
|
|
publisher = scraper.getPublisher();
|
|
description = scraper.getDescription();
|
|
images = scraper.getImages().size();
|
|
links = scraper.getAnchors().size();
|
|
} else {
|
|
headline = null;
|
|
author = null;
|
|
publisher = null;
|
|
description = null;
|
|
images = 0;
|
|
links = 0;
|
|
}
|
|
sz = f.length();
|
|
if (sz < 1024) {
|
|
size = sz + " bytes";
|
|
} else if (sz < 1024 * 1024) {
|
|
size = (sz / 1024) + " KB";
|
|
} else {
|
|
size = (sz / 1024 / 1024) + " MB";
|
|
}
|
|
aBuffer.append(" <li>");
|
|
if (headline != null && headline.length() > 0) aBuffer.append("<a href=\"" + element + "\"><b>" + headline + "</b></a><br/>");
|
|
aBuffer.append("<a href=\"" + path + element + "\">" + element + "</a><br/>");
|
|
if (author != null && author.length() > 0) aBuffer.append("Author: " + author + "<br/>");
|
|
if (publisher != null && publisher.length() > 0) aBuffer.append("Publisher: " + publisher + "<br/>");
|
|
if (description != null && description.length() > 0) aBuffer.append("Description: " + description + "<br/>");
|
|
aBuffer.append(GenericFormatter.SHORT_DAY_FORMATTER.format(new Date(f.lastModified())) + ", " + size + ((images > 0) ? ", " + images + " images" : "") + ((links > 0) ? ", " + links + " links" : "") + "<br/></li>\n");
|
|
}
|
|
}
|
|
aBuffer.append(" </ul>\n</body>\n</html>\n");
|
|
|
|
// write the list to the client
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, "text/html; charset=UTF-8", aBuffer.length(), new Date(targetFile.lastModified()), null, new ResponseHeader(200), null, null, true);
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
out.write(UTF8.getBytes(aBuffer.toString()));
|
|
}
|
|
return;
|
|
}
|
|
} else {
|
|
//XXX: you cannot share a .png/.gif file with a name like a class in htroot.
|
|
if ( !(targetFile.exists()) &&
|
|
!((path.endsWith("png")||path.endsWith("gif") || path.indexOf('.') < 0 ||
|
|
matchesSuffix(path, switchboard.getConfig("cgi.suffixes", null)) ||
|
|
path.endsWith(".stream")) &&
|
|
targetClass!=null ) ){
|
|
targetFile = new File(htDocsPath, path);
|
|
targetClass = rewriteClassFile(new File(htDocsPath, path));
|
|
}
|
|
}
|
|
|
|
// implement proxy via url (not in servlet, because we need binary access on ouputStream)
|
|
if (path.equals("/proxy.html")) {
|
|
final List<Pattern> urlProxyAccess = Domains.makePatterns(sb.getConfig("proxyURL.access", Domains.LOCALHOST));
|
|
final UserDB.Entry user = sb.userDB.getUser(requestHeader);
|
|
final boolean user_may_see_proxyurl = Domains.matchesList(clientIP, urlProxyAccess) || (user!=null && user.hasRight(UserDB.AccessRight.PROXY_RIGHT));
|
|
if (sb.getConfigBool("proxyURL", false) && user_may_see_proxyurl) {
|
|
doURLProxy(conProp, requestHeader, out);
|
|
return;
|
|
}
|
|
HTTPDemon.sendRespondError(conProp,out,3,403,"Access denied",null,null);
|
|
}
|
|
|
|
// track all files that had been accessed so far
|
|
if (targetFile != null && targetFile.exists()) {
|
|
if (args != null && !args.isEmpty()) sb.setConfig("server.servlets.submitted", appendPath(sb.getConfig("server.servlets.submitted", ""), path));
|
|
}
|
|
|
|
//File targetClass = rewriteClassFile(targetFile);
|
|
//We need tp here
|
|
servletProperties templatePatterns = null;
|
|
Date targetDate;
|
|
|
|
if ((targetClass != null) && (path.endsWith("png"))) {
|
|
// call an image-servlet to produce an on-the-fly - generated image
|
|
Object img = null;
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, path);
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, "png");
|
|
// in case that there are no args given, args = null or empty hashmap
|
|
img = invokeServlet(targetClass, requestHeader, args, null);
|
|
if (img == null) {
|
|
// error with image generation; send file-not-found
|
|
HTTPDemon.sendRespondError(conProp, out, 3, 404, "File not Found", null, null);
|
|
} else {
|
|
if (img instanceof RasterPlotter) {
|
|
final RasterPlotter yp = (RasterPlotter) img;
|
|
// send an image to client
|
|
targetDate = new Date(System.currentTimeMillis());
|
|
nocache = true;
|
|
final String mimeType = Classification.ext2mime(targetExt, "text/html");
|
|
final ByteBuffer result = RasterPlotter.exportImage(yp.getImage(), targetExt);
|
|
|
|
// write the array to the client
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, result.length(), targetDate, null, null, null, null, nocache);
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
result.writeTo(out);
|
|
}
|
|
}
|
|
if (img instanceof EncodedImage) {
|
|
final EncodedImage yp = (EncodedImage) img;
|
|
// send an image to client
|
|
targetDate = new Date(System.currentTimeMillis());
|
|
nocache = true;
|
|
final String mimeType = Classification.ext2mime(targetExt, "text/html");
|
|
final ByteBuffer result = yp.getImage();
|
|
|
|
// write the array to the client
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, result.length(), targetDate, null, null, null, null, nocache);
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
result.writeTo(out);
|
|
}
|
|
}
|
|
/*
|
|
if (img instanceof BufferedImage) {
|
|
final BufferedImage i = (BufferedImage) img;
|
|
// send an image to client
|
|
targetDate = new Date(System.currentTimeMillis());
|
|
nocache = true;
|
|
final String mimeType = MimeTable.ext2mime(targetExt, "text/html");
|
|
|
|
// generate an byte array from the generated image
|
|
int width = i.getWidth(); if (width < 0) width = 96; // bad hack
|
|
int height = i.getHeight(); if (height < 0) height = 96; // bad hack
|
|
final ByteBuffer result = RasterPlotter.exportImage(i, targetExt);
|
|
|
|
// write the array to the client
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, result.length(), targetDate, null, null, null, null, nocache);
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
result.writeTo(out);
|
|
}
|
|
}
|
|
*/
|
|
if (img instanceof Image) {
|
|
final Image i = (Image) img;
|
|
// send an image to client
|
|
targetDate = new Date(System.currentTimeMillis());
|
|
nocache = true;
|
|
final String mimeType = Classification.ext2mime(targetExt, "text/html");
|
|
|
|
// generate an byte array from the generated image
|
|
int width = i.getWidth(null); if (width < 0) width = 96; // bad hack
|
|
int height = i.getHeight(null); if (height < 0) height = 96; // bad hack
|
|
final BufferedImage bi = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
|
|
bi.createGraphics().drawImage(i, 0, 0, width, height, null);
|
|
final ByteBuffer result = RasterPlotter.exportImage(bi, targetExt);
|
|
|
|
// write the array to the client
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, mimeType, result.length(), targetDate, null, null, null, null, nocache);
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
result.writeTo(out);
|
|
}
|
|
}
|
|
}
|
|
// old-school CGI execution
|
|
} else if ((switchboard.getConfigBool("cgi.allow", false) // check if CGI execution is allowed in config
|
|
&& matchesSuffix(path, switchboard.getConfig("cgi.suffixes", null)) // "right" file extension?
|
|
&& path.substring(0, path.indexOf(targetFile.getName())).toUpperCase().contains("/CGI-BIN/") // file in right directory?
|
|
&& targetFile.exists())
|
|
) {
|
|
|
|
if (!targetFile.canExecute()) {
|
|
HTTPDemon.sendRespondError(
|
|
conProp,
|
|
out,
|
|
-1,
|
|
403,
|
|
null,
|
|
HeaderFramework.http1_1.get(
|
|
Integer.toString(403)),
|
|
null);
|
|
Log.logWarning(
|
|
"HTTPD",
|
|
"CGI script " + targetFile.getPath()
|
|
+ " could not be executed due to "
|
|
+ "insufficient access rights.");
|
|
} else {
|
|
String mimeType = "text/html";
|
|
int statusCode = 200;
|
|
|
|
final ProcessBuilder pb =
|
|
new ProcessBuilder(assembleCommandFromShebang(targetFile));
|
|
pb.directory(targetFile.getParentFile());
|
|
|
|
final String fileSeparator =
|
|
System.getProperty("file.separator", "/");
|
|
|
|
// set environment variables
|
|
final Map<String, String> env = pb.environment();
|
|
env.put(
|
|
"SERVER_SOFTWARE",
|
|
getDefaultHeaders(path).get(HeaderFramework.SERVER));
|
|
env.put("SERVER_NAME", sb.peers.mySeed().getName());
|
|
env.put("GATEWAY_INTERFACE", "CGI/1.1");
|
|
if (httpVersion != null) {
|
|
env.put("SERVER_PROTOCOL", httpVersion);
|
|
}
|
|
env.put("SERVER_PORT", switchboard.getConfig("port", "8090"));
|
|
env.put("REQUEST_METHOD", method);
|
|
// env.put("PATH_INFO", ""); // TODO: implement
|
|
// env.put("PATH_TRANSLATED", ""); // TODO: implement
|
|
env.put("SCRIPT_NAME", path);
|
|
if (argsString != null) {
|
|
env.put("QUERY_STRING", argsString);
|
|
}
|
|
env.put("REMOTE_ADDR", clientIP);
|
|
// env.put("AUTH_TYPE", ""); // TODO: implement
|
|
// env.put("REMOTE_USER", ""); // TODO: implement
|
|
// env.put("REMOTE_IDENT", ""); // I don't think we need this
|
|
env.put(
|
|
"DOCUMENT_ROOT",
|
|
switchboard.getAppPath().getAbsolutePath()
|
|
+ fileSeparator + switchboard.getConfig("htDocsPath", "DATA/HTDOCS"));
|
|
if (requestHeader.getContentType() != null) {
|
|
env.put("CONTENT_TYPE", requestHeader.getContentType());
|
|
}
|
|
if (method.equalsIgnoreCase(HeaderFramework.METHOD_POST)
|
|
&& body != null) {
|
|
env.put(
|
|
"CONTENT_LENGTH",
|
|
Integer.toString(requestHeader.getContentLength()));
|
|
}
|
|
|
|
/* add values from request header to environment
|
|
* (see: http://hoohoo.ncsa.uiuc.edu/cgi/env.html#headers) */
|
|
for (final Map.Entry<String, String> requestHeaderEntry
|
|
: requestHeader.entrySet()) {
|
|
env.put("HTTP_"
|
|
+ requestHeaderEntry.getKey().toUpperCase().replace("-", "_"),
|
|
requestHeaderEntry.getValue());
|
|
}
|
|
|
|
int exitValue = 0;
|
|
String cgiBody = null;
|
|
final StringBuilder error = new StringBuilder(256);
|
|
|
|
try {
|
|
// start execution of script
|
|
final Process p = pb.start();
|
|
|
|
final OutputStream os =
|
|
new BufferedOutputStream(p.getOutputStream());
|
|
|
|
if (method.equalsIgnoreCase(
|
|
HeaderFramework.METHOD_POST) && body != null) {
|
|
final byte[] buffer = new byte[1024];
|
|
int len = requestHeader.getContentLength();
|
|
while (len > 0) {
|
|
body.read(buffer);
|
|
len = len - buffer.length;
|
|
os.write(buffer);
|
|
}
|
|
}
|
|
|
|
os.close();
|
|
|
|
try {
|
|
p.waitFor();
|
|
} catch (final InterruptedException ex) {
|
|
|
|
}
|
|
|
|
exitValue = p.exitValue();
|
|
|
|
final InputStream is =
|
|
new BufferedInputStream(p.getInputStream());
|
|
|
|
final InputStream es =
|
|
new BufferedInputStream(p.getErrorStream());
|
|
|
|
final StringBuilder processOutput =
|
|
new StringBuilder(1024);
|
|
|
|
while (is.available() > 0) {
|
|
processOutput.append((char) is.read());
|
|
}
|
|
|
|
while (es.available() > 0) {
|
|
error.append((char) es.read());
|
|
}
|
|
|
|
int indexOfDelimiter = processOutput.indexOf("\n\n", 0);
|
|
final String[] cgiHeader;
|
|
if (indexOfDelimiter > -1) {
|
|
cgiHeader =
|
|
processOutput.substring(
|
|
0, indexOfDelimiter).split("\n");
|
|
} else {
|
|
cgiHeader = new String[0];
|
|
}
|
|
cgiBody = processOutput.substring(indexOfDelimiter + 1);
|
|
|
|
String key;
|
|
String value;
|
|
for (final String element : cgiHeader) {
|
|
indexOfDelimiter = element.indexOf(':');
|
|
key = element.substring(0, indexOfDelimiter).trim();
|
|
value = element.substring(indexOfDelimiter + 1).trim();
|
|
conProp.put(key, value);
|
|
if ("Cache-Control".equals(key)
|
|
&& "no-cache".equals(value)) {
|
|
nocache = true;
|
|
} else if ("Content-type".equals(key)) {
|
|
mimeType = value;
|
|
} else if ("Status".equals(key)) {
|
|
if (key.length() > 2) {
|
|
try {
|
|
statusCode =
|
|
Integer.parseInt(
|
|
value.substring(0, 3));
|
|
} catch (final NumberFormatException ex) {
|
|
Log.logWarning(
|
|
"HTTPD",
|
|
"CGI script " + targetFile.getPath()
|
|
+ " returned illegal status code \""
|
|
+ value + "\".");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (final IOException ex) {
|
|
exitValue = -1;
|
|
}
|
|
|
|
/* did the script return an exit value != 0
|
|
* and still there is supposed to be
|
|
* everything right with the HTTP status?
|
|
* -> change status to 500 since 200 would
|
|
* be a lie
|
|
*/
|
|
if (exitValue != 0 && statusCode == 200) {
|
|
statusCode = 500;
|
|
}
|
|
|
|
targetDate = new Date(System.currentTimeMillis());
|
|
|
|
if (cgiBody != null && !cgiBody.isEmpty()) {
|
|
HTTPDemon.sendRespondHeader(
|
|
conProp,
|
|
out,
|
|
httpVersion,
|
|
statusCode,
|
|
null,
|
|
mimeType,
|
|
cgiBody.length(),
|
|
targetDate,
|
|
null,
|
|
null,
|
|
null,
|
|
null,
|
|
nocache);
|
|
out.write(UTF8.getBytes(cgiBody));
|
|
} else {
|
|
HTTPDemon.sendRespondError(
|
|
conProp,
|
|
out,
|
|
exitValue,
|
|
statusCode,
|
|
null,
|
|
HeaderFramework.http1_1.get(
|
|
Integer.toString(statusCode)),
|
|
null);
|
|
Log.logWarning(
|
|
"HTTPD",
|
|
"CGI script " + targetFile.getPath()
|
|
+ " returned exit value " + exitValue
|
|
+ ", body empty: "
|
|
+ (cgiBody == null || cgiBody.isEmpty()));
|
|
if (error.length() > 0) {
|
|
Log.logWarning("HTTPD", "Reported error: " + error);
|
|
}
|
|
}
|
|
}
|
|
} else if (targetClass != null && (path.endsWith(".stream") || path.substring(path.length() - 8).indexOf('.') < 0)) {
|
|
// call rewrite-class
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, path);
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, path.endsWith(".stream") ? "stream" : "");
|
|
//requestHeader.put(httpHeader.CONNECTION_PROP_INPUTSTREAM, body);
|
|
//requestHeader.put(httpHeader.CONNECTION_PROP_OUTPUTSTREAM, out);
|
|
|
|
// prepare response header
|
|
ResponseHeader header = new ResponseHeader(200);
|
|
header.put(HeaderFramework.CONTENT_TYPE, getMimeFromServlet(targetClass, requestHeader, args, "text/xml"));
|
|
header.put(HeaderFramework.CORS_ALLOW_ORIGIN, "*"); // allow Cross-Origin Resource Sharing for all stream servlets
|
|
conProp.remove(HeaderFramework.CONNECTION_PROP_PERSISTENT);
|
|
final boolean zipContent = requestHeader.acceptGzip();
|
|
if (zipContent) header.put(HeaderFramework.CONTENT_ENCODING, "gzip");
|
|
|
|
// send response head
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, 200, null, header);
|
|
forceConnectionClose(conProp);
|
|
|
|
// send response content
|
|
OutputStream o = zipContent ? new GZIPOutputStream(out) : out;
|
|
invokeServlet(targetClass, requestHeader, args, o);
|
|
|
|
// immediately close stream as this terminates the http transmission
|
|
if (o instanceof GZIPOutputStream) ((GZIPOutputStream) o).finish();
|
|
o.flush();
|
|
o.close();
|
|
out.flush();
|
|
out.close();
|
|
return;
|
|
} else if (targetFile.exists() && targetFile.isFile() && targetFile.canRead()) {
|
|
// we have found a file that can be written to the client
|
|
// if this file uses templates, then we use the template
|
|
// re-write - method to create an result
|
|
String mimeType = Classification.ext2mime(targetExt, "text/html");
|
|
String ext = (String) conProp.get("EXT"); if (ext == null) ext = "";
|
|
final boolean zipContent = requestHeader.acceptGzip() && HTTPDemon.shallTransportZipped("." + ext);
|
|
if (path.endsWith("html") ||
|
|
path.endsWith("htm") ||
|
|
path.endsWith("xml") ||
|
|
path.endsWith("json") ||
|
|
path.endsWith("rdf") ||
|
|
path.endsWith("rss") ||
|
|
path.endsWith("csv") ||
|
|
path.endsWith("pac") ||
|
|
path.endsWith("src") ||
|
|
path.endsWith("vcf") ||
|
|
path.endsWith("kml") ||
|
|
path.endsWith("gpx") ||
|
|
path.endsWith("css") ||
|
|
path.endsWith("/") ||
|
|
path.equals("/robots.txt")) {
|
|
|
|
/*targetFile = getLocalizedFile(path);
|
|
if (!(targetFile.exists())) {
|
|
// try to find that file in the htDocsPath
|
|
File trialFile = new File(htDocsPath, path);
|
|
if (trialFile.exists()) targetFile = trialFile;
|
|
}*/
|
|
|
|
|
|
// call rewrite-class
|
|
|
|
if (targetClass != null) {
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_CLIENTIP, (String) conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP));
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_PATH, path);
|
|
final int ep = path.lastIndexOf(".");
|
|
requestHeader.put(HeaderFramework.CONNECTION_PROP_EXT, path.substring(ep + 1));
|
|
// in case that there are no args given, args = null or empty hashmap
|
|
final Object tmp = invokeServlet(targetClass, requestHeader, args, null);
|
|
if (tmp == null) {
|
|
// if no args given, then tp will be an empty Hashtable object (not null)
|
|
templatePatterns = new servletProperties();
|
|
} else if (tmp instanceof servletProperties) {
|
|
templatePatterns = (servletProperties) tmp;
|
|
} else {
|
|
templatePatterns = new servletProperties((serverObjects) tmp);
|
|
}
|
|
// check if the servlets requests authentication
|
|
if (templatePatterns.containsKey(serverObjects.ACTION_AUTHENTICATE)) {
|
|
// handle brute-force protection
|
|
if (realmProp != null) {
|
|
Log.logInfo("HTTPD", "dynamic log-in for account 'admin' in http file handler for path '" + path + "' from host '" + clientIP + "'");
|
|
final Integer attempts = serverCore.bfHost.get(clientIP);
|
|
if (attempts == null)
|
|
serverCore.bfHost.put(clientIP, Integer.valueOf(1));
|
|
else
|
|
serverCore.bfHost.put(clientIP, Integer.valueOf(attempts.intValue() + 1));
|
|
}
|
|
// send authentication request to browser
|
|
final ResponseHeader headers = getDefaultHeaders(path);
|
|
headers.put(RequestHeader.WWW_AUTHENTICATE,"Basic realm=\"" + templatePatterns.get(serverObjects.ACTION_AUTHENTICATE, "") + "\"");
|
|
HTTPDemon.sendRespondHeader(conProp,out,httpVersion,401,headers);
|
|
return;
|
|
} else if (templatePatterns.containsKey(serverObjects.ACTION_LOCATION)) {
|
|
String location = templatePatterns.get(serverObjects.ACTION_LOCATION, "");
|
|
if (location.isEmpty()) location = path;
|
|
|
|
final ResponseHeader headers = getDefaultHeaders(path);
|
|
headers.setAdditionalHeaderProperties(templatePatterns.getOutgoingHeader().getAdditionalHeaderProperties()); //put the cookies into the new header TODO: can we put all headerlines, without trouble?
|
|
headers.put(HeaderFramework.LOCATION,location);
|
|
HTTPDemon.sendRespondHeader(conProp,out,httpVersion,302,headers);
|
|
return;
|
|
}
|
|
// add the application version, the uptime and the client name to every rewrite table
|
|
templatePatterns.put(servletProperties.PEER_STAT_VERSION, yacyBuildProperties.getVersion());
|
|
templatePatterns.put(servletProperties.PEER_STAT_UPTIME, ((System.currentTimeMillis() - serverCore.startupTime) / 1000) / 60); // uptime in minutes
|
|
templatePatterns.putHTML(servletProperties.PEER_STAT_CLIENTNAME, sb.peers.mySeed().getName());
|
|
templatePatterns.putHTML(servletProperties.PEER_STAT_CLIENTID, ((Switchboard) switchboard).peers.myID());
|
|
templatePatterns.put(servletProperties.PEER_STAT_MYTIME, GenericFormatter.SHORT_SECOND_FORMATTER.format());
|
|
final Seed myPeer = sb.peers.mySeed();
|
|
templatePatterns.put("newpeer", myPeer.getAge() >= 1 ? 0 : 1);
|
|
templatePatterns.putHTML("newpeer_peerhash", myPeer.hash);
|
|
//System.out.println("respond props: " + ((tp == null) ? "null" : tp.toString())); // debug
|
|
nocache = true;
|
|
}
|
|
|
|
targetDate = new Date(targetFile.lastModified());
|
|
Date expireDate = null;
|
|
if (templatePatterns == null) {
|
|
// if the file will not be changed, cache it in the browser
|
|
expireDate = new Date(new Date().getTime() + (31l * 24 * 60 * 60 * 1000));
|
|
}
|
|
|
|
|
|
// rewrite the file
|
|
InputStream fis = null;
|
|
|
|
// read the file/template
|
|
TemplateCacheEntry templateCacheEntry = null;
|
|
final long fileSize = targetFile.length();
|
|
if (useTemplateCache && fileSize <= 512 * 1024) {
|
|
// read from cache
|
|
SoftReference<TemplateCacheEntry> ref = templateCache.get(targetFile);
|
|
if (ref != null) {
|
|
templateCacheEntry = ref.get();
|
|
if (templateCacheEntry == null) templateCache.remove(targetFile);
|
|
}
|
|
|
|
final Date targetFileDate = new Date(targetFile.lastModified());
|
|
if (templateCacheEntry == null || targetFileDate.after(templateCacheEntry.lastModified)) {
|
|
// loading the content of the template file into
|
|
// a byte array
|
|
templateCacheEntry = new TemplateCacheEntry();
|
|
templateCacheEntry.lastModified = targetFileDate;
|
|
templateCacheEntry.content = FileUtils.read(targetFile);
|
|
|
|
// storing the content into the cache
|
|
ref = new SoftReference<TemplateCacheEntry>(templateCacheEntry);
|
|
if (MemoryControl.shortStatus()) templateCache.clear();
|
|
templateCache.put(targetFile, ref);
|
|
if (theLogger.isFinest()) theLogger.logFinest("Cache MISS for file " + targetFile);
|
|
} else {
|
|
if (theLogger.isFinest()) theLogger.logFinest("Cache HIT for file " + targetFile);
|
|
}
|
|
|
|
// creating an inputstream needed by the template
|
|
// rewrite function
|
|
fis = new ByteArrayInputStream(templateCacheEntry.content);
|
|
templateCacheEntry = null;
|
|
} else if (fileSize <= Math.min(4 * 1024 * 1204, MemoryControl.available() / 100)) {
|
|
// read file completely into ram, avoid that too many files are open at the same time
|
|
fis = new ByteArrayInputStream(FileUtils.read(targetFile));
|
|
} else {
|
|
fis = new BufferedInputStream(new FileInputStream(targetFile));
|
|
}
|
|
|
|
if (mimeType.startsWith("text")) {
|
|
// every text-file distributed by yacy is UTF-8
|
|
if (!path.startsWith("/repository")) {
|
|
mimeType = mimeType + "; charset=UTF-8";
|
|
} else {
|
|
// detect charset of html-files
|
|
if ((path.endsWith("html") || path.endsWith("htm"))) {
|
|
// save position
|
|
fis.mark(1000);
|
|
// scrape document to look up charset
|
|
final ScraperInputStream htmlFilter = new ScraperInputStream(fis, "UTF-8", new DigestURI("http://localhost"), null, false, 10);
|
|
final String charset = htmlParser.patchCharsetEncoding(htmlFilter.detectCharset());
|
|
htmlFilter.close();
|
|
if (charset != null) mimeType = mimeType + "; charset="+charset;
|
|
// reset position
|
|
fis.reset();
|
|
}
|
|
}
|
|
}
|
|
|
|
// write the array to the client
|
|
// we can do that either in standard mode (whole thing completely) or in chunked mode
|
|
// since yacy clients do not understand chunked mode (yet), we use this only for communication with the administrator
|
|
final boolean yacyClient = requestHeader.userAgent().startsWith("yacy");
|
|
final boolean chunked = !method.equals(HeaderFramework.METHOD_HEAD) && !yacyClient && httpVersion.equals(HeaderFramework.HTTP_VERSION_1_1);
|
|
if (chunked) {
|
|
// send page in chunks and parse SSIs
|
|
final ByteBuffer o = new ByteBuffer();
|
|
// apply templates
|
|
TemplateEngine.writeTemplate(fis, o, templatePatterns, UNRESOLVED_PATTERN);
|
|
fis.close();
|
|
ResponseHeader rh = (templatePatterns == null) ? new ResponseHeader(200) : templatePatterns.getOutgoingHeader();
|
|
HTTPDemon.sendRespondHeader(conProp, out,
|
|
httpVersion, rh.getStatusCode(), null, mimeType, -1,
|
|
targetDate, expireDate, rh,
|
|
null, "chunked", nocache);
|
|
// send the content in chunked parts, see RFC 2616 section 3.6.1
|
|
final ChunkedOutputStream chos = new ChunkedOutputStream(out);
|
|
// GZIPOutputStream does not implement flush (this is a bug IMHO)
|
|
// so we can't compress this stuff, without loosing the cool SSI trickle feature
|
|
ServerSideIncludes.writeSSI(o, chos, realmProp, clientIP, requestHeader);
|
|
//chos.write(result);
|
|
chos.finish();
|
|
} else {
|
|
// send page as whole thing, SSIs are not possible
|
|
final String contentEncoding = (zipContent) ? "gzip" : null;
|
|
// apply templates
|
|
final ByteBuffer o1 = new ByteBuffer();
|
|
TemplateEngine.writeTemplate(fis, o1, templatePatterns, ASCII.getBytes("-UNRESOLVED_PATTERN-"));
|
|
fis.close();
|
|
final ByteBuffer o = new ByteBuffer();
|
|
|
|
if (zipContent) {
|
|
GZIPOutputStream zippedOut = new GZIPOutputStream(o);
|
|
ServerSideIncludes.writeSSI(o1, zippedOut, realmProp, clientIP, requestHeader);
|
|
//httpTemplate.writeTemplate(fis, zippedOut, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8"));
|
|
zippedOut.finish();
|
|
zippedOut.flush();
|
|
zippedOut.close();
|
|
zippedOut = null;
|
|
} else {
|
|
ServerSideIncludes.writeSSI(o1, o, realmProp, clientIP, requestHeader);
|
|
//httpTemplate.writeTemplate(fis, o, tp, "-UNRESOLVED_PATTERN-".getBytes("UTF-8"));
|
|
}
|
|
ResponseHeader rh = (templatePatterns == null) ? new ResponseHeader(200) : templatePatterns.getOutgoingHeader();
|
|
if (method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
HTTPDemon.sendRespondHeader(conProp, out,
|
|
httpVersion, rh.getStatusCode(), null, mimeType, o.length(),
|
|
targetDate, expireDate, rh,
|
|
contentEncoding, null, nocache);
|
|
} else {
|
|
final byte[] result = o.getBytes(); // this interrupts streaming (bad idea!)
|
|
HTTPDemon.sendRespondHeader(conProp, out,
|
|
httpVersion, rh.getStatusCode(), null, mimeType, result.length,
|
|
targetDate, expireDate, rh,
|
|
contentEncoding, null, nocache);
|
|
FileUtils.copy(result, out);
|
|
}
|
|
}
|
|
} else { // no html
|
|
|
|
int statusCode = 200;
|
|
int rangeStartOffset = 0;
|
|
final ResponseHeader header = new ResponseHeader(statusCode);
|
|
|
|
// adding the accept ranges header
|
|
header.put(HeaderFramework.ACCEPT_RANGES, "bytes");
|
|
|
|
// reading the files md5 hash if availabe and use it as ETAG of the resource
|
|
String targetMD5 = null;
|
|
final File targetMd5File = new File(targetFile + ".md5");
|
|
try {
|
|
if (targetMd5File.exists()) {
|
|
//String description = null;
|
|
targetMD5 = UTF8.String(FileUtils.read(targetMd5File));
|
|
final int pos = targetMD5.indexOf('\n');
|
|
if (pos >= 0) {
|
|
//description = targetMD5.substring(pos + 1);
|
|
targetMD5 = targetMD5.substring(0, pos);
|
|
}
|
|
|
|
// using the checksum as ETAG header
|
|
header.put(HeaderFramework.ETAG, targetMD5);
|
|
}
|
|
} catch (final IOException e) {
|
|
Log.logException(e);
|
|
}
|
|
|
|
if (requestHeader.containsKey(HeaderFramework.RANGE)) {
|
|
final Object ifRange = requestHeader.ifRange();
|
|
if ((ifRange == null)||
|
|
(ifRange instanceof Date && targetFile.lastModified() == ((Date)ifRange).getTime()) ||
|
|
(ifRange instanceof String && ifRange.equals(targetMD5))) {
|
|
final String rangeHeaderVal = requestHeader.get(HeaderFramework.RANGE).trim();
|
|
if (rangeHeaderVal.startsWith("bytes=")) {
|
|
final String rangesVal = rangeHeaderVal.substring("bytes=".length());
|
|
final String[] ranges = rangesVal.split(",");
|
|
if ((ranges.length == 1)&&(ranges[0].endsWith("-"))) {
|
|
rangeStartOffset = NumberTools.parseIntDecSubstring(ranges[0], 0, ranges[0].length() - 1);
|
|
statusCode = 206;
|
|
header.put(HeaderFramework.CONTENT_RANGE, "bytes " + rangeStartOffset + "-" + (targetFile.length()-1) + "/" + targetFile.length());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// write the file to the client
|
|
targetDate = new Date(targetFile.lastModified());
|
|
// cache file for one month in browser (but most browsers won't cache for that long)
|
|
final Date expireDate = new Date(new Date().getTime() + (31l * 24 * 60 * 60 * 1000));
|
|
final long contentLength = (zipContent)?-1:targetFile.length()-rangeStartOffset;
|
|
final String contentEncoding = (zipContent) ? "gzip" : null;
|
|
final String transferEncoding = (httpVersion.equals(HeaderFramework.HTTP_VERSION_1_1) && zipContent) ? "chunked" : null;
|
|
if (!httpVersion.equals(HeaderFramework.HTTP_VERSION_1_1) && zipContent) forceConnectionClose(conProp);
|
|
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, statusCode, null, mimeType, contentLength, targetDate, expireDate, header, contentEncoding, transferEncoding, nocache);
|
|
|
|
if (!method.equals(HeaderFramework.METHOD_HEAD)) {
|
|
ChunkedOutputStream chunkedOut = null;
|
|
GZIPOutputStream zipped = null;
|
|
OutputStream newOut = out;
|
|
|
|
if (transferEncoding != null) {
|
|
chunkedOut = new ChunkedOutputStream(newOut);
|
|
newOut = chunkedOut;
|
|
}
|
|
if (contentEncoding != null) {
|
|
zipped = new GZIPOutputStream(newOut);
|
|
newOut = zipped;
|
|
}
|
|
|
|
FileUtils.copyRange(targetFile, newOut, rangeStartOffset);
|
|
|
|
if (zipped != null) {
|
|
zipped.flush();
|
|
zipped.finish();
|
|
}
|
|
if (chunkedOut != null) {
|
|
chunkedOut.finish();
|
|
}
|
|
|
|
// flush all
|
|
try {newOut.flush();}catch (final Exception e) {}
|
|
}
|
|
}
|
|
} else {
|
|
if (!targetFile.exists()) Log.logWarning("HTTPFileHandler", "target file " + targetFile.getAbsolutePath() + " does not exist");
|
|
//if (!targetFile.isFile()) Log.logWarning("HTTPFileHandler", "target file " + targetFile.getAbsolutePath() + " is not a file");
|
|
//if (!targetFile.canRead()) Log.logWarning("HTTPFileHandler", "target file " + targetFile.getAbsolutePath() + " cannot read");
|
|
HTTPDemon.sendRespondError(conProp,out,3,404,"File not Found",null,null);
|
|
return;
|
|
}
|
|
} catch (final Exception e) {
|
|
try {
|
|
// error handling
|
|
if (e instanceof NullPointerException) {
|
|
Log.logException(e);
|
|
}
|
|
int httpStatusCode = 400;
|
|
final String httpStatusText = null;
|
|
final StringBuilder errorMessage = new StringBuilder(2000);
|
|
Exception errorExc = null;
|
|
|
|
final String errorMsg = e.getMessage();
|
|
if (
|
|
(e instanceof InterruptedException) ||
|
|
((errorMsg != null) && (errorMsg.startsWith("Socket closed")) && (Thread.currentThread().isInterrupted()))
|
|
) {
|
|
errorMessage.append("Interruption detected while processing query.");
|
|
httpStatusCode = 503;
|
|
} else {
|
|
if ((errorMsg != null) &&
|
|
(
|
|
errorMsg.contains("Broken pipe") ||
|
|
errorMsg.contains("Connection reset") ||
|
|
errorMsg.contains("Read timed out") ||
|
|
errorMsg.contains("Connection timed out") ||
|
|
errorMsg.contains("Software caused connection abort")
|
|
)) {
|
|
// client closed the connection, so we just end silently
|
|
errorMessage.append("Client unexpectedly closed connection while processing query.");
|
|
} else {
|
|
errorMessage.append("Unexpected error while processing query.");
|
|
httpStatusCode = 500;
|
|
errorExc = e;
|
|
}
|
|
}
|
|
|
|
errorMessage.append("\nSession: ").append(Thread.currentThread().getName())
|
|
.append("\nQuery: ").append(path)
|
|
.append("\nClient: ").append(conProp.get(HeaderFramework.CONNECTION_PROP_CLIENTIP))
|
|
.append("\nReason: ").append(e.getMessage());
|
|
|
|
if (!conProp.containsKey(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_HEADER)) {
|
|
// sending back an error message to the client
|
|
// if we have not already send an http header
|
|
HTTPDemon.sendRespondError(conProp,out, 4, httpStatusCode, httpStatusText, errorMessage.toString(), errorExc);
|
|
} else {
|
|
// otherwise we close the connection
|
|
forceConnectionClose(conProp);
|
|
}
|
|
|
|
// if it is an unexpected error we log it
|
|
if (httpStatusCode == 500) {
|
|
theLogger.logWarning(errorMessage.toString(), e);
|
|
}
|
|
|
|
} catch (final Exception ee) {
|
|
forceConnectionClose(conProp);
|
|
}
|
|
|
|
} finally {
|
|
try {out.flush();}catch (final Exception e) {}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns a list which contains parts of command
|
|
* which is used to start external process for
|
|
* CGI scripts.
|
|
* @param targetFile file to run
|
|
* @return list of parts of command
|
|
* @throws FileNotFoundException
|
|
* @throws IOException if file can not be accessed
|
|
*/
|
|
private static List<String> assembleCommandFromShebang(final File targetFile) throws FileNotFoundException {
|
|
final List<String > ret = new ArrayList<String>();
|
|
final BufferedReader br = new BufferedReader(new FileReader(targetFile), 512);
|
|
String line;
|
|
try {
|
|
line = br.readLine();
|
|
if (line.startsWith("#!")) {
|
|
ret.addAll(Arrays.asList(line.substring(2).split(" ")));
|
|
}
|
|
ret.add(targetFile.getAbsolutePath());
|
|
} catch (IOException e) {
|
|
Log.logException(e);
|
|
} finally {
|
|
try {br.close();} catch (IOException e) {}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
private static final String appendPath(final String proplist, final String path) {
|
|
if (proplist.isEmpty()) return path;
|
|
if (proplist.indexOf(path) >= 0) return proplist;
|
|
return proplist + "," + path;
|
|
}
|
|
|
|
public static final File getOverlayedClass(final String path) {
|
|
File targetClass;
|
|
targetClass = rewriteClassFile(new File(htDefaultPath, path)); //works for default and localized files
|
|
if (targetClass == null || !targetClass.exists()) {
|
|
//works for htdocs
|
|
targetClass=rewriteClassFile(new File(htDocsPath, path));
|
|
}
|
|
return targetClass;
|
|
}
|
|
|
|
public static final File getOverlayedFile(final String path) {
|
|
File targetFile;
|
|
targetFile = getLocalizedFile(path);
|
|
if (!targetFile.exists()) {
|
|
targetFile = new File(htDocsPath, path);
|
|
}
|
|
return targetFile;
|
|
}
|
|
|
|
private static final void forceConnectionClose(final HashMap<String, Object> conprop) {
|
|
if (conprop != null) {
|
|
conprop.put(HeaderFramework.CONNECTION_PROP_PERSISTENT, "close");
|
|
}
|
|
}
|
|
|
|
private static final File rewriteClassFile(final File template) {
|
|
try {
|
|
String f = template.getCanonicalPath();
|
|
int cp = f.length() - 8;
|
|
if (cp < 0) {
|
|
final int p = f.lastIndexOf('.');
|
|
f = p < 0 ? f + ".class" : f.substring(0, p) + ".class";
|
|
} else {
|
|
final int p = f.substring(cp).lastIndexOf('.');
|
|
f = p < 0 ? f + ".class" : f.substring(0, cp + p) + ".class";
|
|
}
|
|
final File cf = new File(f);
|
|
if (cf.exists()) return cf;
|
|
return null;
|
|
} catch (final IOException e) {
|
|
return null;
|
|
}
|
|
}
|
|
|
|
private static final Method rewriteMethod(final File classFile, final String methodName) throws InvocationTargetException {
|
|
Method m = null;
|
|
// now make a class out of the stream
|
|
try {
|
|
if (templateMethodCache != null && "respond".equals(methodName)) {
|
|
final SoftReference<Method> ref = templateMethodCache.get(classFile);
|
|
if (ref != null) {
|
|
m = ref.get();
|
|
if (m == null) {
|
|
templateMethodCache.remove(classFile);
|
|
} else {
|
|
return m;
|
|
}
|
|
}
|
|
}
|
|
|
|
final Class<?> c = provider.loadClass(classFile);
|
|
Class<?>[] params = new Class[] {
|
|
RequestHeader.class,
|
|
serverObjects.class,
|
|
serverSwitch.class };
|
|
try {
|
|
m = c.getMethod(methodName, params);
|
|
} catch (NoSuchMethodException e) {
|
|
params = new Class[] {
|
|
RequestHeader.class,
|
|
serverObjects.class,
|
|
serverSwitch.class,
|
|
OutputStream.class};
|
|
m = c.getMethod(methodName, params);
|
|
}
|
|
|
|
if (MemoryControl.shortStatus()) {
|
|
templateMethodCache.clear();
|
|
} else {
|
|
// store the method into the cache
|
|
if (templateMethodCache != null && "respond".equals(methodName)) {
|
|
templateMethodCache.put(classFile, new SoftReference<Method>(m));
|
|
}
|
|
}
|
|
|
|
} catch (final ClassNotFoundException e) {
|
|
Log.logSevere("HTTPDFileHandler", "class " + classFile + " is missing:" + e.getMessage());
|
|
throw new InvocationTargetException(e, "class " + classFile + " is missing:" + e.getMessage());
|
|
} catch (final NoSuchMethodException e) {
|
|
Log.logSevere("HTTPDFileHandler", "method 'respond' not found in class " + classFile + ": " + e.getMessage());
|
|
throw new InvocationTargetException(e, "method 'respond' not found in class " + classFile + ": " + e.getMessage());
|
|
}
|
|
return m;
|
|
}
|
|
|
|
private static final Object invokeServlet(final File targetClass, final RequestHeader request, final serverObjects args, final OutputStream os) {
|
|
try {
|
|
if (os == null) {
|
|
return rewriteMethod(targetClass, "respond").invoke(null, new Object[] {request, args, switchboard});
|
|
}
|
|
return rewriteMethod(targetClass, "respond").invoke(null, new Object[] {request, args, switchboard, os});
|
|
} catch (final Throwable e) {
|
|
theLogger.logSevere("INTERNAL ERROR: " + e.toString() + ":" +
|
|
e.getMessage() +
|
|
" target exception at " + targetClass + ": " +
|
|
"; java.awt.graphicsenv='" + System.getProperty("java.awt.graphicsenv","") + "'");
|
|
Log.logException(e);
|
|
Log.logException(e.getCause());
|
|
if (e instanceof InvocationTargetException) Log.logException(((InvocationTargetException) e).getTargetException());
|
|
return null;
|
|
}
|
|
}
|
|
|
|
private static final String getMimeFromServlet(final File targetClass, final RequestHeader request, final serverObjects args, final String dflt) {
|
|
try {
|
|
return (String) rewriteMethod(targetClass, "mime").invoke(null, new Object[] {request, args, switchboard});
|
|
} catch (final Throwable e) {
|
|
theLogger.logSevere("INTERNAL ERROR: " + e.toString() + ":" +
|
|
e.getMessage() +
|
|
" target exception at " + targetClass + ": " +
|
|
"; java.awt.graphicsenv='" + System.getProperty("java.awt.graphicsenv","") + "'");
|
|
Log.logException(e);
|
|
Log.logException(e.getCause());
|
|
if (e instanceof InvocationTargetException) Log.logException(((InvocationTargetException) e).getTargetException());
|
|
return dflt;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Tells if a filename ends with a suffix from a given list.
|
|
* @param filename the filename
|
|
* @param suffixList the list of suffixes which is a string of suffixes separated by commas
|
|
* @return true if the filename ends with a suffix from the list, else false
|
|
*/
|
|
private static boolean matchesSuffix(final String name, final String suffixList) {
|
|
boolean ret = false;
|
|
|
|
if (suffixList != null && name != null) {
|
|
final String[] suffixes = suffixList.split(",");
|
|
find:
|
|
for (int i = 0; i < suffixes.length; i++) {
|
|
if (name.endsWith("." + suffixes[i].trim())) {
|
|
ret = true;
|
|
break find;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* do a proxy request for document
|
|
* extracts url from GET-parameter url
|
|
* not in a separate servlet, because we need access to the binary output stream
|
|
* @throws IOException
|
|
*/
|
|
private static void doURLProxy(final HashMap<String, Object> conProp, final RequestHeader requestHeader, final OutputStream out) throws IOException {
|
|
final String httpVersion = (String) conProp.get(HeaderFramework.CONNECTION_PROP_HTTP_VER);
|
|
URL proxyurl = null;
|
|
String action = "";
|
|
|
|
if(conProp != null && conProp.containsKey("ARGS")) {
|
|
String strARGS = (String) conProp.get("ARGS");
|
|
if(strARGS.startsWith("action=")) {
|
|
int detectnextargument = strARGS.indexOf("&");
|
|
action = strARGS.substring (7, detectnextargument);
|
|
strARGS = strARGS.substring(detectnextargument+1);
|
|
}
|
|
if(strARGS.startsWith("url=")) {
|
|
final String strUrl = strARGS.substring(4); // strip url=
|
|
|
|
try {
|
|
proxyurl = new URL(strUrl);
|
|
} catch (MalformedURLException e) {
|
|
proxyurl = new URL (URLDecoder.decode(strUrl, UTF8.charset.name()));
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
if (proxyurl==null) {
|
|
throw new IOException("no url as argument supplied");
|
|
}
|
|
String host = proxyurl.getHost();
|
|
if (proxyurl.getPort() != -1) {
|
|
host += ":" + proxyurl.getPort();
|
|
}
|
|
|
|
// set properties for proxy connection
|
|
final HashMap<String, Object> prop = new HashMap<String, Object>();
|
|
prop.put(HeaderFramework.CONNECTION_PROP_HTTP_VER, HeaderFramework.HTTP_VERSION_1_1);
|
|
prop.put(HeaderFramework.CONNECTION_PROP_HOST, host);
|
|
prop.put(HeaderFramework.CONNECTION_PROP_PATH, proxyurl.getFile().replaceAll(" ", "%20"));
|
|
prop.put(HeaderFramework.CONNECTION_PROP_REQUESTLINE, "PROXY");
|
|
prop.put("CLIENTIP", "0:0:0:0:0:0:0:1");
|
|
|
|
// remove some stuff from request header, so it isn't send to the server
|
|
requestHeader.remove("CLIENTIP");
|
|
requestHeader.remove("EXT");
|
|
requestHeader.remove("PATH");
|
|
requestHeader.remove("Authorization");
|
|
requestHeader.remove("Connection");
|
|
requestHeader.put(HeaderFramework.HOST, proxyurl.getHost());
|
|
|
|
// temporarily add argument to header to pass it on to augmented browsing
|
|
requestHeader.put("YACYACTION", action);
|
|
|
|
final ByteArrayOutputStream o = new ByteArrayOutputStream();
|
|
HTTPDProxyHandler.doGet(prop, requestHeader, o);
|
|
|
|
// reparse header to extract content-length and mimetype
|
|
final ResponseHeader outgoingHeader = new ResponseHeader(200);
|
|
final InputStream in = new ByteArrayInputStream(o.toByteArray());
|
|
String line = readLine(in);
|
|
while(line != null && !line.equals("")) {
|
|
int p;
|
|
if ((p = line.indexOf(':')) >= 0) {
|
|
// store a property
|
|
outgoingHeader.add(line.substring(0, p).trim(), line.substring(p + 1).trim());
|
|
}
|
|
line = readLine(in);
|
|
}
|
|
if (line==null) {
|
|
HTTPDemon.sendRespondError(conProp,out,3,500,"null",null,null);
|
|
return;
|
|
}
|
|
|
|
final int httpStatus = Integer.parseInt((String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_STATUS));
|
|
|
|
String directory = "";
|
|
if (proxyurl.getPath().lastIndexOf('/') > 0)
|
|
directory = proxyurl.getPath().substring(0, proxyurl.getPath().lastIndexOf('/'));
|
|
|
|
String location = "";
|
|
|
|
if (outgoingHeader.containsKey("Location")) {
|
|
// rewrite location header
|
|
location = outgoingHeader.get("Location");
|
|
if (location.startsWith("http")) {
|
|
location = "/proxy.html?action="+action+"&url=" + location;
|
|
} else {
|
|
location = "/proxy.html?action="+action+"&url=http://" + proxyurl.getHost() + "/" + location;
|
|
}
|
|
outgoingHeader.put("Location", location);
|
|
}
|
|
|
|
final String mimeType = outgoingHeader.getContentType();
|
|
if ((mimeType.startsWith("text/html") || mimeType.startsWith("text"))) {
|
|
final StringWriter buffer = new StringWriter();
|
|
|
|
if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) {
|
|
FileUtils.copy(new ChunkedInputStream(in), buffer, UTF8.charset);
|
|
} else {
|
|
FileUtils.copy(in, buffer, UTF8.charset);
|
|
}
|
|
|
|
final String sbuffer = buffer.toString();
|
|
|
|
final Pattern p = Pattern.compile("(href=\"|src=\")([^\"]+)|(href='|src=')([^']+)|(url\\(')([^']+)|(url\\(\")([^\"]+)|(url\\()([^\\)]+)");
|
|
final Matcher m = p.matcher(sbuffer);
|
|
final StringBuffer result = new StringBuffer(80);
|
|
String init, url;
|
|
MultiProtocolURI target;
|
|
while (m.find()) {
|
|
init = null;
|
|
if(m.group(1) != null) init = m.group(1);
|
|
if(m.group(3) != null) init = m.group(3);
|
|
if(m.group(5) != null) init = m.group(5);
|
|
if(m.group(7) != null) init = m.group(7);
|
|
if(m.group(9) != null) init = m.group(9);
|
|
url = null;
|
|
if(m.group(2) != null) url = m.group(2);
|
|
if(m.group(4) != null) url = m.group(4);
|
|
if(m.group(6) != null) url = m.group(6);
|
|
if(m.group(8) != null) url = m.group(8);
|
|
if(m.group(10) != null) url = m.group(10);
|
|
if (url.startsWith("data:") || url.startsWith("#") || url.startsWith("mailto:") || url.startsWith("javascript:")) {
|
|
String newurl = init + url;
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
m.appendReplacement(result, newurl);
|
|
|
|
} else if (url.startsWith("http")) {
|
|
// absolute url of form href="http://domain.com/path"
|
|
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
|
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURI(url)) != null) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
String newurl = init + "/proxy.html?url=" + url;
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
m.appendReplacement(result, newurl);
|
|
|
|
} else if (url.startsWith("//")) {
|
|
// absolute url but same protocol of form href="//domain.com/path"
|
|
final String complete_url = proxyurl.getProtocol() + ":" + url;
|
|
if (sb.getConfig("proxyURL.rewriteURLs", "all").equals("domainlist")) {
|
|
if (sb.crawlStacker.urlInAcceptedDomain(new DigestURI(complete_url)) != null) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
String newurl = init + "/proxy.html?url=" + complete_url;
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
m.appendReplacement(result, newurl);
|
|
|
|
} else if (url.startsWith("/")) {
|
|
// absolute path of form href="/absolute/path/to/linked/page"
|
|
String newurl = init + "/proxy.html?url=http://" + host + url;
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
m.appendReplacement(result, newurl);
|
|
|
|
} else {
|
|
// relative path of form href="relative/path"
|
|
try {
|
|
target = new MultiProtocolURI("http://" + host + directory + "/" + url);
|
|
String newurl = init + "/proxy.html?url=" + target.toString();
|
|
newurl = newurl.replaceAll("\\$","\\\\\\$");
|
|
m.appendReplacement(result, newurl);
|
|
}
|
|
catch (final MalformedURLException e) {}
|
|
|
|
}
|
|
}
|
|
m.appendTail(result);
|
|
|
|
final byte[] sbb = UTF8.getBytes(result.toString());
|
|
|
|
if (outgoingHeader.containsKey(HeaderFramework.TRANSFER_ENCODING)) {
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
|
|
final ChunkedOutputStream cos = new ChunkedOutputStream(out);
|
|
cos.write(sbb);
|
|
cos.finish();
|
|
cos.close();
|
|
} else {
|
|
outgoingHeader.put(HeaderFramework.CONTENT_LENGTH, Integer.toString(sbb.length));
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
|
|
out.write(sbb);
|
|
}
|
|
} else {
|
|
if (!outgoingHeader.containsKey(HeaderFramework.CONTENT_LENGTH))
|
|
outgoingHeader.put(HeaderFramework.CONTENT_LENGTH, (String) prop.get(HeaderFramework.CONNECTION_PROP_PROXY_RESPOND_SIZE));
|
|
HTTPDemon.sendRespondHeader(conProp, out, httpVersion, httpStatus, outgoingHeader);
|
|
FileUtils.copy(in, out);
|
|
}
|
|
return;
|
|
}
|
|
|
|
private static String readLine(final InputStream in) throws IOException {
|
|
final ByteArrayOutputStream buf = new ByteArrayOutputStream();
|
|
int b;
|
|
while ((b=in.read()) != '\r' && b != -1) {
|
|
buf.write(b);
|
|
}
|
|
if (b == -1) return null;
|
|
b = in.read(); // read \n
|
|
if (b == -1) return null;
|
|
return buf.toString("UTF-8");
|
|
}
|
|
|
|
}
|