diff --git a/.classpath b/.classpath index 8087d584c..5c4063b3c 100644 --- a/.classpath +++ b/.classpath @@ -13,7 +13,6 @@ - diff --git a/addon/YaCy.app/Contents/Info.plist b/addon/YaCy.app/Contents/Info.plist index 40473be90..cee5a4604 100644 --- a/addon/YaCy.app/Contents/Info.plist +++ b/addon/YaCy.app/Contents/Info.plist @@ -41,7 +41,6 @@ $JAVAROOT/lib/bzip2.jar $JAVAROOT/lib/commons-codec-1.4.jar $JAVAROOT/lib/commons-fileupload-1.2.2.jar - $JAVAROOT/lib/commons-httpclient-3.1.jar $JAVAROOT/lib/commons-io-1.4.jar $JAVAROOT/lib/commons-jxpath-1.3.jar $JAVAROOT/lib/commons-logging-1.1.1.jar diff --git a/build.xml b/build.xml index 475debc25..d81a1d039 100644 --- a/build.xml +++ b/build.xml @@ -185,7 +185,6 @@ - diff --git a/lib/commons-httpclient-3.1.jar b/lib/commons-httpclient-3.1.jar deleted file mode 100644 index 7c59774ae..000000000 Binary files a/lib/commons-httpclient-3.1.jar and /dev/null differ diff --git a/source/de/anomic/http/server/ChunkedInputStream.java b/source/de/anomic/http/server/ChunkedInputStream.java new file mode 100644 index 000000000..479a39d92 --- /dev/null +++ b/source/de/anomic/http/server/ChunkedInputStream.java @@ -0,0 +1,461 @@ +/* ==================================================================== + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + * + */ + +/* + * This file was imported from apache http client 3.1 library and modified + * to work for the YaCy http server when http client library use was migrated + * to apache http components 4.0 + * by Michael Christen, 20.09.2010 + */ + + +package de.anomic.http.server; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; + + +/** + *

Transparently coalesces chunks of a HTTP stream that uses + * Transfer-Encoding chunked.

+ * + *

Note that this class NEVER closes the underlying stream, even when close + * gets called. Instead, it will read until the "end" of its chunking on close, + * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while + * not requiring the client to remember to read the entire contents of the + * response.

/**
 * Transparently coalesces the chunks of an HTTP message body that uses
 * <code>Transfer-Encoding: chunked</code>.
 *
 * <p>This class NEVER closes the underlying stream, even when {@link #close()}
 * is called. Instead, {@code close()} reads up to the end of the chunked
 * body, so the underlying stream is left positioned at the first byte after
 * the body and can be reused for the next HTTP/1.1 message.</p>
 *
 * <p>Trailer headers that follow the terminating zero-size chunk are read
 * and discarded.</p>
 *
 * <p>If the underlying stream ends in the middle of a chunk's data, reads
 * report end of stream (-1); if it ends inside the chunk framing (size line
 * or chunk terminator) an {@link IOException} is thrown.</p>
 *
 * @author Ortwin Glueck
 * @author Sean C. Sullivan
 * @author Martin Elwin
 * @author Eric Johnson
 * @author Mike Bowler
 * @author Michael Becke
 * @author Oleg Kalnichevski
 *
 * @since 2.0
 */
public class ChunkedInputStream extends InputStream {

    /** Message used when the wrapped stream ends inside a chunk-size line. */
    private static final String UNEXPECTED_END = "chunked stream ended unexpectedly";

    /** The input stream that we're wrapping; never closed by this class. */
    private final InputStream in;

    /** The size of the current chunk. */
    private int chunkSize;

    /** The current position within the current chunk. */
    private int pos;

    /** True if we're at the beginning of the stream (no chunk header read yet). */
    private boolean bof = true;

    /** True if we've reached the end of the chunked body. */
    private boolean eof = false;

    /** True if this stream is closed. */
    private boolean closed = false;

    /**
     * Creates a decoder on top of the given raw stream. Nothing is read from
     * the raw stream until the first read call.
     *
     * @param in the raw input stream, positioned at the first chunk-size line
     *
     * @throws IllegalArgumentException if {@code in} is null
     * @throws IOException declared for interface compatibility; not thrown here
     */
    public ChunkedInputStream(final InputStream in) throws IOException {
        if (in == null) {
            throw new IllegalArgumentException("InputStream parameter may not be null");
        }
        this.in = in;
        this.pos = 0;
    }

    /**
     * Returns the next byte of the de-chunked body.
     *
     * @return the next data byte, or -1 once the zero-size chunk has been
     *         seen or the underlying stream ended inside a chunk
     * @throws IOException if this stream is closed, the chunk framing is
     *         malformed, or the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (eof) {
            return -1;
        }
        if (pos >= chunkSize) {
            nextChunk();
            if (eof) {
                return -1;
            }
        }
        final int b = in.read();
        if (b < 0) {
            // Underlying stream ended inside a chunk: report end of stream
            // once and for all instead of advancing pos past missing data.
            eof = true;
            return -1;
        }
        pos++;
        return b;
    }

    /**
     * Reads bytes of the de-chunked body. At most the remainder of the
     * current chunk is returned by a single call.
     *
     * @param b   the byte array that receives the data
     * @param off the offset into {@code b} at which the data is placed
     * @param len the maximum number of bytes to read
     * @return the number of bytes read, or -1 if the end of the body has been
     *         reached
     * @throws IOException if this stream is closed, the chunk framing is
     *         malformed, or the underlying stream fails
     * @see java.io.InputStream#read(byte[], int, int)
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (eof) {
            return -1;
        }
        if (pos >= chunkSize) {
            nextChunk();
            if (eof) {
                return -1;
            }
        }
        final int count = in.read(b, off, Math.min(len, chunkSize - pos));
        if (count < 0) {
            // Do not add -1 to pos; a truncated chunk is treated as end of stream.
            eof = true;
            return -1;
        }
        pos += count;
        return count;
    }

    /**
     * Reads bytes of the de-chunked body into the whole array.
     *
     * @param b the byte array that receives the data
     * @return the number of bytes read, or -1 if the end of the body has been
     *         reached
     * @throws IOException if an IO problem occurs
     * @see java.io.InputStream#read(byte[])
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads the CRLF that terminates the data of a chunk.
     *
     * @throws IOException if the next two bytes are not CR LF
     */
    private void readCRLF() throws IOException {
        final int cr = in.read();
        final int lf = in.read();
        if ((cr != '\r') || (lf != '\n')) {
            throw new IOException(
                "CRLF expected at end of chunk: " + cr + "/" + lf);
        }
    }

    /**
     * Advances to the next chunk: consumes the terminator of the previous
     * chunk (if any), parses the next chunk-size line and, for the final
     * zero-size chunk, skips the trailer headers and marks end of stream.
     *
     * @throws IOException if the framing is malformed or an IO error occurs
     */
    private void nextChunk() throws IOException {
        if (!bof) {
            readCRLF();
        }
        chunkSize = getChunkSizeFromInputStream(in);
        bof = false;
        pos = 0;
        if (chunkSize == 0) {
            eof = true;
            skipTrailerHeaders();
        }
    }

    /**
     * Parses a chunk-size line. The line holds the size in hex, optionally
     * followed by extensions after a semicolon, and must end with CRLF, e.g.
     * "a3; some comment\r\n". On return the stream is positioned at the
     * start of the following line.
     *
     * @param in the stream to read the size line from
     * @return the chunk size, never negative
     * @throws IOException if the stream ends inside the line, the line is not
     *         CRLF-terminated, or the size is not a non-negative hex integer
     */
    private static int getChunkSizeFromInputStream(final InputStream in)
      throws IOException {

        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
        int state = 0;
        while (state != -1) {
            int b = in.read();
            if (b == -1) {
                throw new IOException(UNEXPECTED_END);
            }
            switch (state) {
                case 0:
                    switch (b) {
                        case '\r':
                            state = 1;
                            break;
                        case '\"':
                            state = 2;
                            baos.write(b);
                            break;
                        default:
                            baos.write(b);
                    }
                    break;

                case 1:
                    if (b == '\n') {
                        state = -1;
                    } else {
                        // this was not CRLF
                        throw new IOException("Protocol violation: Unexpected"
                            + " single newline character in chunk size");
                    }
                    break;

                case 2:
                    switch (b) {
                        case '\\':
                            // escaped character: take the next byte verbatim
                            b = in.read();
                            if (b == -1) {
                                throw new IOException(UNEXPECTED_END);
                            }
                            baos.write(b);
                            break;
                        case '\"':
                            state = 0;
                            baos.write(b);
                            break;
                        default:
                            baos.write(b);
                    }
                    break;
                default: throw new IllegalStateException("assertion failed");
            }
        }

        // everything before the first ';' is the hex size, the rest are extensions
        final String line = getAsciiString(baos.toByteArray());
        final int separator = line.indexOf(';');
        final String sizeToken =
            ((separator > 0) ? line.substring(0, separator) : line).trim();

        final int result;
        try {
            result = Integer.parseInt(sizeToken, 16);
        } catch (NumberFormatException e) {
            throw new IOException("Bad chunk size: " + sizeToken, e);
        }
        if (result < 0) {
            // parseInt accepts a leading '-', which is not a valid chunk size
            throw new IOException("Bad chunk size: " + sizeToken);
        }
        return result;
    }

    /**
     * Converts a byte array of ASCII characters to a string.
     *
     * @param data the bytes to decode
     * @return the decoded string
     * @throws IllegalArgumentException if {@code data} is null
     * @throws IOException if the US-ASCII charset is unavailable
     *
     * @since 3.0
     */
    private static String getAsciiString(final byte[] data) throws IOException {
        if (data == null) {
            throw new IllegalArgumentException("Parameter may not be null");
        }
        try {
            return new String(data, 0, data.length, "US-ASCII");
        } catch (UnsupportedEncodingException e) {
            throw new IOException("HttpClient requires ASCII support", e);
        }
    }

    /**
     * Reads and discards the trailer headers: consumes lines up to and
     * including the first blank line, or until the underlying stream ends.
     *
     * @throws IOException if an IO problem occurs
     */
    private void skipTrailerHeaders() throws IOException {
        for (;;) {
            final byte[] rawLine = readRawLine(in);
            if (rawLine == null) {
                return; // no more input
            }
            // trim() also removes the trailing CR/LF of the raw line
            if (getAsciiString(rawLine).trim().length() < 1) {
                return;
            }
        }
    }

    /**
     * Reads bytes up to and including the next "\n". If the stream ends
     * before a line terminator is found, the bytes read so far are returned.
     *
     * @param inputStream the stream to read from
     * @return the raw line including its terminator, or null if no input was
     *         available
     * @throws IOException if an I/O problem occurs
     */
    private static byte[] readRawLine(InputStream inputStream) throws IOException {
        final ByteArrayOutputStream buf = new ByteArrayOutputStream();
        int ch;
        while ((ch = inputStream.read()) >= 0) {
            buf.write(ch);
            if (ch == '\n') { // be tolerant (RFC-2616 Section 19.3)
                break;
            }
        }
        if (buf.size() == 0) {
            return null;
        }
        return buf.toByteArray();
    }

    /**
     * Reads the remainder of the chunked body, leaving the underlying stream
     * positioned at the start of the next message. The underlying stream is
     * not closed. Calling this method more than once has no further effect.
     *
     * @throws IOException if an IO problem occurs while draining
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            try {
                if (!eof) {
                    exhaustInputStream(this);
                }
            } finally {
                eof = true;
                closed = true;
            }
        }
    }

    /**
     * Reads and discards everything from the given stream until it reports
     * end of stream.
     *
     * <p>This is a package-level utility kept here to avoid a separate
     * utility class for a single function.</p>
     *
     * @param inStream the {@link InputStream} to exhaust
     * @throws IOException if an IO problem occurs
     */
    static void exhaustInputStream(InputStream inStream) throws IOException {
        // read and discard the remainder of the message
        final byte[] buffer = new byte[1024];
        while (inStream.read(buffer) >= 0) {
            // discard
        }
    }
}
+ * + */ + +/* + * This file was imported from apache http client 3.1 library and modified + * to work for the YaCy http server when http client library use was migrated + * to apache http components 4.0 + * by Michael Christen, 20.09.2010 + */ + +package de.anomic.http.server; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Cuts the wrapped InputStream off after a specified number of bytes. + * + *

Implementation note: Choices abound. One approach would pass + * through the {@link InputStream#mark} and {@link InputStream#reset} calls to + * the underlying stream. That's tricky, though, because you then have to + * start duplicating the work of keeping track of how much a reset rewinds. + * Further, you have to watch out for the "readLimit", and since the semantics + * for the readLimit leave room for differing implementations, you might get + * into a lot of trouble.

+ * + *

Alternatively, you could make this class extend {@link java.io.BufferedInputStream} + * and then use the protected members of that class to avoid duplicated effort. + * That solution has the side effect of adding yet another possible layer of + * buffering.

+ * + *

Then, there is the simple choice, which this takes - simply don't + * support {@link InputStream#mark} and {@link InputStream#reset}. That choice + * has the added benefit of keeping this class very simple.

/**
 * Cuts the wrapped InputStream off after a specified number of bytes.
 *
 * <p>{@link InputStream#mark} and {@link InputStream#reset} are deliberately
 * not supported, which keeps this class simple. The wrapped stream is never
 * closed by this class; {@link #close()} only consumes the remainder of the
 * declared content so the wrapped stream is positioned for the next
 * message.</p>
 *
 * <p>Only bytes actually delivered by the wrapped stream are counted against
 * the content length.</p>
 *
 * @author Ortwin Glueck
 * @author Eric Johnson
 * @author Mike Bowler
 * @since 2.0
 */
public class ContentLengthInputStream extends InputStream {

    /**
     * The maximum number of bytes that can be read from the stream. Subsequent
     * read operations will return -1.
     */
    private final long contentLength;

    /** The number of content bytes consumed so far. */
    private long pos = 0;

    /** True if the stream is closed. */
    private boolean closed = false;

    /** Wrapped input stream that all calls are delegated to. */
    private final InputStream wrappedStream;

    /**
     * Creates a new length limited stream.
     *
     * @param in The stream to wrap
     * @param contentLength The maximum number of bytes that can be read from
     * the stream. Subsequent read operations will return -1.
     *
     * @since 3.0
     */
    public ContentLengthInputStream(InputStream in, long contentLength) {
        super();
        this.wrappedStream = in;
        this.contentLength = contentLength;
    }

    /**
     * Reads until the end of the known length of content.
     *
     * <p>Does not close the underlying input, but instead leaves it primed
     * to parse the next message.</p>
     *
     * @throws IOException If an IO problem occurs.
     */
    @Override
    public void close() throws IOException {
        if (closed) {
            return;
        }
        try {
            // read and discard whatever is left of the declared content
            final byte[] scratch = new byte[1024];
            while (read(scratch) >= 0) {
                // discard
            }
        } finally {
            // mark closed only after draining, since reading a closed
            // stream throws
            closed = true;
        }
    }

    /**
     * Reads the next byte from the stream.
     *
     * @return The next byte or -1 if the end of content has been reached or
     *         the wrapped stream has no more data.
     * @throws IOException If this stream is closed or an IO problem occurs
     * @see java.io.InputStream#read()
     */
    @Override
    public int read() throws IOException {
        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (pos >= contentLength) {
            return -1;
        }
        final int b = this.wrappedStream.read();
        if (b >= 0) {
            // count only bytes that were really delivered
            pos++;
        }
        return b;
    }

    /**
     * Reads up to {@code len} bytes, never more than what is left of the
     * declared content length.
     *
     * @param b The byte array to fill.
     * @param off Start filling at this position.
     * @param len The number of bytes to attempt to read.
     * @return The number of bytes read, or -1 if the end of content has been
     *         reached or the wrapped stream has no more data.
     *
     * @throws java.io.IOException If this stream is closed or an error occurs
     *         on the wrapped stream.
     */
    @Override
    public int read(byte[] b, int off, int len) throws java.io.IOException {
        if (closed) {
            throw new IOException("Attempted read from closed stream.");
        }
        if (pos >= contentLength) {
            return -1;
        }
        if (pos + len > contentLength) {
            len = (int) (contentLength - pos);
        }
        final int count = this.wrappedStream.read(b, off, len);
        if (count > 0) {
            // a -1 from the wrapped stream must not move the position back
            pos += count;
        }
        return count;
    }

    /**
     * Reads more bytes from the stream.
     *
     * @param b The byte array to put the new data in.
     * @return The number of bytes read into the buffer, or -1 at end of content.
     * @throws IOException If an IO problem occurs
     * @see java.io.InputStream#read(byte[])
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Skips and discards a number of bytes from the input stream, never more
     * than what is left of the declared content length.
     *
     * @param n The number of bytes to skip.
     * @return The actual number of bytes skipped; 0 if no bytes are skipped.
     * @throws IOException If an error occurs while skipping bytes.
     * @see InputStream#skip(long)
     */
    @Override
    public long skip(long n) throws IOException {
        // make sure we don't skip more bytes than are still available
        final long wanted = Math.min(n, contentLength - pos);
        if (wanted <= 0) {
            return 0;
        }
        // only bytes that were actually skipped advance the position
        final long skipped = this.wrappedStream.skip(wanted);
        if (skipped > 0) {
            pos += skipped;
        }
        return skipped;
    }

    /**
     * Returns how many content bytes can be read without blocking, capped at
     * the remainder of the declared content length.
     *
     * @return the number of available bytes, never negative; 0 if closed
     * @throws IOException If the wrapped stream fails
     */
    @Override
    public int available() throws IOException {
        if (this.closed) {
            return 0;
        }
        int avail = this.wrappedStream.available();
        if (this.pos + avail > this.contentLength) {
            avail = (int) (this.contentLength - this.pos);
        }
        return Math.max(0, avail);
    }

}