completed migration from apache httpclient-3.1 to 4.1:

- remove the library - added two classes from the httpclient-3.1 library as source code to YaCy because these classes were used by the YaCy HTTP Server - modified the added classes ChunkedInputStream and ContentLengthInputStream in such a way that: * there are no more dependencies to httpclient-3.1 * these classes had been simplified to serve only the purpose for the YaCy httpd git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@7171 6c8d7289-2bf4-0310-a012-ef5d649a1542
15 years ago · 3552476fbe
parent 8da4eb5de6
commit 3552476fbe
7 changed files with 671 additions and 5 deletions
--- a/.classpath
+++ b/.classpath
@ -13,7 +13,6 @@
 	<classpathentry kind="src" path="htroot/api/bookmarks/xbel"/>
 	<classpathentry kind="src" path="htroot/api/bookmarks/tags"/>
 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
-	<classpathentry kind="lib" path="lib/commons-httpclient-3.1.jar" sourcepath="/commons-httpclient-3.1/src"/>
 	<classpathentry kind="lib" path="lib/commons-logging-1.1.1.jar"/>
 	<classpathentry kind="lib" path="lib/commons-io-1.4.jar"/>
 	<classpathentry kind="lib" path="lib/servlet-api.jar"/>
--- a/addon/YaCy.app/Contents/Info.plist
+++ b/addon/YaCy.app/Contents/Info.plist
@ -41,7 +41,6 @@
 			<string>$JAVAROOT/lib/bzip2.jar</string>
 			<string>$JAVAROOT/lib/commons-codec-1.4.jar</string>
 			<string>$JAVAROOT/lib/commons-fileupload-1.2.2.jar</string>
-			<string>$JAVAROOT/lib/commons-httpclient-3.1.jar</string>
 			<string>$JAVAROOT/lib/commons-io-1.4.jar</string>
 			<string>$JAVAROOT/lib/commons-jxpath-1.3.jar</string>
 			<string>$JAVAROOT/lib/commons-logging-1.1.1.jar</string>
--- a/build.xml
+++ b/build.xml
@ -185,7 +185,6 @@
    	<pathelement location="${lib}/bzip2.jar" />
    	<pathelement location="${lib}/commons-codec-1.4.jar" />
    	<pathelement location="${lib}/commons-fileupload-1.2.2.jar" />
-    	<pathelement location="${lib}/commons-httpclient-3.1.jar" />
    	<pathelement location="${lib}/commons-io-1.4.jar" />
    	<pathelement location="${lib}/commons-jxpath-1.3.jar" />
    	<pathelement location="${lib}/commons-logging-1.1.1.jar" />
--- a/lib/commons-httpclient-3.1.jar
+++ b/lib/commons-httpclient-3.1.jar
--- a/source/de/anomic/http/server/ChunkedInputStream.java
+++ b/source/de/anomic/http/server/ChunkedInputStream.java
@ -0,0 +1,461 @@
+/* ====================================================================
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+/*
+ * This file was imported from apache http client 3.1 library and modified
+ * to work for the YaCy http server when http client library use was migrated
+ * to apache http components 4.0
+ * by Michael Christen, 20.09.2010
+ */
+
+
+package de.anomic.http.server;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+
+
+/**
+ * <p>Transparently coalesces chunks of a HTTP stream that uses
+ * Transfer-Encoding chunked.</p>
+ *
+ * <p>Note that this class NEVER closes the underlying stream, even when close
+ * gets called.  Instead, it will read until the "end" of its chunking on close,
+ * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
+ * not requiring the client to remember to read the entire contents of the
+ * response.</p>
+ *
+ * @author Ortwin Glueck
+ * @author Sean C. Sullivan
+ * @author Martin Elwin
+ * @author Eric Johnson
+ * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
+ * @author Michael Becke
+ * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
+ *
+ * @since 2.0
+ *
+ */
+public class ChunkedInputStream extends InputStream {
+    /** The raw input stream that carries the chunk-encoded data. */
+    private final InputStream in;
+
+    /** The size of the current chunk as announced by its size line. */
+    private int chunkSize;
+
+    /** The number of bytes of the current chunk that were already handed out. */
+    private int pos;
+
+    /** True if we are at the beginning of the stream (no chunk size line read yet). */
+    private boolean bof = true;
+
+    /** True if the terminating zero-size chunk has been read. */
+    private boolean eof = false;
+
+    /** True if this stream is closed. */
+    private boolean closed = false;
+
+    /**
+     * ChunkedInputStream constructor
+     *
+     * @param in the raw input stream; must not be <code>null</code>
+     *
+     * @throws IllegalArgumentException if <code>in</code> is <code>null</code>
+     * @throws IOException never thrown by this constructor itself (it performs
+     * no I/O); the declaration is kept so that existing callers still compile
+     */
+    public ChunkedInputStream(final InputStream in) throws IOException {
+        if (in == null) {
+            throw new IllegalArgumentException("InputStream parameter may not be null");
+        }
+        this.in = in;
+        this.pos = 0;
+    }
+
+    /**
+     * <p>Returns the next data byte of the chunked stream in coalesced form.
+     * A chunk is followed by a CRLF. The method returns -1 as soon as a
+     * chunk size of 0 is detected.</p>
+     *
+     * <p>Trailer headers are read automatically at the end of the stream and
+     * are discarded.</p>
+     *
+     * <p>If the underlying stream ends in the middle of a chunk, -1 is
+     * returned and the chunk position is left untouched, so repeated calls
+     * keep returning -1.</p>
+     *
+     * @return -1 if the end of the stream has been reached, or the next data
+     * byte
+     * @throws IOException If an IO problem occurs or the stream is closed
+     */
+    @Override
+    public int read() throws IOException {
+        if (closed) {
+            throw new IOException("Attempted read from closed stream.");
+        }
+        if (eof) {
+            return -1;
+        }
+        if (pos >= chunkSize) {
+            nextChunk();
+            if (eof) {
+                return -1;
+            }
+        }
+        final int b = in.read();
+        // only count bytes that were really delivered; a premature end of the
+        // underlying stream must not advance the position inside the chunk
+        if (b >= 0) {
+            pos++;
+        }
+        return b;
+    }
+
+    /**
+     * Read some bytes from the stream. At most the remainder of the current
+     * chunk is read by a single call.
+     * @param b The byte array that will hold the contents from the stream.
+     * @param off The offset into the byte array at which bytes will start to be
+     * placed.
+     * @param len the maximum number of bytes that can be returned.
+     * @return The number of bytes returned or -1 if the end of stream has been
+     * reached.
+     * @see java.io.InputStream#read(byte[], int, int)
+     * @throws IOException if an IO problem occurs or the stream is closed.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        if (closed) {
+            throw new IOException("Attempted read from closed stream.");
+        }
+        if (eof) {
+            return -1;
+        }
+        if (pos >= chunkSize) {
+            nextChunk();
+            if (eof) {
+                return -1;
+            }
+        }
+        final int count = in.read(b, off, Math.min(len, chunkSize - pos));
+        // a return value of -1 signals a premature end of the underlying
+        // stream and must not be added to the position
+        if (count > 0) {
+            pos += count;
+        }
+        return count;
+    }
+
+    /**
+     * Read some bytes from the stream.
+     * @param b The byte array that will hold the contents from the stream.
+     * @return The number of bytes returned or -1 if the end of stream has been
+     * reached.
+     * @see java.io.InputStream#read(byte[])
+     * @throws IOException if an IO problem occurs.
+     */
+    @Override
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Read the CRLF terminator that follows the data of a chunk.
+     * @throws IOException If an IO error occurs or the next two bytes are
+     * not CR LF.
+     */
+    private void readCRLF() throws IOException {
+        final int cr = in.read();
+        final int lf = in.read();
+        if ((cr != '\r') || (lf != '\n')) {
+            throw new IOException(
+                "CRLF expected at end of chunk: " + cr + "/" + lf);
+        }
+    }
+
+    /**
+     * Read the header of the next chunk. For every chunk but the first one
+     * the CRLF that terminates the previous chunk is consumed first. A chunk
+     * size of 0 marks the end of the stream; the trailer headers are skipped
+     * in that case.
+     * @throws IOException If an IO error occurs.
+     */
+    private void nextChunk() throws IOException {
+        if (!bof) {
+            readCRLF();
+        }
+        chunkSize = getChunkSizeFromInputStream(in);
+        bof = false;
+        pos = 0;
+        if (chunkSize == 0) {
+            eof = true;
+            skipTrailerHeaders();
+        }
+    }
+
+    /**
+     * Expects the stream to start with a chunk size in hex with optional
+     * comments after a semicolon. The line must end with a CRLF: "a3; some
+     * comment\r\n" Positions the stream at the start of the next line.
+     *
+     * @param in The input stream to read the chunk size line from.
+     *
+     * @return the chunk size as integer, never negative
+     *
+     * @throws IOException when the stream ends inside the line, the line is
+     * not terminated by CRLF, or the chunk size could not be parsed or is
+     * negative
+     */
+    private static int getChunkSizeFromInputStream(final InputStream in)
+      throws IOException {
+
+        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
+        int state = 0;
+        while (state != -1) {
+            int b = in.read();
+            if (b == -1) {
+                throw new IOException("chunked stream ended unexpectedly");
+            }
+            switch (state) {
+                case 0:
+                    switch (b) {
+                        case '\r':
+                            state = 1;
+                            break;
+                        case '\"':
+                            state = 2;
+                            baos.write(b);
+                            break;
+                        default:
+                            baos.write(b);
+                    }
+                    break;
+
+                case 1:
+                    if (b == '\n') {
+                        state = -1;
+                    } else {
+                        // this was not CRLF
+                        throw new IOException("Protocol violation: Unexpected"
+                            + " single newline character in chunk size");
+                    }
+                    break;
+
+                case 2:
+                    switch (b) {
+                        case '\\':
+                            // escaped character: store the character that
+                            // follows the backslash, whatever it is
+                            b = in.read();
+                            if (b == -1) {
+                                throw new IOException("chunked stream ended unexpectedly");
+                            }
+                            baos.write(b);
+                            break;
+                        case '\"':
+                            state = 0;
+                            baos.write(b);
+                            break;
+                        default:
+                            baos.write(b);
+                    }
+                    break;
+                default: throw new RuntimeException("assertion failed");
+            }
+        }
+
+        // parse data: everything behind the first semicolon is a chunk
+        // extension and is ignored
+        String dataString = getAsciiString(baos.toByteArray());
+        final int separator = dataString.indexOf(';');
+        dataString = (separator > 0)
+            ? dataString.substring(0, separator).trim()
+            : dataString.trim();
+
+        final int result;
+        try {
+            result = Integer.parseInt(dataString, 16);
+        } catch (NumberFormatException e) {
+            final IOException bad = new IOException("Bad chunk size: " + dataString);
+            bad.initCause(e);
+            throw bad;
+        }
+        // parseInt accepts a leading minus sign; a negative size is a protocol
+        // violation and would otherwise corrupt the chunk bookkeeping
+        if (result < 0) {
+            throw new IOException("Bad chunk size: " + dataString);
+        }
+        return result;
+    }
+
+    /**
+     * Converts the byte array of ASCII characters to a string. This method is
+     * to be used when decoding content of HTTP elements (such as the chunk
+     * size line).
+     *
+     * @param data the byte array to be decoded
+     * @return The string representation of the byte array
+     * @throws IOException if the US-ASCII charset is not available
+     *
+     * @since 3.0
+     */
+    private static String getAsciiString(final byte[] data) throws IOException {
+        if (data == null) {
+            throw new IllegalArgumentException("Parameter may not be null");
+        }
+
+        try {
+            return new String(data, 0, data.length, "US-ASCII");
+        } catch (UnsupportedEncodingException e) {
+            throw new IOException("HttpClient requires ASCII support");
+        }
+    }
+
+    /**
+     * Reads and discards the trailer headers: lines are consumed until an
+     * empty line or the end of the underlying stream is found.
+     * @throws IOException If an IO problem occurs
+     */
+    private void skipTrailerHeaders() throws IOException {
+        for (; ;) {
+            final String line = readLine(in, "US-ASCII");
+            if ((line == null) || (line.trim().length() < 1)) {
+                break;
+            }
+        }
+    }
+
+    /**
+     * Read up to <tt>"\n"</tt> from an (unchunked) input stream.
+     * If the stream ends before the line terminator is found,
+     * the last part of the string will still be returned.
+     * If no input data available, <code>null</code> is returned.
+     *
+     * @param inputStream the stream to read from
+     * @param charset charset of HTTP protocol elements
+     *
+     * @throws IOException if an I/O problem occurs
+     * @return a line from the stream without its trailing LF or CRLF
+     *
+     * @since 3.0
+     */
+    private static String readLine(InputStream inputStream, String charset) throws IOException {
+        final byte[] rawdata = readRawLine(inputStream);
+        if (rawdata == null) {
+            return null;
+        }
+        // strip CR and LF from the end
+        final int len = rawdata.length;
+        int terminatorLength = 0;
+        if (len > 0 && rawdata[len - 1] == '\n') {
+            terminatorLength = (len > 1 && rawdata[len - 2] == '\r') ? 2 : 1;
+        }
+        return getString(rawdata, 0, len - terminatorLength, charset);
+    }
+
+    /**
+     * Converts the byte array of HTTP content characters to a string. If
+     * the specified charset is not supported, default system encoding
+     * is used.
+     *
+     * @param data the byte array to be decoded
+     * @param offset the index of the first byte to decode
+     * @param length the number of bytes to decode
+     * @param charset the desired character encoding
+     * @return The result of the conversion.
+     *
+     * @since 3.0
+     */
+    private static String getString(
+        final byte[] data,
+        int offset,
+        int length,
+        String charset
+    ) {
+
+        if (data == null) {
+            throw new IllegalArgumentException("Parameter may not be null");
+        }
+
+        if (charset == null || charset.length() == 0) {
+            throw new IllegalArgumentException("charset may not be null or empty");
+        }
+
+        try {
+            return new String(data, offset, length, charset);
+        } catch (UnsupportedEncodingException e) {
+            return new String(data, offset, length);
+        }
+    }
+
+    /**
+     * Return byte array from an (unchunked) input stream.
+     * Stop reading when <tt>"\n"</tt> terminator encountered
+     * If the stream ends before the line terminator is found,
+     * the last part of the string will still be returned.
+     * If no input data available, <code>null</code> is returned.
+     *
+     * @param inputStream the stream to read from
+     *
+     * @throws IOException if an I/O problem occurs
+     * @return a byte array from the stream, including the terminator if one
+     * was read
+     */
+    private static byte[] readRawLine(InputStream inputStream) throws IOException {
+
+        final ByteArrayOutputStream buf = new ByteArrayOutputStream();
+        int ch;
+        while ((ch = inputStream.read()) >= 0) {
+            buf.write(ch);
+            if (ch == '\n') { // be tolerant (RFC-2616 Section 19.3)
+                break;
+            }
+        }
+        if (buf.size() == 0) {
+            return null;
+        }
+        return buf.toByteArray();
+    }
+
+    /**
+     * Upon close, this reads the remainder of the chunked message,
+     * leaving the underlying socket at a position to start reading the
+     * next response without scanning. The underlying stream itself is
+     * not closed.
+     * @throws IOException If an IO problem occurs.
+     */
+    @Override
+    public void close() throws IOException {
+        if (!closed) {
+            try {
+                if (!eof) {
+                    exhaustInputStream(this);
+                }
+            } finally {
+                // mark as closed even if draining failed
+                eof = true;
+                closed = true;
+            }
+        }
+    }
+
+    /**
+     * Exhaust an input stream, reading until EOF has been encountered.
+     *
+     * <p>Note that this function is intended as a non-public utility.
+     * This is a little weird, but it seemed silly to make a utility
+     * class for this one function, so instead it is just static and
+     * shared that way.</p>
+     *
+     * @param inStream The {@link InputStream} to exhaust.
+     * @throws IOException If an IO problem occurs
+     */
+    static void exhaustInputStream(InputStream inStream) throws IOException {
+        // read and discard the remainder of the message
+        final byte[] buffer = new byte[1024];
+        while (inStream.read(buffer) >= 0) {
+            // discard
+        }
+    }
+}
--- a/source/de/anomic/http/server/ContentLengthInputStream.java
+++ b/source/de/anomic/http/server/ContentLengthInputStream.java
@ -0,0 +1,210 @@
+/*
+ * ====================================================================
+ *
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+/*
+ * This file was imported from apache http client 3.1 library and modified
+ * to work for the YaCy http server when http client library use was migrated
+ * to apache http components 4.0
+ * by Michael Christen, 20.09.2010
+ */
+
+package de.anomic.http.server;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Cuts the wrapped InputStream off after a specified number of bytes.
+ *
+ * <p>Implementation note: Choices abound. One approach would pass
+ * through the {@link InputStream#mark} and {@link InputStream#reset} calls to
+ * the underlying stream.  That's tricky, though, because you then have to
+ * start duplicating the work of keeping track of how much a reset rewinds.
+ * Further, you have to watch out for the "readLimit", and since the semantics
+ * for the readLimit leave room for differing implementations, you might get
+ * into a lot of trouble.</p>
+ *
+ * <p>Alternatively, you could make this class extend {@link java.io.BufferedInputStream}
+ * and then use the protected members of that class to avoid duplicated effort.
+ * That solution has the side effect of adding yet another possible layer of
+ * buffering.</p>
+ *
+ * <p>Then, there is the simple choice, which this takes - simply don't
+ * support {@link InputStream#mark} and {@link InputStream#reset}.  That choice
+ * has the added benefit of keeping this class very simple.</p>
+ *
+ * @author Ortwin Glueck
+ * @author Eric Johnson
+ * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
+ * @since 2.0
+ */
+public class ContentLengthInputStream extends InputStream {
+
+    /**
+     * The maximum number of bytes that can be read from the stream. Subsequent
+     * read operations will return -1.
+     */
+    private final long contentLength;
+
+    /** The number of bytes that were already read or skipped. */
+    private long pos = 0;
+
+    /** True if the stream is closed. */
+    private boolean closed = false;
+
+    /**
+     * Wrapped input stream that all calls are delegated to.
+     */
+    private final InputStream wrappedStream;
+
+    /**
+     * Creates a new length limited stream
+     *
+     * @param in The stream to wrap
+     * @param contentLength The maximum number of bytes that can be read from
+     * the stream. Subsequent read operations will return -1.
+     *
+     * @since 3.0
+     */
+    public ContentLengthInputStream(InputStream in, long contentLength) {
+        super();
+        this.wrappedStream = in;
+        this.contentLength = contentLength;
+    }
+
+    /**
+     * <p>Reads until the end of the known length of content.</p>
+     *
+     * <p>Does not close the underlying socket input, but instead leaves it
+     * primed to parse the next response.</p>
+     * @throws IOException If an IO problem occurs.
+     */
+    @Override
+    public void close() throws IOException {
+        if (!closed) {
+            try {
+                // read and discard the remainder of the content
+                final byte[] scratch = new byte[1024];
+                while (read(scratch) >= 0) {
+                    // discard
+                }
+            } finally {
+                // close after above so that we don't throw an exception trying
+                // to read after closed!
+                closed = true;
+            }
+        }
+    }
+
+    /**
+     * Read the next byte from the stream
+     * @return The next byte or -1 if the end of the content or of the
+     * wrapped stream has been reached.
+     * @throws IOException If an IO problem occurs or the stream is closed
+     * @see java.io.InputStream#read()
+     */
+    @Override
+    public int read() throws IOException {
+        if (closed) {
+            throw new IOException("Attempted read from closed stream.");
+        }
+
+        if (pos >= contentLength) {
+            return -1;
+        }
+        final int b = this.wrappedStream.read();
+        // do not count a byte when the wrapped stream ended prematurely
+        if (b >= 0) {
+            pos++;
+        }
+        return b;
+    }
+
+    /**
+     * Does standard {@link InputStream#read(byte[], int, int)} behavior, but
+     * never reads beyond the content length.
+     *
+     * @param b     The byte array to fill.
+     * @param off   Start filling at this position.
+     * @param len   The number of bytes to attempt to read.
+     * @return The number of bytes read, or -1 if the end of content or of the
+     *  wrapped stream has been reached.
+     *
+     * @throws java.io.IOException Should an error occur on the wrapped stream
+     *  or if this stream is closed.
+     */
+    @Override
+    public int read(byte[] b, int off, int len) throws java.io.IOException {
+        if (closed) {
+            throw new IOException("Attempted read from closed stream.");
+        }
+
+        if (pos >= contentLength) {
+            return -1;
+        }
+
+        if (pos + len > contentLength) {
+            len = (int) (contentLength - pos);
+        }
+        final int count = this.wrappedStream.read(b, off, len);
+        // a return value of -1 (premature end of the wrapped stream) must not
+        // move the position backwards, otherwise a later read could hand out
+        // more than contentLength bytes
+        if (count > 0) {
+            pos += count;
+        }
+        return count;
+    }
+
+    /**
+     * Read more bytes from the stream.
+     * @param b The byte array to put the new data in.
+     * @return The number of bytes read into the buffer.
+     * @throws IOException If an IO problem occurs
+     * @see java.io.InputStream#read(byte[])
+     */
+    @Override
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Skips and discards a number of bytes from the input stream, but never
+     * more than the remaining content.
+     * @param n The number of bytes to skip.
+     * @return The actual number of bytes skipped. 0 if <code>n</code> is not
+     * positive or no content is left.
+     * @throws IOException If an error occurs while skipping bytes.
+     * @see InputStream#skip(long)
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        // make sure we don't skip more bytes than are
+        // still available
+        final long length = Math.min(n, contentLength - pos);
+        if (length <= 0) {
+            // nothing to skip: do not hand zero or negative counts to the
+            // wrapped stream
+            return 0;
+        }
+        // skip and keep track of the bytes actually skipped
+        final long skipped = this.wrappedStream.skip(length);
+        // only add the skipped bytes to the current position
+        // if bytes were actually skipped
+        if (skipped > 0) {
+            pos += skipped;
+        }
+        return skipped;
+    }
+
+    /**
+     * Returns the number of bytes the wrapped stream reports as available,
+     * limited to the remaining content.
+     * @return the number of available bytes, 0 if this stream is closed
+     * @throws IOException If an IO problem occurs
+     * @see InputStream#available()
+     */
+    @Override
+    public int available() throws IOException {
+        if (this.closed) {
+            return 0;
+        }
+        int avail = this.wrappedStream.available();
+        if (this.pos + avail > this.contentLength) {
+            avail = (int) (this.contentLength - this.pos);
+        }
+        return avail;
+    }
+
+}
--- a/source/de/anomic/http/server/HTTPDemon.java
+++ b/source/de/anomic/http/server/HTTPDemon.java
@ -71,8 +71,6 @@ import org.apache.commons.fileupload.FileUploadBase;
 import org.apache.commons.fileupload.FileUploadException;
 import org.apache.commons.fileupload.RequestContext;
 import org.apache.commons.fileupload.disk.DiskFileItemFactory;
-import org.apache.commons.httpclient.ChunkedInputStream;
-import org.apache.commons.httpclient.ContentLengthInputStream;

 import de.anomic.data.userDB;
 import de.anomic.search.Switchboard;