Improve HTTP/ICAP header processing.

- workaround for illegal line endings (LF only), closes: http://forum.yacy-websuche.de/viewtopic.php?f=6&t=595
- fixed bug where we didn't break the processing immediately on EOS (the loop was run until the buffer was completely filled with -1)
- further performance improvements (one simple loop, avoid double processing of every byte and unnecessary temporary buffers)

git-svn-id: https://svn.berlios.de/svnroot/repos/yacy/trunk@4270 6c8d7289-2bf4-0310-a012-ef5d649a1542
pull/1/head
fuchsi 17 years ago
parent 01554f4012
commit 1bd02762de

@ -1162,34 +1162,56 @@ public final class serverCore extends serverAbstractThread implements serverThre
} }
/**
* Read a line from a protocol stream (HTTP/ICAP) and do some
* pre-processing (check validity, strip line endings).
* <br>
* Illegal control characters will be stripped from the result.
* Besides the valid line ending CRLF, a single LF is treated as a
* line ending as well to avoid errors with buggy servers.
*
* @param pbis The stream to read from.
* @param maxSize maximum number of bytes to read in one run.
* @param logerr log error messages if true, be silent otherwise.
*
* @return A byte array representing one line of the input or
* <code>null</code> if EOS reached.
*/
public static byte[] receive(PushbackInputStream pbis, int maxSize, boolean logerr) { public static byte[] receive(PushbackInputStream pbis, int maxSize, boolean logerr) {
// reuse an existing linebuffer // reuse an existing linebuffer
serverByteBuffer readLineBuffer = new serverByteBuffer(80); serverByteBuffer readLineBuffer = new serverByteBuffer(80);
serverByteBuffer temp = new serverByteBuffer(80);
int bufferSize = 0, b = 0; int bufferSize = 0, b = 0;
try { try {
while ((b = pbis.read()) != cr) { // catch bytes until line end or illegal character reached or buffer full
temp.write(b); // resulting readLineBuffer doesn't include CRLF or illegal control chars
if (bufferSize++ > maxSize) break; while (bufferSize < maxSize) {
} b = pbis.read();
// we have catched a possible line end if ((b > 31 && b != 127) || b == 9) {
if (b == cr) { // add legal chars to the result
// maybe a lf follows, read it: readLineBuffer.append(b);
if ((b = pbis.read()) != lf) if (b >= 0) pbis.unread(b); // we push back the byte bufferSize++;
} } else if (b == cr) {
// possible beginning of CRLF, check following byte
byte tempByte; b = pbis.read();
for(int i=0; i<temp.length(); i++){ if (b == lf) {
tempByte = temp.byteAt(i); // line end catched: break the loop
// filter illegal bytes send by buggy HTTP servers break;
if( tempByte == 9 || (tempByte > 31 && tempByte != 127)) } else if (b >= 0) {
readLineBuffer.append(tempByte); // no line end: push back the byte, ignore the CR
pbis.unread(b);
}
} else if (b == lf || b < 0) {
// LF without precedent CR: treat as line end of broken servers
// b < 0: EOS
break;
}
} }
if ((readLineBuffer.length()==0)&&(b == -1)) return null; // EOS
if (bufferSize == 0 && b == -1) return null;
return readLineBuffer.getBytes(); return readLineBuffer.getBytes();
} catch (ClosedByInterruptException e) { } catch (ClosedByInterruptException e) {
if (logerr) serverLog.logSevere("SERVER", "receive interrupted - timeout"); if (logerr) serverLog.logSevere("SERVER", "receive interrupted - timeout");

Loading…
Cancel
Save