HttpHeaderParser.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.http.parser;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.tomcat.util.buf.MessageBytes;
import org.apache.tomcat.util.http.HeaderUtil;
import org.apache.tomcat.util.http.MimeHeaders;
import org.apache.tomcat.util.res.StringManager;
public class HttpHeaderParser {
private static final StringManager sm = StringManager.getManager(HttpHeaderParser.class);
private static final byte CR = (byte) '\r';
private static final byte LF = (byte) '\n';
private static final byte SP = (byte) ' ';
private static final byte HT = (byte) '\t';
private static final byte COLON = (byte) ':';
private static final byte A = (byte) 'A';
private static final byte a = (byte) 'a';
private static final byte Z = (byte) 'Z';
private static final byte LC_OFFSET = A - a;
private final HeaderDataSource source;
private final MimeHeaders headers;
private final boolean tolerantEol;
private final HeaderParseData headerData = new HeaderParseData();
private HeaderParsePosition headerParsePos = HeaderParsePosition.HEADER_START;
private byte prevChr = 0;
private byte chr = 0;
public HttpHeaderParser(HeaderDataSource source, MimeHeaders headers, boolean tolerantEol) {
this.source = source;
this.headers = headers;
this.tolerantEol = tolerantEol;
}
public void recycle() {
chr = 0;
prevChr = 0;
headerParsePos = HeaderParsePosition.HEADER_START;
headerData.recycle();
}
/**
* Parse an HTTP header.
*
* @return One of {@link HeaderParseStatus#NEED_MORE_DATA}, {@link HeaderParseStatus#HAVE_MORE_HEADERS} or
* {@link HeaderParseStatus#DONE}.
*
* @throws IOException If an error occurs during the parsing of the headers
*/
public HeaderParseStatus parseHeader() throws IOException {
while (headerParsePos == HeaderParsePosition.HEADER_START) {
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
prevChr = chr;
chr = source.getHeaderByteBuffer().get();
if (chr == CR && prevChr != CR) {
// Possible start of CRLF - process the next byte.
} else if (chr == LF) {
if (!tolerantEol && prevChr != CR) {
throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
}
return HeaderParseStatus.DONE;
} else {
if (prevChr == CR) {
// Must have read two bytes (first was CR, second was not LF)
source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 2);
} else {
// Must have only read one byte
source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
}
break;
}
}
if (headerParsePos == HeaderParsePosition.HEADER_START) {
// Mark the current buffer position
headerData.start = source.getHeaderByteBuffer().position();
headerData.lineStart = headerData.start;
headerParsePos = HeaderParsePosition.HEADER_NAME;
}
//
// Reading the header name
// Header name is always US-ASCII
//
while (headerParsePos == HeaderParsePosition.HEADER_NAME) {
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
int pos = source.getHeaderByteBuffer().position();
chr = source.getHeaderByteBuffer().get();
if (chr == COLON) {
if (headerData.start == pos) {
// Zero length header name - not valid.
// skipLine() will handle the error
return skipLine();
}
headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
headerData.headerValue = headers.addValue(source.getHeaderByteBuffer().array(), headerData.start,
pos - headerData.start);
pos = source.getHeaderByteBuffer().position();
// Mark the current buffer position
headerData.start = pos;
headerData.realPos = pos;
headerData.lastSignificantChar = pos;
break;
} else if (!HttpParser.isToken(chr)) {
// Non-token characters are illegal in header names
// Parsing continues so the error can be reported in context
headerData.lastSignificantChar = pos;
source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
// skipLine() will handle the error
return skipLine();
}
// chr is next byte of header name. Convert to lowercase.
if (chr >= A && chr <= Z) {
source.getHeaderByteBuffer().put(pos, (byte) (chr - LC_OFFSET));
}
}
// Skip the line and ignore the header
if (headerParsePos == HeaderParsePosition.HEADER_SKIPLINE) {
return skipLine();
}
//
// Reading the header value (which can be spanned over multiple lines)
//
while (headerParsePos == HeaderParsePosition.HEADER_VALUE_START ||
headerParsePos == HeaderParsePosition.HEADER_VALUE ||
headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {
if (headerParsePos == HeaderParsePosition.HEADER_VALUE_START) {
// Skipping spaces
while (true) {
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
chr = source.getHeaderByteBuffer().get();
if (chr != SP && chr != HT) {
headerParsePos = HeaderParsePosition.HEADER_VALUE;
source.getHeaderByteBuffer().position(source.getHeaderByteBuffer().position() - 1);
// Avoids prevChr = chr at start of header value
// parsing which causes problems when chr is CR
// (in the case of an empty header value)
chr = 0;
break;
}
}
}
if (headerParsePos == HeaderParsePosition.HEADER_VALUE) {
// Reading bytes until the end of the line
boolean eol = false;
while (!eol) {
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
prevChr = chr;
chr = source.getHeaderByteBuffer().get();
if (chr == CR && prevChr != CR) {
// CR is only permitted at the start of a CRLF sequence.
// Possible start of CRLF - process the next byte.
} else if (chr == LF) {
if (!tolerantEol && prevChr != CR) {
throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
}
eol = true;
} else if (prevChr == CR) {
// Invalid value - also need to delete header
return skipLine();
} else if (HttpParser.isControl(chr) && chr != HT) {
// Invalid value - also need to delete header
return skipLine();
} else if (chr == SP || chr == HT) {
source.getHeaderByteBuffer().put(headerData.realPos, chr);
headerData.realPos++;
} else {
source.getHeaderByteBuffer().put(headerData.realPos, chr);
headerData.realPos++;
headerData.lastSignificantChar = headerData.realPos;
}
}
// Ignore whitespaces at the end of the line
headerData.realPos = headerData.lastSignificantChar;
// Checking the first character of the new line. If the character
// is a LWS, then it's a multiline header
headerParsePos = HeaderParsePosition.HEADER_MULTI_LINE;
}
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
byte peek = source.getHeaderByteBuffer().get(source.getHeaderByteBuffer().position());
if (headerParsePos == HeaderParsePosition.HEADER_MULTI_LINE) {
if (peek != SP && peek != HT) {
headerParsePos = HeaderParsePosition.HEADER_START;
break;
} else {
// Copying one extra space in the buffer (since there must
// be at least one space inserted between the lines)
source.getHeaderByteBuffer().put(headerData.realPos, peek);
headerData.realPos++;
headerParsePos = HeaderParsePosition.HEADER_VALUE_START;
}
}
}
// Set the header value
headerData.headerValue.setBytes(source.getHeaderByteBuffer().array(), headerData.start,
headerData.lastSignificantChar - headerData.start);
headerData.recycle();
return HeaderParseStatus.HAVE_MORE_HEADERS;
}
private HeaderParseStatus skipLine() throws IOException {
// Parse the rest of the invalid header so we can construct a useful
// exception and/or debug message.
headerParsePos = HeaderParsePosition.HEADER_SKIPLINE;
boolean eol = false;
// Reading bytes until the end of the line
while (!eol) {
// Read new bytes if needed
if (source.getHeaderByteBuffer().position() >= source.getHeaderByteBuffer().limit()) {
if (!source.fillHeaderBuffer()) {
return HeaderParseStatus.NEED_MORE_DATA;
}
}
int pos = source.getHeaderByteBuffer().position();
prevChr = chr;
chr = source.getHeaderByteBuffer().get();
if (chr == CR) {
// Skip
} else if (chr == LF) {
if (!tolerantEol && prevChr != CR) {
throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidCrlfNoCR"));
}
eol = true;
} else {
headerData.lastSignificantChar = pos;
}
}
throw new IllegalArgumentException(sm.getString("httpHeaderParser.invalidHeader",
HeaderUtil.toPrintableString(source.getHeaderByteBuffer().array(), headerData.lineStart,
headerData.lastSignificantChar - headerData.lineStart + 1)));
}
public enum HeaderParseStatus {
DONE,
HAVE_MORE_HEADERS,
NEED_MORE_DATA
}
public enum HeaderParsePosition {
/**
* Start of a new header. A CRLF here means that there are no more headers. Any other character starts a header
* name.
*/
HEADER_START,
/**
* Reading a header name. All characters of header are HTTP_TOKEN_CHAR. Header name is followed by ':'. No
* whitespace is allowed.<br>
* Any non-HTTP_TOKEN_CHAR (this includes any whitespace) encountered before ':' will result in the whole line
* being ignored.
*/
HEADER_NAME,
/**
* Skipping whitespace before text of header value starts, either on the first line of header value (just after
* ':') or on subsequent lines when it is known that subsequent line starts with SP or HT.
*/
HEADER_VALUE_START,
/**
* Reading the header value. We are inside the value. Either on the first line or on any subsequent line. We
* come into this state from HEADER_VALUE_START after the first non-SP/non-HT byte is encountered on the line.
*/
HEADER_VALUE,
/**
* Before reading a new line of a header. Once the next byte is peeked, the state changes without advancing our
* position. The state becomes either HEADER_VALUE_START (if that first byte is SP or HT), or HEADER_START
* (otherwise).
*/
HEADER_MULTI_LINE,
/**
* Reading all bytes until the next CRLF. The line is being ignored.
*/
HEADER_SKIPLINE
}
private static class HeaderParseData {
/**
* The first character of the header line.
*/
int lineStart = 0;
/**
* When parsing header name: first character of the header.<br>
* When skipping broken header line: first character of the header.<br>
* When parsing header value: first character after ':'.
*/
int start = 0;
/**
* When parsing header name: not used (stays as 0).<br>
* When skipping broken header line: not used (stays as 0).<br>
* When parsing header value: starts as the first character after ':'. Then is increased as far as more bytes of
* the header are harvested. Bytes from buf[pos] are copied to buf[realPos]. Thus the string from [start] to
* [realPos-1] is the prepared value of the header, with whitespaces removed as needed.<br>
*/
int realPos = 0;
/**
* When parsing header name: not used (stays as 0).<br>
* When skipping broken header line: last non-CR/non-LF character.<br>
* When parsing header value: position after the last not-LWS character.<br>
*/
int lastSignificantChar = 0;
/**
* MB that will store the value of the header. It is null while parsing header name and is created after the
* name has been parsed.
*/
MessageBytes headerValue = null;
public void recycle() {
lineStart = 0;
start = 0;
realPos = 0;
lastSignificantChar = 0;
headerValue = null;
}
}
public interface HeaderDataSource {
/**
* Read more data into the header buffer. The implementation is expected to determine if blocking or not
* blocking IO should be used.
*
* @return {@code true} if more data was added to the buffer, otherwise {@code false}
*
* @throws IOException If an I/O error occurred while obtaining more header data
*/
boolean fillHeaderBuffer() throws IOException;
/**
* Obtain a reference to the buffer containing the header data.
*
* @return The buffer containing the header data
*/
ByteBuffer getHeaderByteBuffer();
}
}