UDecoder.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.io.ByteArrayOutputStream;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.tomcat.util.res.StringManager;
/**
 * All URL decoding happens here. This way we can reuse, review, optimize without adding complexity to the buffers. The
 * conversion will modify the original buffer.
 *
 * @author Costin Manolache
 */
public final class UDecoder {

    private static final StringManager sm = StringManager.getManager(UDecoder.class);

    /**
     * Value of the {@code org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH} system property, read once when the
     * class is initialised (defaults to {@code false}). Within this class it is only consulted by the deprecated
     * {@link CharChunk} and {@link String} based {@code convert} methods.
     */
    @Deprecated
    public static final boolean ALLOW_ENCODED_SLASH = Boolean
            .parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false"));


    /**
     * Decoding failure that deliberately carries no stack trace so that instances can be created once and re-thrown.
     */
    private static class DecodeException extends CharConversionException {
        private static final long serialVersionUID = 1L;

        DecodeException(String s) {
            super(s);
        }

        @Override
        public synchronized Throwable fillInStackTrace() {
            // This class does not provide a stack trace
            return this;
        }
    }


    /** Unexpected end of data. */
    private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));

    /** %xx with not-hex digit */
    private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(sm.getString("uDecoder.isHexDigit"));

    /** %-encoded slash is forbidden in resource path */
    private static final IOException EXCEPTION_SLASH = new DecodeException(sm.getString("uDecoder.noSlash"));


    /**
     * URLDecode, will modify the source. Assumes source bytes are encoded using a superset of US-ASCII as per RFC 7230.
     * "%2f" will be rejected unless the input is a query string.
     *
     * @param mb    The URL encoded bytes
     * @param query {@code true} if this is a query string. For a query string '+' will be decoded to ' '
     *
     * @throws IOException Invalid %xx URL encoding
     */
    public void convert(ByteChunk mb, boolean query) throws IOException {
        if (query) {
            convert(mb, true, EncodedSolidusHandling.DECODE);
        } else {
            convert(mb, false, EncodedSolidusHandling.REJECT);
        }
    }


    /**
     * URLDecode, will modify the source. Assumes source bytes are encoded using a superset of US-ASCII as per RFC 7230.
     * The input is always treated as a non-query string by this overload, so '+' is left untouched.
     *
     * @param mb                     The URL encoded bytes
     * @param encodedSolidusHandling How should the %2f sequence be handled by the decoder?
     *
     * @throws IOException Invalid %xx URL encoding
     */
    public void convert(ByteChunk mb, EncodedSolidusHandling encodedSolidusHandling) throws IOException {
        convert(mb, false, encodedSolidusHandling);
    }


    /**
     * Shared in-place decoder for byte input. Bytes are read at index {@code j} and written at index {@code idx}; as
     * every %xx sequence is at least as long as its replacement, the write index never overtakes the read index.
     *
     * @param mb                     The URL encoded bytes; its end is moved to the end of the decoded data
     * @param query                  {@code true} to decode '+' to ' '
     * @param encodedSolidusHandling What to do when %2f is found
     *
     * @throws IOException Truncated %xx sequence, non-hex digit, or %2f while handling is {@code REJECT}
     */
    private void convert(ByteChunk mb, boolean query, EncodedSolidusHandling encodedSolidusHandling)
            throws IOException {

        int start = mb.getStart();
        byte[] buff = mb.getBytes();
        int end = mb.getEnd();

        int idx = ByteChunk.findByte(buff, start, end, (byte) '%');
        int idx2 = -1;
        if (query) {
            // Only a '+' located before the first '%' can move the starting point
            idx2 = ByteChunk.findByte(buff, start, (idx >= 0 ? idx : end), (byte) '+');
        }
        if (idx < 0 && idx2 < 0) {
            // Nothing to decode
            return;
        }

        // idx will be the smallest positive index ( first % or + )
        if ((idx2 >= 0 && idx2 < idx) || idx < 0) {
            idx = idx2;
        }

        for (int j = idx; j < end; j++, idx++) {
            if (buff[j] == '+' && query) {
                buff[idx] = (byte) ' ';
            } else if (buff[j] != '%') {
                buff[idx] = buff[j];
            } else {
                // read next 2 digits
                if (j + 2 >= end) {
                    throw EXCEPTION_EOF;
                }
                byte b1 = buff[j + 1];
                byte b2 = buff[j + 2];
                if (!isHexDigit(b1) || !isHexDigit(b2)) {
                    throw EXCEPTION_NOT_HEX_DIGIT;
                }

                // j now points at the last byte of the %xx sequence
                j += 2;
                int res = x2c(b1, b2);
                if (res == '/') {
                    switch (encodedSolidusHandling) {
                        case DECODE: {
                            buff[idx] = (byte) res;
                            break;
                        }
                        case REJECT: {
                            throw EXCEPTION_SLASH;
                        }
                        case PASS_THROUGH: {
                            // Copy the three original bytes ("%xx") unchanged
                            buff[idx++] = buff[j - 2];
                            buff[idx++] = buff[j - 1];
                            buff[idx] = buff[j];
                            break;
                        }
                    }
                } else if (res == '%') {
                    /*
                     * If encoded '/' is going to be left encoded then so must encoded '%' else the subsequent %nn
                     * decoding will either fail or corrupt the output.
                     */
                    switch (encodedSolidusHandling) {
                        case DECODE:
                        case REJECT: {
                            buff[idx] = (byte) res;
                            break;
                        }
                        case PASS_THROUGH: {
                            // Copy the three original bytes ("%xx") unchanged
                            buff[idx++] = buff[j - 2];
                            buff[idx++] = buff[j - 1];
                            buff[idx] = buff[j];
                            break;
                        }
                    }
                } else {
                    buff[idx] = (byte) res;
                }
            }
        }

        mb.setEnd(idx);
    }


    // -------------------- Additional methods --------------------

    /**
     * In-buffer processing - the buffer will be modified.
     * <p>
     * <b>WARNING:</b> This method assumes US-ASCII encoding.
     *
     * @param mb    The URL encoded chars
     * @param query <code>true</code> if this is a query string
     *
     * @throws IOException Invalid %xx URL encoding
     *
     * @deprecated Unused. Will be removed in Tomcat 10
     */
    @Deprecated
    public void convert(CharChunk mb, boolean query) throws IOException {
        int start = mb.getOffset();
        char[] buff = mb.getBuffer();
        int cend = mb.getEnd();

        int idx = CharChunk.indexOf(buff, start, cend, '%');
        int idx2 = -1;
        if (query) {
            // Only a '+' located before the first '%' can move the starting point
            idx2 = CharChunk.indexOf(buff, start, (idx >= 0 ? idx : cend), '+');
        }
        if (idx < 0 && idx2 < 0) {
            // Nothing to decode
            return;
        }

        // idx will be the smallest positive index ( first % or + )
        if ((idx2 >= 0 && idx2 < idx) || idx < 0) {
            idx = idx2;
        }

        // An encoded '/' is only accepted in query strings or when explicitly allowed
        final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);

        // j is the read position, idx the write position
        for (int j = idx; j < cend; j++, idx++) {
            if (buff[j] == '+' && query) {
                buff[idx] = (' ');
            } else if (buff[j] != '%') {
                buff[idx] = buff[j];
            } else {
                // read next 2 digits
                if (j + 2 >= cend) {
                    // invalid
                    throw EXCEPTION_EOF;
                }
                char b1 = buff[j + 1];
                char b2 = buff[j + 2];
                if (!isHexDigit(b1) || !isHexDigit(b2)) {
                    throw EXCEPTION_NOT_HEX_DIGIT;
                }

                j += 2;
                int res = x2c(b1, b2);
                if (noSlash && (res == '/')) {
                    throw EXCEPTION_SLASH;
                }
                buff[idx] = (char) res;
            }
        }
        mb.setEnd(idx);
    }


    /**
     * URLDecode, will modify the source.
     * <p>
     * <b>WARNING:</b> This method assumes US-ASCII encoding.
     *
     * @param mb    The URL encoded String, bytes or chars
     * @param query <code>true</code> if this is a query string
     *
     * @throws IOException Invalid %xx URL encoding
     *
     * @deprecated Unused. Will be removed in Tomcat 10
     */
    @Deprecated
    public void convert(MessageBytes mb, boolean query) throws IOException {
        switch (mb.getType()) {
            case MessageBytes.T_STR: {
                String strValue = mb.toString();
                if (strValue == null) {
                    return;
                }
                try {
                    mb.setString(convert(strValue, query));
                } catch (RuntimeException ex) {
                    // Report as a decoding failure but keep the original exception reachable as the cause
                    DecodeException de = new DecodeException(ex.getMessage());
                    de.initCause(ex);
                    throw de;
                }
                break;
            }
            case MessageBytes.T_CHARS: {
                CharChunk charC = mb.getCharChunk();
                convert(charC, query);
                break;
            }
            case MessageBytes.T_BYTES: {
                ByteChunk bytesC = mb.getByteChunk();
                convert(bytesC, query);
                break;
            }
            default:
                // Any other type is left untouched
                break;
        }
    }


    /**
     * %xx decoding of a string.
     * <p>
     * <b>WARNING:</b> This method assumes US-ASCII encoding.
     * <p>
     * FIXME: this is inefficient.
     *
     * @param str   The URL encoded string
     * @param query <code>true</code> if this is a query string
     *
     * @return the decoded string
     *
     * @throws IllegalArgumentException if a '%' is not followed by two hexadecimal digits (reported as a
     *                                      {@link NumberFormatException} when the digits are present but invalid), or
     *                                      if an encoded '/' is found where it is not permitted
     *
     * @deprecated Unused. Will be removed in Tomcat 10
     */
    @Deprecated
    public String convert(String str, boolean query) {
        if (str == null) {
            return null;
        }

        if ((!query || str.indexOf('+') < 0) && str.indexOf('%') < 0) {
            // Nothing to decode
            return str;
        }

        // An encoded '/' is only accepted in query strings or when explicitly allowed
        final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);

        final int strLen = str.length();
        StringBuilder dec = new StringBuilder(strLen); // decoded string output
        int strPos = 0;

        while (strPos < strLen) {
            // look ahead to next URLencoded metacharacter, if any
            int laPos = strPos;
            while (laPos < strLen) {
                char laChar = str.charAt(laPos);
                if ((laChar == '+' && query) || (laChar == '%')) {
                    break;
                }
                laPos++;
            }

            // if there were non-metacharacters, copy them all as a block
            if (laPos > strPos) {
                dec.append(str, strPos, laPos);
                strPos = laPos;
            }

            // shortcut out of here if we're at the end of the string
            if (strPos >= strLen) {
                break;
            }

            // process next metacharacter
            char metaChar = str.charAt(strPos);
            if (metaChar == '+') {
                dec.append(' ');
                strPos++;
            } else if (metaChar == '%') {
                // Both digits must be present ...
                if (strPos + 2 >= strLen) {
                    throw new IllegalArgumentException(sm.getString("uDecoder.eof"));
                }
                char c1 = str.charAt(strPos + 1);
                char c2 = str.charAt(strPos + 2);
                // ... and must be plain ASCII hex digits. Integer.parseInt() is not used since it would also
                // accept a leading sign ("%-1", "%+f") and non-ASCII digits.
                if (!isHexDigit(c1) || !isHexDigit(c2)) {
                    throw new NumberFormatException(sm.getString("uDecoder.isHexDigit"));
                }
                char res = (char) x2c(c1, c2);
                if (noSlash && (res == '/')) {
                    throw new IllegalArgumentException(sm.getString("uDecoder.noSlash"));
                }
                dec.append(res);
                strPos += 3;
            }
        }

        return dec.toString();
    }


    /**
     * Decode and return the specified URL-encoded String. When the byte array is converted to a string, UTF-8 is used.
     * This may be different than some other servers. It is assumed the string is not a query string.
     *
     * @param str The url-encoded string
     *
     * @return the decoded string
     *
     * @exception IllegalArgumentException if a '%' character is not followed by a valid 2-digit hexadecimal number
     *
     * @deprecated Unused. This will be removed in Tomcat 10 onwards
     */
    @Deprecated
    public static String URLDecode(String str) {
        return URLDecode(str, StandardCharsets.UTF_8);
    }


    /**
     * Decode and return the specified URL-encoded String. It is assumed the string is not a query string.
     *
     * @param str     The url-encoded string
     * @param charset The character encoding to use; if null, UTF-8 is used.
     *
     * @return the decoded string
     *
     * @exception IllegalArgumentException if a '%' character is not followed by a valid 2-digit hexadecimal number
     */
    public static String URLDecode(String str, Charset charset) {
        if (str == null) {
            return null;
        }

        if (str.indexOf('%') == -1) {
            // No %nn sequences, so return string unchanged
            return str;
        }

        if (charset == null) {
            charset = StandardCharsets.UTF_8;
        }

        /*
         * Decoding is required.
         *
         * Potential complications:
         *
         * - The source String may be partially decoded so it is not valid to assume that the source String is ASCII.
         *
         * - Have to process as characters since there is no guarantee that the byte sequence for '%' is going to be the
         * same in all character sets.
         *
         * - We don't know how many '%nn' sequences are required for a single character. It varies between character
         * sets and some use a variable length.
         */

        // This isn't perfect but it is a reasonable guess for the size of the
        // array required
        ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);

        // Literal characters go through the writer, decoded bytes are written to the stream directly
        OutputStreamWriter osw = new OutputStreamWriter(baos, charset);

        char[] sourceChars = str.toCharArray();
        int len = sourceChars.length;
        int ix = 0;

        try {
            while (ix < len) {
                char c = sourceChars[ix++];
                if (c == '%') {
                    // Push pending characters to the stream first so the output order is preserved
                    osw.flush();
                    if (ix + 2 > len) {
                        throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.missingDigit", str));
                    }
                    char c1 = sourceChars[ix++];
                    char c2 = sourceChars[ix++];
                    if (isHexDigit(c1) && isHexDigit(c2)) {
                        baos.write(x2c(c1, c2));
                    } else {
                        throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.missingDigit", str));
                    }
                } else {
                    osw.append(c);
                }
            }
            osw.flush();

            return baos.toString(charset.name());
        } catch (IOException ioe) {
            throw new IllegalArgumentException(sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()),
                    ioe);
        }
    }


    /**
     * Tests for an ASCII hexadecimal digit.
     *
     * @param c The byte or char value to test
     *
     * @return {@code true} if c is one of 0-9, a-f or A-F
     */
    private static boolean isHexDigit(int c) {
        return ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
    }


    /**
     * Combines two hexadecimal digits into the value they encode. Callers are expected to have validated both digits
     * with {@link #isHexDigit(int)}; no validation is performed here.
     *
     * @param b1 The high order digit
     * @param b2 The low order digit
     *
     * @return the value in the range 0-255 for valid digits
     */
    private static int x2c(int b1, int b2) {
        // Clearing bit 0x20 maps 'a'-'f' onto 'A'-'F'
        int digit = (b1 >= 'A') ? ((b1 & 0xDF) - 'A') + 10 : (b1 - '0');
        digit *= 16;
        digit += (b2 >= 'A') ? ((b2 & 0xDF) - 'A') + 10 : (b2 - '0');
        return digit;
    }
}