// RFC2231Utility.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.http.fileupload.util.mime;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
/**
* Utility class to decode/encode character set on HTTP Header fields based on RFC 2231.
* This implementation adheres to RFC 5987 in particular, which was defined for HTTP headers.
* <p>
* RFC 5987 builds on RFC 2231, but has lesser scope like
* <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
* and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
*
* @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
* @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
*/
public final class RFC2231Utility {

    /**
     * Upper-case hexadecimal digits; the index of each digit is its numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Bit mask (127) that confines a character to the 7-bit range, so it can
     * safely be used as an index into {@link #HEX_DECODE}.
     */
    private static final byte MASK = 0x7f;

    /**
     * Size (128) of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Lookup table from a 7-bit character code to the value of the hexadecimal
     * digit it represents. Entries for characters that are not hexadecimal
     * digits keep the array default of {@code 0}.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Populate the lookup table for both the upper- and lower-case hex digits.
    static {
        for (int i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Private constructor so that no instances can be created. This class
     * contains only static utility methods.
     */
    private RFC2231Utility() {
    }

    /**
     * Checks whether the parameter name ends with an asterisk (*), which
     * indicates that the parameter value carries charset and language
     * information and has to be decoded.
     *
     * @param paramName The parameter name to check, may be {@code null}.
     * @return {@code true} if the name is non-null and ends with an asterisk,
     *         {@code false} otherwise (including for the empty string)
     */
    public static boolean hasEncodedValue(final String paramName) {
        // endsWith() is used instead of comparing lastIndexOf('*') with
        // length() - 1: for the empty string both of those are -1, which
        // would wrongly report an encoded value.
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has an asterisk (*) at the end, it will be stripped
     * off, else the passed value will be returned unchanged.
     *
     * @param paramName The parameter name to inspect, may be {@code null}.
     * @return {@code paramName} without its trailing asterisk if it has one,
     *         otherwise {@code paramName} itself
     */
    public static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            // Only the single trailing asterisk is removed.
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Decode a string of text obtained from a HTTP header as per RFC 2231
     * <p>
     * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A}
     * will be decoded to {@code This is ***fun***}
     * <p>
     * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate}
     * will be decoded to <code>&pound; rate</code>
     * <p>
     * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates}
     * will be decoded to <code>&pound; and &euro; rates</code>
     * <p>
     * If the text does not contain both single-quote delimiters it is returned
     * unchanged. The language section is only used to locate the encoded value;
     * its content is ignored.
     *
     * @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>}
     * and ASCII only
     * @return Decoded text based on charset encoding
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    public static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final int langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final String mimeCharset = encodedText.substring(0, langDelimitStart);
        final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} into the bytes it represents.
     * <p>
     * Each {@code %XY} sequence becomes one byte; every other character is
     * written as its low eight bits. Decoding is lenient: a character that is
     * not a hexadecimal digit contributes {@code 0}, and a {@code %} without two
     * following characters ends decoding, dropping the incomplete sequence.
     *
     * @param text - ASCII text input
     * @return Byte array of characters decoded from ASCII table
     */
    private static byte[] fromHex(final String text) {
        final int shift = 4;
        final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
        for (int i = 0; i < text.length();) {
            final char c = text.charAt(i++);
            if (c == '%') {
                // i now points at the first hex digit; two characters must remain.
                if (i > text.length() - 2) {
                    break; // unterminated sequence
                }
                // Mask to 7 bits so the lookup can never leave the table.
                final byte b1 = HEX_DECODE[text.charAt(i++) & MASK];
                final byte b2 = HEX_DECODE[text.charAt(i++) & MASK];
                out.write((b1 << shift) | b2);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME charset name to the charset name handed to the JVM.
     * Currently the name is passed through unchanged.
     *
     * @param mimeCharset The charset name taken from the encoded text.
     * @return The same name, unmodified
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }
}