B2CConverter.java

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.tomcat.util.buf;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.Locale;

import org.apache.juli.logging.Log;
import org.apache.juli.logging.LogFactory;
import org.apache.tomcat.util.ExceptionUtils;
import org.apache.tomcat.util.res.StringManager;

/**
 * NIO based character decoder.
 */
public class B2CConverter {

    private static final Log log = LogFactory.getLog(B2CConverter.class);
    private static final StringManager sm = StringManager.getManager(B2CConverter.class);

    private static final CharsetCache charsetCache = new CharsetCache();


    // Protected so unit tests can use it
    protected static final int LEFTOVER_SIZE = 9;

    /**
     * Obtain the Charset for the given encoding
     *
     * @param enc The name of the encoding for the required charset
     *
     * @return The Charset corresponding to the requested encoding
     *
     * @throws UnsupportedEncodingException If the requested Charset is not available
     */
    public static Charset getCharset(String enc) throws UnsupportedEncodingException {

        // Encoding names should all be ASCII
        String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH);

        return getCharsetLower(lowerCaseEnc);
    }


    /**
     * Only to be used when it is known that the encoding name is in lower case.
     *
     * @param lowerCaseEnc The name of the encoding for the required charset in lower case
     *
     * @return The Charset corresponding to the requested encoding
     *
     * @throws UnsupportedEncodingException If the requested Charset is not available
     *
     * @deprecated Will be removed in Tomcat 9.0.x
     */
    @Deprecated
    public static Charset getCharsetLower(String lowerCaseEnc) throws UnsupportedEncodingException {

        Charset charset = charsetCache.getCharset(lowerCaseEnc);

        if (charset == null) {
            // Pre-population of the cache means this must be invalid
            throw new UnsupportedEncodingException(sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc));
        }
        return charset;
    }


    private final CharsetDecoder decoder;
    private ByteBuffer bb = null;
    private CharBuffer cb = null;

    /**
     * Leftover buffer used for incomplete characters.
     */
    private final ByteBuffer leftovers;

    public B2CConverter(Charset charset) {
        this(charset, false);
    }

    public B2CConverter(Charset charset, boolean replaceOnError) {
        byte[] left = new byte[LEFTOVER_SIZE];
        leftovers = ByteBuffer.wrap(left);
        CodingErrorAction action;
        if (replaceOnError) {
            action = CodingErrorAction.REPLACE;
        } else {
            action = CodingErrorAction.REPORT;
        }
        // Special case. Use the Apache Harmony based UTF-8 decoder because it
        // - a) rejects invalid sequences that the JVM decoder does not
        // - b) fails faster for some invalid sequences
        if (charset.equals(StandardCharsets.UTF_8)) {
            decoder = new Utf8Decoder();
        } else {
            decoder = charset.newDecoder();
        }
        decoder.onMalformedInput(action);
        decoder.onUnmappableCharacter(action);
    }

    /**
     * Reset the decoder state.
     */
    public void recycle() {
        try {
            decoder.reset();
        } catch (Throwable t) {
            ExceptionUtils.handleThrowable(t);
            log.warn(sm.getString("b2cConverter.decoderResetFail", decoder.charset()), t);
        }
        leftovers.position(0);
    }

    /**
     * Convert the given bytes to characters.
     *
     * @param bc         byte input
     * @param cc         char output
     * @param endOfInput Is this all of the available data
     *
     * @throws IOException If the conversion can not be completed
     */
    public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput) throws IOException {
        if ((bb == null) || (bb.array() != bc.getBuffer())) {
            // Create a new byte buffer if anything changed
            bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
        } else {
            // Initialize the byte buffer
            bb.limit(bc.getEnd());
            bb.position(bc.getStart());
        }
        if ((cb == null) || (cb.array() != cc.getBuffer())) {
            // Create a new char buffer if anything changed
            cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(), cc.getBuffer().length - cc.getEnd());
        } else {
            // Initialize the char buffer
            cb.limit(cc.getBuffer().length);
            cb.position(cc.getEnd());
        }
        CoderResult result = null;
        // Parse leftover if any are present
        if (leftovers.position() > 0) {
            int pos = cb.position();
            // Loop until one char is decoded or there is a decoder error
            do {
                leftovers.put(bc.subtractB());
                leftovers.flip();
                result = decoder.decode(leftovers, cb, endOfInput);
                leftovers.position(leftovers.limit());
                leftovers.limit(leftovers.array().length);
            } while (result.isUnderflow() && (cb.position() == pos));
            if (result.isError() || result.isMalformed()) {
                result.throwException();
            }
            bb.position(bc.getStart());
            leftovers.position(0);
        }
        // Do the decoding and get the results into the byte chunk and the char
        // chunk
        result = decoder.decode(bb, cb, endOfInput);
        if (result.isError() || result.isMalformed()) {
            result.throwException();
        } else if (result.isOverflow()) {
            // Propagate current positions to the byte chunk and char chunk, if
            // this continues the char buffer will get resized
            bc.setOffset(bb.position());
            cc.setEnd(cb.position());
        } else if (result.isUnderflow()) {
            // Propagate current positions to the byte chunk and char chunk
            bc.setOffset(bb.position());
            cc.setEnd(cb.position());
            // Put leftovers in the leftovers byte buffer
            if (bc.getLength() > 0) {
                leftovers.limit(leftovers.array().length);
                leftovers.position(bc.getLength());
                bc.subtract(leftovers.array(), 0, bc.getLength());
            }
        }
    }

    /**
     * Convert the given bytes to characters.
     *
     * @param bc         byte input
     * @param cc         char output
     * @param ic         byte input channel
     * @param endOfInput Is this all of the available data
     *
     * @throws IOException If the conversion can not be completed
     */
    public void convert(ByteBuffer bc, CharBuffer cc, ByteChunk.ByteInputChannel ic, boolean endOfInput)
            throws IOException {
        if ((bb == null) || (bb.array() != bc.array())) {
            // Create a new byte buffer if anything changed
            bb = ByteBuffer.wrap(bc.array(), bc.arrayOffset() + bc.position(), bc.remaining());
        } else {
            // Initialize the byte buffer
            bb.limit(bc.limit());
            bb.position(bc.position());
        }
        if ((cb == null) || (cb.array() != cc.array())) {
            // Create a new char buffer if anything changed
            cb = CharBuffer.wrap(cc.array(), cc.limit(), cc.capacity() - cc.limit());
        } else {
            // Initialize the char buffer
            cb.limit(cc.capacity());
            cb.position(cc.limit());
        }
        CoderResult result = null;
        // Parse leftover if any are present
        if (leftovers.position() > 0) {
            int pos = cb.position();
            // Loop until one char is decoded or there is a decoder error
            do {
                byte chr;
                if (bc.remaining() == 0) {
                    int n = ic.realReadBytes();
                    chr = n < 0 ? -1 : bc.get();
                } else {
                    chr = bc.get();
                }
                leftovers.put(chr);
                leftovers.flip();
                result = decoder.decode(leftovers, cb, endOfInput);
                leftovers.position(leftovers.limit());
                leftovers.limit(leftovers.array().length);
            } while (result.isUnderflow() && (cb.position() == pos));
            if (result.isError() || result.isMalformed()) {
                result.throwException();
            }
            bb.position(bc.position());
            leftovers.position(0);
        }
        // Do the decoding and get the results into the byte chunk and the char
        // chunk
        result = decoder.decode(bb, cb, endOfInput);
        if (result.isError() || result.isMalformed()) {
            result.throwException();
        } else if (result.isOverflow()) {
            // Propagate current positions to the byte chunk and char chunk, if
            // this continues the char buffer will get resized
            bc.position(bb.position());
            cc.limit(cb.position());
        } else if (result.isUnderflow()) {
            // Propagate current positions to the byte chunk and char chunk
            bc.position(bb.position());
            cc.limit(cb.position());
            // Put leftovers in the leftovers byte buffer
            if (bc.remaining() > 0) {
                leftovers.limit(leftovers.array().length);
                leftovers.position(bc.remaining());
                bc.get(leftovers.array(), 0, bc.remaining());
            }
        }
    }


    public Charset getCharset() {
        return decoder.charset();
    }
}