ByteChunk.java

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.tomcat.util.buf;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

/**
 * This class is used to represent a chunk of bytes, and utilities to manipulate byte[].
 * <p>
 * The buffer can be modified and used for both input and output.
 * <p>
 * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel, which will be
 * used when the buffer is empty (on input) or filled (on output). For output, it can also grow. This operating mode is
 * selected by calling setLimit() or allocate(initial, limit) with limit != -1.
 * <p>
 * Various search and append methods are defined - similar to String and StringBuffer, but operating on bytes.
 * <p>
 * This is important because it allows processing the http headers directly on the received bytes, without converting to
 * chars and Strings until the strings are needed. In addition, the charset is determined later, from headers or user
 * code.
 * <p>
 * In a server it is very important to be able to operate on
 * the original byte[] without converting everything to chars.
 * Some protocols are ASCII only, and some allow different
 * non-UNICODE encodings. The encoding is not known beforehand,
 * and can even change during the execution of the protocol.
 * ( for example a multipart message may have parts with different
 *  encoding )
 * <p>
 * For HTTP it is not very clear how the encoding of RequestURI
 * and mime values can be determined, but it is a great advantage
 * to be able to parse the request without converting to string.
 *
 * @author dac@sun.com
 * @author James Todd [gonzo@sun.com]
 * @author Costin Manolache
 * @author Remy Maucherat
 */
public final class ByteChunk extends AbstractChunk {

    // Serialization version identifier for this class
    private static final long serialVersionUID = 1L;

    /**
     * Input interface, used when the buffer is empty. Similar in purpose to java.nio.channels.ReadableByteChannel.
     */
    public interface ByteInputChannel {

        /**
         * Read new bytes.
         *
         * @return The number of bytes read. A negative value is treated by this class as end of input.
         *
         * @throws IOException If an I/O error occurs during reading
         */
        int realReadBytes() throws IOException;
    }

    /**
     * When we need more space we'll either grow the buffer ( up to the limit ) or send it to a channel. Similar in
     * purpose to java.nio.channels.WritableByteChannel.
     */
    public interface ByteOutputChannel {

        /**
         * Send the bytes ( usually the internal conversion buffer ). Expect 8k output if the buffer is full.
         *
         * @param buf bytes that will be written
         * @param off offset in the bytes array
         * @param len length that will be written
         *
         * @throws IOException If an I/O error occurs while writing the bytes
         */
        void realWriteBytes(byte buf[], int off, int len) throws IOException;


        /**
         * Send the bytes ( usually the internal conversion buffer ). Expect 8k output if the buffer is full.
         *
         * @param from bytes that will be written
         *
         * @throws IOException If an I/O error occurs while writing the bytes
         */
        void realWriteBytes(ByteBuffer from) throws IOException;
    }

    // --------------------

    /**
     * Default encoding used to convert to strings. It should be UTF8, as most standards seem to converge, but the
     * servlet API requires 8859_1, and this object is used mostly for servlets.
     */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;

    // Charset used when converting the bytes to a String. Transient: writeObject()/readObject()
    // persist it by name instead. A null value means DEFAULT_CHARSET is used (see getCharset()).
    private transient Charset charset;

    // Backing byte array; the chunk's content is the range [start, end) of this array
    private byte[] buff;

    // transient as serialization is primarily for values via, e.g. JMX
    private transient ByteInputChannel in = null;
    private transient ByteOutputChannel out = null;


    /**
     * Creates a new, uninitialized ByteChunk object. No backing array is allocated by this constructor.
     */
    public ByteChunk() {
    }


    /**
     * Creates a new ByteChunk and allocates its backing array by calling {@code allocate(initial, -1)}.
     *
     * @param initial the size of the backing array to allocate
     */
    public ByteChunk(int initial) {
        allocate(initial, -1);
    }


    /**
     * Serialization hook: writes the default (non-transient) fields followed by the name of the effective charset, as
     * the charset field itself is transient.
     *
     * @param oos the stream to write to
     *
     * @throws IOException If writing to the stream fails
     */
    private void writeObject(ObjectOutputStream oos) throws IOException {
        oos.defaultWriteObject();
        final String charsetName = getCharset().name();
        oos.writeUTF(charsetName);
    }


    /**
     * Deserialization hook: reads the default fields, then restores the charset from the name written by
     * writeObject().
     *
     * @param ois the stream to read from
     *
     * @throws ClassNotFoundException If a class required by default deserialization cannot be found
     * @throws IOException            If reading from the stream fails
     */
    private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
        ois.defaultReadObject();
        final String charsetName = ois.readUTF();
        this.charset = Charset.forName(charsetName);
    }


    /**
     * Returns a copy of this chunk as produced by {@code super.clone()}.
     *
     * @return the clone
     *
     * @throws CloneNotSupportedException If thrown by {@code super.clone()}
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        return super.clone();
    }


    /**
     * Resets this chunk for reuse: delegates to {@code super.recycle()} and clears the configured charset so that
     * {@link #getCharset()} falls back to {@link #DEFAULT_CHARSET} again.
     */
    @Override
    public void recycle() {
        super.recycle();
        charset = null;
    }


    // -------------------- Setup --------------------

    /**
     * Prepares the chunk for use as an empty buffer. The existing backing array is reused when it is already at least
     * {@code initial} bytes long; otherwise a new array of that size is created.
     *
     * @param initial the minimum size of the backing array
     * @param limit   the limit passed on to {@code setLimit(int)}
     */
    public void allocate(int initial, int limit) {
        final boolean needsNewArray = (buff == null) || (initial > buff.length);
        if (needsNewArray) {
            buff = new byte[initial];
        }
        setLimit(limit);

        // Empty content, marked as set, cached hash code invalidated
        start = 0;
        end = 0;
        hasHashCode = false;
        isSet = true;
    }


    /**
     * Points this chunk at the given sub-range of an existing array. The array is used directly as the backing
     * array; it is not copied.
     *
     * @param b   the array holding the bytes
     * @param off the offset of the first byte within {@code b}
     * @param len the number of bytes
     */
    public void setBytes(byte[] b, int off, int len) {
        this.buff = b;
        this.start = off;
        this.end = off + len;
        this.hasHashCode = false;
        this.isSet = true;
    }


    /**
     * Sets the charset used when converting the bytes to a String.
     *
     * @param charset the charset to use; if {@code null}, {@link #getCharset()} will fall back to
     *                    {@link #DEFAULT_CHARSET}
     */
    public void setCharset(Charset charset) {
        this.charset = charset;
    }


    /**
     * Returns the charset used for String conversion. If none has been configured, {@link #DEFAULT_CHARSET} is stored
     * as the configured charset and returned.
     *
     * @return the effective charset, never {@code null}
     */
    public Charset getCharset() {
        if (charset != null) {
            return charset;
        }
        charset = DEFAULT_CHARSET;
        return charset;
    }


    /**
     * Equivalent to {@link #getBuffer()}.
     *
     * @return the backing array itself (not a copy); may be {@code null} if none has been set or allocated
     */
    public byte[] getBytes() {
        return getBuffer();
    }


    /**
     * @return the backing array itself (not a copy); may be {@code null} if none has been set or allocated
     */
    public byte[] getBuffer() {
        return buff;
    }


    /**
     * When the buffer is empty, read the data from the input channel. The channel is consulted by the subtract
     * methods when no bytes remain in the buffer.
     *
     * @param in The input channel
     */
    public void setByteInputChannel(ByteInputChannel in) {
        this.in = in;
    }


    /**
     * When the buffer is full, write the data to the output channel. Also used when a large amount of data is
     * appended. If not set, the buffer will grow to the limit and {@link #flushBuffer()} will fail with a
     * {@link BufferOverflowException}.
     *
     * @param out The output channel
     */
    public void setByteOutputChannel(ByteOutputChannel out) {
        this.out = out;
    }


    // -------------------- Adding data to the buffer --------------------

    /**
     * Appends a single byte, growing the buffer if possible and flushing it to the output channel if the limit has
     * been reached.
     *
     * @param b the byte to append
     *
     * @throws IOException Writing overflow data to the output channel failed
     */
    public void append(byte b) throws IOException {
        makeSpace(1);

        // No room left even after trying to grow: push the current content out first
        if (getLimitInternal() <= end) {
            flushBuffer();
        }
        buff[end++] = b;
    }


    /**
     * Appends the content of another chunk by delegating to {@link #append(byte[], int, int)} with the source's
     * backing array, start offset and length.
     *
     * @param src the chunk whose content is appended
     *
     * @throws IOException Writing overflow data to the output channel failed
     */
    public void append(ByteChunk src) throws IOException {
        append(src.getBytes(), src.getStart(), src.getLength());
    }


    /**
     * Add data to the buffer.
     * <p>
     * The buffer is first grown (up to the limit) via {@link #makeSpace(int)}. Data that still does not fit is pushed
     * to the output channel, either by flushing the buffer or by writing slices of the source array directly.
     *
     * @param src Bytes array
     * @param off Offset
     * @param len Length
     *
     * @throws IOException Writing overflow data to the output channel failed
     */
    public void append(byte src[], int off, int len) throws IOException {
        // will grow, up to limit
        makeSpace(len);
        int limit = getLimitInternal();

        // Optimize on a common case.
        // If the buffer is empty and the source is going to fill up all the
        // space in buffer, may as well write it directly to the output,
        // and avoid an extra copy
        if (len == limit && end == start && out != null) {
            out.realWriteBytes(src, off, len);
            return;
        }

        // if we are below the limit
        if (len <= limit - end) {
            System.arraycopy(src, off, buff, end, len);
            end += len;
            return;
        }

        // Need more space than we can afford, need to flush buffer.

        // The buffer is already at (or bigger than) limit.

        // We chunk the data into slices fitting in the buffer limit. Slices
        // that do not fit in the buffer are written directly to the output.

        // Top up the buffer to the limit, then flush it. flushBuffer() throws
        // if there is no output channel, so 'out' is non-null below.
        int avail = limit - end;
        System.arraycopy(src, off, buff, end, avail);
        end += avail;

        flushBuffer();

        int remain = len - avail;

        // While more remains than the buffer can hold, write slices straight from the source
        while (remain > (limit - end)) {
            out.realWriteBytes(src, (off + len) - remain, limit - end);
            remain = remain - (limit - end);
        }

        // Keep the tail in the buffer
        System.arraycopy(src, (off + len) - remain, buff, end, remain);
        end += remain;
    }


    /**
     * Add data to the buffer.
     * <p>
     * All remaining bytes of {@code from} are consumed. The buffer is first grown (up to the limit) via
     * {@link #makeSpace(int)}. Data that still does not fit is pushed to the output channel, either by flushing the
     * buffer or by writing slices of the source buffer directly.
     *
     * @param from the ByteBuffer with the data
     *
     * @throws IOException Writing overflow data to the output channel failed
     */
    public void append(ByteBuffer from) throws IOException {
        int len = from.remaining();

        // will grow, up to limit
        makeSpace(len);
        int limit = getLimitInternal();

        // Optimize on a common case.
        // If the buffer is empty and the source is going to fill up all the
        // space in buffer, may as well write it directly to the output,
        // and avoid an extra copy
        if (len == limit && end == start && out != null) {
            out.realWriteBytes(from);
            // Mark the source as fully consumed regardless of how the channel left its position
            from.position(from.limit());
            return;
        }
        // if we have limit and we're below
        if (len <= limit - end) {
            // makeSpace will grow the buffer to the limit,
            // so we have space
            from.get(buff, end, len);
            end += len;
            return;
        }

        // need more space than we can afford, need to flush
        // buffer

        // the buffer is already at ( or bigger than ) limit

        // We chunk the data into slices fitting in the buffer limit. Slices
        // that do not fit in the buffer are written directly to the output.

        // Top up the buffer to the limit, then flush it. flushBuffer() throws
        // if there is no output channel, so 'out' is non-null below.
        int avail = limit - end;
        from.get(buff, end, avail);
        end += avail;

        flushBuffer();

        // Remember the source's real limit; it is narrowed temporarily to expose one slice at a time
        int fromLimit = from.limit();
        int remain = len - avail;
        avail = limit - end;
        while (remain >= avail) {
            from.limit(from.position() + avail);
            out.realWriteBytes(from);
            from.position(from.limit());
            remain = remain - avail;
        }

        // Restore the source's limit and keep the tail in the buffer
        from.limit(fromLimit);
        from.get(buff, end, remain);
        end += remain;
    }


    // -------------------- Removing data from the buffer --------------------

    /**
     * Removes and returns the next byte from the buffer as an unsigned value.
     *
     * @return the next byte in the range 0-255, or -1 if no more data is available
     *
     * @throws IOException If reading from the input channel fails
     */
    public int subtract() throws IOException {
        if (!checkEof()) {
            return buff[start++] & 0xFF;
        }
        return -1;
    }

    /**
     * Removes and returns the next byte from the buffer as a signed value.
     * <p>
     * NOTE: -1 is returned when no more data is available, which cannot be distinguished from a data byte with the
     * value 0xFF.
     *
     * @return the next byte, or -1 if no more data is available
     *
     * @throws IOException If reading from the input channel fails
     */
    public byte subtractB() throws IOException {
        if (!checkEof()) {
            return buff[start++];
        }
        return -1;
    }


    /**
     * Transfers up to {@code len} bytes from the buffer into the given array.
     *
     * @param dest the destination array
     * @param off  the offset in {@code dest} at which to start writing
     * @param len  the maximum number of bytes to transfer
     *
     * @return the number of bytes transferred, or -1 if no more data is available
     *
     * @throws IOException If reading from the input channel fails
     */
    public int subtract(byte dest[], int off, int len) throws IOException {
        if (checkEof()) {
            return -1;
        }
        // Never hand out more than is currently buffered
        final int available = getLength();
        final int n = (len > available) ? available : len;
        System.arraycopy(buff, start, dest, off, n);
        start += n;
        return n;
    }


    /**
     * Transfers bytes from the buffer to the specified ByteBuffer. After the operation the ByteBuffer's position is
     * back where it was before the call and its limit is that position plus the number of bytes transferred, so the
     * transferred bytes are ready to be read from it.
     *
     * @param to the ByteBuffer into which bytes are to be written.
     *
     * @return an integer specifying the actual number of bytes read, or -1 if the end of the stream is reached
     *
     * @throws IOException if an input or output exception has occurred
     */
    public int subtract(ByteBuffer to) throws IOException {
        if (checkEof()) {
            return -1;
        }
        final int origin = to.position();
        final int n = Math.min(to.remaining(), getLength());
        to.put(buff, start, n);

        // put() advanced the position by n; frame exactly the bytes just written
        to.limit(origin + n);
        to.position(origin);

        start += n;
        return n;
    }


    /**
     * Determines whether data is available for reading. If the buffer is empty, the input channel (if any) is asked
     * to read more bytes.
     *
     * @return {@code true} if the buffer is empty and either there is no input channel or the channel reported a
     *             negative number of bytes read, otherwise {@code false}
     *
     * @throws IOException If reading from the input channel fails
     */
    private boolean checkEof() throws IOException {
        if (end != start) {
            // Still have buffered data
            return false;
        }
        if (in == null) {
            return true;
        }
        return in.realReadBytes() < 0;
    }


    /**
     * Sends the buffered bytes to the output channel and empties the buffer. Called by append() when the limit is
     * reached; may also be called explicitly to force the data to be written.
     *
     * @throws IOException Writing overflow data to the output channel failed, or no output channel is set (in which
     *                         case a {@link BufferOverflowException} is thrown)
     */
    public void flushBuffer() throws IOException {
        if (out != null) {
            out.realWriteBytes(buff, start, end - start);
            end = start;
            return;
        }
        // Nowhere to send the data
        throw new BufferOverflowException(
                sm.getString("chunk.overflow", Integer.valueOf(getLimit()), Integer.valueOf(buff.length)));
    }


    /**
     * Make space for count bytes. If count is small, allocate a reserve space too. Never grow bigger than the limit
     * or {@link AbstractChunk#ARRAY_MAX_SIZE}.
     * <p>
     * When the backing array is replaced, the existing content is moved to the beginning of the new array, so the
     * start offset becomes 0.
     *
     * @param count The number of additional bytes for which space is required
     */
    public void makeSpace(int count) {
        int limit = getLimitInternal();

        // Widen BEFORE adding. With int arithmetic, end + count can wrap to a
        // negative value, which would make this method return without growing.
        long desiredSize = (long) end + count;

        // Can't grow above the limit
        if (desiredSize > limit) {
            desiredSize = limit;
        }

        if (buff == null) {
            if (desiredSize < 256) {
                desiredSize = 256; // take a minimum
            }
            buff = new byte[(int) desiredSize];
        }

        // limit < buf.length (the buffer is already big)
        // or we already have space
        if (desiredSize <= buff.length) {
            return;
        }

        // grow in larger chunks
        long newSize;
        if (desiredSize < 2L * buff.length) {
            newSize = buff.length * 2L;
        } else {
            newSize = buff.length * 2L + count;
        }

        if (newSize > limit) {
            newSize = limit;
        }
        byte[] tmp = new byte[(int) newSize];

        // Compacts buffer
        System.arraycopy(buff, start, tmp, 0, end - start);
        buff = tmp;
        end = end - start;
        start = 0;
    }


    // -------------------- Conversion and getters --------------------

    /**
     * Converts the content to a String using {@link #toString(CodingErrorAction, CodingErrorAction)} with
     * {@link CodingErrorAction#REPLACE} for both malformed input and unmappable characters.
     *
     * @return the String value, or {@code null} as returned by the two-argument variant
     */
    @Override
    public String toString() {
        try {
            return toString(CodingErrorAction.REPLACE, CodingErrorAction.REPLACE);
        } catch (CharacterCodingException e) {
            // Unreachable code. Use of REPLACE above means the exception will never be thrown.
            throw new IllegalStateException(e);
        }
    }


    /**
     * Converts the content to a String, delegating the actual conversion to {@code StringCache}.
     *
     * @param malformedInputAction      Action to take if the input is malformed
     * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character
     *
     * @return {@code null} if {@code isNull()} reports true, the empty String if the chunk has no content, otherwise
     *             the converted String
     *
     * @throws CharacterCodingException If an error occurs during the conversion
     */
    public String toString(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
            throws CharacterCodingException {
        if (isNull()) {
            return null;
        }
        if (end == start) {
            return "";
        }
        return StringCache.toString(this, malformedInputAction, unmappableCharacterAction);
    }


    /**
     * Decodes the current content of the byte buffer into a String using the configured character set, falling back
     * to {@link #DEFAULT_CHARSET} (and storing it as the configured charset) if none has been set.
     *
     * @param malformedInputAction      Action to take if the input is malformed
     * @param unmappableCharacterAction Action to take if a byte sequence can't be mapped to a character
     *
     * @return The result of converting the bytes to a String
     *
     * @throws CharacterCodingException If an error occurs during the conversion
     */
    public String toStringInternal(CodingErrorAction malformedInputAction, CodingErrorAction unmappableCharacterAction)
            throws CharacterCodingException {
        final Charset effective = getCharset();

        // Decoding through a wrapped ByteBuffer instead of new String(byte[], int, int, Charset):
        // the latter takes a defensive copy of the entire byte array, which is expensive when only
        // a small subset of the bytes is used. The approach is from Apache Harmony.
        final boolean replaceEverything = malformedInputAction == CodingErrorAction.REPLACE &&
                unmappableCharacterAction == CodingErrorAction.REPLACE;

        final CharBuffer decoded = replaceEverything ? effective.decode(ByteBuffer.wrap(buff, start, end - start)) :
                effective.newDecoder().onMalformedInput(malformedInputAction)
                        .onUnmappableCharacter(unmappableCharacterAction)
                        .decode(ByteBuffer.wrap(buff, start, end - start));

        return new String(decoded.array(), decoded.arrayOffset(), decoded.length());
    }


    /**
     * Parses the content of the chunk as a long by passing the backing array, start offset and length to
     * {@code Ascii.parseLong}.
     *
     * @return the parsed value
     */
    public long getLong() {
        return Ascii.parseLong(buff, start, end - start);
    }


    // -------------------- equals --------------------

    /**
     * Compares this chunk to another object. Only another ByteChunk can be equal; the comparison itself is done by
     * {@link #equals(ByteChunk)}.
     *
     * @param obj the object to compare to
     *
     * @return <code>true</code> if {@code obj} is a ByteChunk that compares equal, <code>false</code> otherwise
     */
    @Override
    public boolean equals(Object obj) {
        return (obj instanceof ByteChunk) && equals((ByteChunk) obj);
    }


    /**
     * Compares the message bytes to the specified String object.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param s the String to compare; a {@code null} String never matches
     *
     * @return <code>true</code> if the comparison succeeded, <code>false</code> otherwise
     */
    public boolean equals(String s) {
        byte[] b = buff;
        // A null String never matches. Checking it here gives the same answer whether or
        // not a buffer is set, rather than false in one case and an NPE in the other.
        if (b == null || s == null) {
            return false;
        }
        int len = end - start;
        if (len != s.length()) {
            return false;
        }
        int off = start;
        for (int i = 0; i < len; i++) {
            if (b[off++] != s.charAt(i)) {
                return false;
            }
        }
        return true;
    }


    /**
     * Compares the message bytes to the specified String object, ignoring case.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param s the String to compare; a {@code null} String never matches
     *
     * @return <code>true</code> if the comparison succeeded, <code>false</code> otherwise
     */
    public boolean equalsIgnoreCase(String s) {
        byte[] b = buff;
        // A null String never matches. Checking it here gives the same answer whether or
        // not a buffer is set, rather than false in one case and an NPE in the other.
        if (b == null || s == null) {
            return false;
        }
        int len = end - start;
        if (len != s.length()) {
            return false;
        }
        int off = start;
        for (int i = 0; i < len; i++) {
            if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }


    /**
     * Compares the content of this chunk to the content of another chunk, byte for byte.
     *
     * @param bb the chunk to compare to; {@code null} is never equal
     *
     * @return <code>true</code> if the comparison succeeded, <code>false</code> otherwise
     */
    public boolean equals(ByteChunk bb) {
        // Mirror equals(Object): a null argument compares unequal rather than throwing
        if (bb == null) {
            return false;
        }
        return equals(bb.getBytes(), bb.getStart(), bb.getLength());
    }


    /**
     * Compares the content of this chunk to the given range of a byte array.
     *
     * @param b2   the array to compare to
     * @param off2 offset of the first byte to compare in {@code b2}
     * @param len2 number of bytes to compare
     *
     * @return <code>true</code> if both arrays are {@code null}, or if the lengths match and every byte is equal;
     *             <code>false</code> otherwise
     */
    public boolean equals(byte b2[], int off2, int len2) {
        final byte[] mine = buff;
        if (mine == null && b2 == null) {
            return true;
        }

        final int count = end - start;
        if (mine == null || b2 == null || count != len2) {
            return false;
        }

        for (int i = 0; i < count; i++) {
            if (mine[start + i] != b2[off2 + i]) {
                return false;
            }
        }
        return true;
    }


    /**
     * Compares the content of this chunk to the given range of a byte array, lower-casing both sides with
     * {@code Ascii.toLower} before comparing.
     *
     * @param b2   the array to compare to
     * @param off2 offset of the first byte to compare in {@code b2}
     * @param len2 number of bytes to compare
     *
     * @return <code>true</code> if both arrays are {@code null}, or if the lengths match and every byte matches;
     *             <code>false</code> otherwise
     */
    public boolean equalsIgnoreCase(byte b2[], int off2, int len2) {
        final byte[] mine = buff;
        if (mine == null && b2 == null) {
            return true;
        }

        final int count = end - start;
        if (mine == null || b2 == null || count != len2) {
            return false;
        }

        for (int i = 0; i < count; i++) {
            if (Ascii.toLower(mine[start + i]) != Ascii.toLower(b2[off2 + i])) {
                return false;
            }
        }
        return true;
    }


    /**
     * Compares the message bytes to the content of the specified CharChunk.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param cc the chunk to compare to; {@code null} is never equal
     *
     * @return <code>true</code> if the comparison succeeded, <code>false</code> otherwise
     */
    public boolean equals(CharChunk cc) {
        // A null argument compares unequal rather than throwing
        if (cc == null) {
            return false;
        }
        return equals(cc.getChars(), cc.getStart(), cc.getLength());
    }


    /**
     * Compares the message bytes to the specified range of a char array.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param c2   the array to compare to
     * @param off2 offset
     * @param len2 length
     *
     * @return <code>true</code> if both arrays are {@code null}, or if the lengths match and every element is equal;
     *             <code>false</code> otherwise
     */
    public boolean equals(char c2[], int off2, int len2) {
        final byte[] mine = buff;
        if (mine == null && c2 == null) {
            return true;
        }

        final int count = end - start;
        if (mine == null || c2 == null || count != len2) {
            return false;
        }

        for (int i = 0; i < count; i++) {
            if ((char) mine[start + i] != c2[off2 + i]) {
                return false;
            }
        }
        return true;
    }


    /**
     * Tests, in a case sensitive manner, whether the content at the given position matches the specified string.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param s   the string
     * @param pos The position, relative to the start of the content, at which the match must begin
     *
     * @return <code>true</code> if the start matches
     */
    public boolean startsWith(String s, int pos) {
        final int needed = s.length();
        final byte[] data = buff;
        if (data == null) {
            return false;
        }
        // Not enough content after pos to hold the whole string
        if (needed + pos > end - start) {
            return false;
        }
        final int base = start + pos;
        for (int i = 0; i < needed; i++) {
            if (data[base + i] != s.charAt(i)) {
                return false;
            }
        }
        return true;
    }


    /**
     * Tests, in a case insensitive manner, whether the content at the given position matches the specified string.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param s   the string
     * @param pos The position, relative to the start of the content, at which the match must begin
     *
     * @return <code>true</code> if the start matches
     */
    public boolean startsWithIgnoreCase(String s, int pos) {
        final int needed = s.length();
        final byte[] data = buff;
        if (data == null) {
            return false;
        }
        // Not enough content after pos to hold the whole string
        if (needed + pos > end - start) {
            return false;
        }
        final int base = start + pos;
        for (int i = 0; i < needed; i++) {
            if (Ascii.toLower(data[base + i]) != Ascii.toLower(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }


    /**
     * Returns the byte at the given index of the backing array, widened to an int. The widening is sign-extending,
     * so bytes above 0x7F are returned as negative values.
     *
     * @param index the index into the backing array (not relative to the start offset)
     *
     * @return the byte value at that index
     */
    @Override
    protected int getBufferElement(int index) {
        return buff[index];
    }


    /**
     * Returns the first instance of the given character in this ByteChunk starting at the specified byte. If the
     * character is not found, -1 is returned.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param c        The character
     * @param starting The start position, relative to the start of the content
     *
     * @return The position of the first instance of the character, relative to the start of the content, or -1 if the
     *             character is not found.
     */
    public int indexOf(char c, int starting) {
        final int absolute = indexOf(buff, start + starting, end, c);
        if (absolute < start) {
            // Not found (or a hit before the start of the content)
            return -1;
        }
        return absolute - start;
    }


    /**
     * Returns the first instance of the given character in the given byte array between the specified start and end.
     * <p>
     * NOTE: This only works for characters in the range 0-127.
     *
     * @param bytes The array to search
     * @param start The point to start searching from in the array (inclusive)
     * @param end   The point to stop searching in the array (exclusive)
     * @param s     The character to search for
     *
     * @return The position of the first instance of the character or -1 if the character is not found.
     */
    public static int indexOf(byte bytes[], int start, int end, char s) {
        for (int i = start; i < end; i++) {
            if (bytes[i] == s) {
                return i;
            }
        }
        return -1;
    }


    /**
     * Returns the first instance of the given byte in the byte array between the specified start and end.
     *
     * @param bytes The byte array to search
     * @param start The point to start searching from in the byte array (inclusive)
     * @param end   The point to stop searching in the byte array (exclusive)
     * @param b     The byte to search for
     *
     * @return The position of the first instance of the byte or -1 if the byte is not found.
     */
    public static int findByte(byte bytes[], int start, int end, byte b) {
        for (int i = start; i < end; i++) {
            if (bytes[i] == b) {
                return i;
            }
        }
        return -1;
    }


    /**
     * Returns the first position in the byte array, between the specified start and end, that holds any of the given
     * bytes.
     *
     * @param bytes The byte array to search
     * @param start The point to start searching from in the byte array (inclusive)
     * @param end   The point to stop searching in the byte array (exclusive)
     * @param b     The array of bytes to search for
     *
     * @return The position of the first instance of any of the bytes or -1 if none of them is found.
     */
    public static int findBytes(byte bytes[], int start, int end, byte b[]) {
        for (int pos = start; pos < end; pos++) {
            final byte current = bytes[pos];
            for (int k = 0; k < b.length; k++) {
                if (current == b[k]) {
                    return pos;
                }
            }
        }
        return -1;
    }


    /**
     * Convert specified String to a byte array by narrowing each char to a byte. This ONLY WORKS for ASCII; other
     * characters are truncated to their low 8 bits.
     *
     * @param value to convert to byte array
     *
     * @return the byte array value, one byte per char of the input
     */
    public static byte[] convertToBytes(String value) {
        final int count = value.length();
        final byte[] result = new byte[count];
        int i = 0;
        while (i < count) {
            result[i] = (byte) value.charAt(i);
            i++;
        }
        return result;
    }


    /**
     * Thrown by {@link #flushBuffer()} when the buffer needs to be flushed but no output channel has been set.
     */
    public static class BufferOverflowException extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * @param message the detail message
         */
        public BufferOverflowException(String message) {
            super(message);
        }
    }
}