001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.util.io;
018
019import java.io.Serializable;
020import java.util.Locale;
021
/**
 * Byte Order Mark (BOM) representation - see {@link BOMInputStream}.
 * <p>
 * Instances are immutable: the byte values handed to the constructor are copied, and
 * {@link #getBytes()} returns a fresh array on every call.
 *
 * @see BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @version $Id$
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8    = new ByteOrderMark("UTF-8",    0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** Name of the charset this BOM stands for; never null or empty. */
    private final String charsetName;

    /** The BOM's byte values, held as ints; never null or empty. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     * <p>
     * The supplied values are copied, so later changes to the caller's array do not
     * affect this instance. The individual values are not range-checked.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length
     * @throws IllegalArgumentException if the bytes are null or zero
     * length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        if (charsetName == null || charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy keeps the instance immutable.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte, as the int value given to the constructor
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not
     * less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes.
     * <p>
     * Each stored int is narrowed to a {@code byte}, so a value such as
     * {@code 0xEF} comes back as {@code (byte) 0xEF}.
     *
     * @return a copy of the BOM's bytes
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte)bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the byte values are compared; the charset name does not take part in
     * the comparison.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        final ByteOrderMark bom = (ByteOrderMark)obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM.
     * <p>
     * The value is derived from the byte values only, consistent with
     * {@link #equals(Object)}.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Seed with the ByteOrderMark class itself rather than getClass():
        // equals() accepts any subclass instance with the same bytes, so the
        // hash must not depend on the runtime class or equal objects could
        // end up with different hash codes.
        int hashCode = ByteOrderMark.class.hashCode();
        for (final int b : bytes) {
            hashCode += b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM.
     * <p>
     * The format is {@code SimpleClassName[charsetName: 0xNN,0xNN,...]}, where each
     * value is the low eight bits of the byte in upper-case hexadecimal without
     * zero padding.
     *
     * @return a String representation of the BOM
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}