001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.io; 018 019import java.io.Serializable; 020import java.util.Locale; 021 022/** 023 * Byte Order Mark (BOM) representation - see {@link BOMInputStream}. 
/**
 * Byte Order Mark (BOM) representation - see {@link BOMInputStream}.
 * <p>
 * A BOM pairs a charset name with the byte sequence that identifies it. The byte
 * sequence is copied on construction and only ever handed out as a copy, so an
 * instance never exposes its internal array.
 * <p>
 * Equality and hash code are based on the byte sequence only; the charset name is
 * not taken into account.
 *
 * @see BOMInputStream
 * @see <a href="http://en.wikipedia.org/wiki/Byte_order_mark">Wikipedia: Byte Order Mark</a>
 * @see <a href="http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing">W3C: Autodetection of Character Encodings
 *      (Non-Normative)</a>
 * @version $Id$
 * @since 2.0
 */
public class ByteOrderMark implements Serializable {

    private static final long serialVersionUID = 1L;

    /** UTF-8 BOM */
    public static final ByteOrderMark UTF_8 = new ByteOrderMark("UTF-8", 0xEF, 0xBB, 0xBF);

    /** UTF-16BE BOM (Big-Endian) */
    public static final ByteOrderMark UTF_16BE = new ByteOrderMark("UTF-16BE", 0xFE, 0xFF);

    /** UTF-16LE BOM (Little-Endian) */
    public static final ByteOrderMark UTF_16LE = new ByteOrderMark("UTF-16LE", 0xFF, 0xFE);

    /**
     * UTF-32BE BOM (Big-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32BE = new ByteOrderMark("UTF-32BE", 0x00, 0x00, 0xFE, 0xFF);

    /**
     * UTF-32LE BOM (Little-Endian)
     * @since 2.2
     */
    public static final ByteOrderMark UTF_32LE = new ByteOrderMark("UTF-32LE", 0xFF, 0xFE, 0x00, 0x00);

    /**
     * Unicode BOM character; external form depends on the encoding.
     * @see <a href="http://unicode.org/faq/utf_bom.html#BOM">Byte Order Mark (BOM) FAQ</a>
     * @since 2.5
     */
    public static final char UTF_BOM = '\uFEFF';

    /** Name of the charset this BOM identifies; never null or empty. */
    private final String charsetName;

    /** The BOM's bytes, one per element; never null or empty. */
    private final int[] bytes;

    /**
     * Construct a new BOM.
     *
     * @param charsetName The name of the charset the BOM represents
     * @param bytes The BOM's bytes
     * @throws IllegalArgumentException if the charsetName is null or
     * zero length
     * @throws IllegalArgumentException if the bytes are null or zero
     * length
     */
    public ByteOrderMark(final String charsetName, final int... bytes) {
        if (charsetName == null || charsetName.isEmpty()) {
            throw new IllegalArgumentException("No charsetName specified");
        }
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("No bytes specified");
        }
        this.charsetName = charsetName;
        // Defensive copy: later changes to the caller's array must not affect this BOM.
        this.bytes = bytes.clone();
    }

    /**
     * Return the name of the {@link java.nio.charset.Charset} the BOM represents.
     *
     * @return the character set name
     */
    public String getCharsetName() {
        return charsetName;
    }

    /**
     * Return the length of the BOM's bytes.
     *
     * @return the length of the BOM's bytes
     */
    public int length() {
        return bytes.length;
    }

    /**
     * The byte at the specified position.
     *
     * @param pos The position
     * @return The specified byte
     * @throws ArrayIndexOutOfBoundsException if {@code pos} is negative or not
     * less than {@link #length()}
     */
    public int get(final int pos) {
        return bytes[pos];
    }

    /**
     * Return a copy of the BOM's bytes.
     *
     * @return a copy of the BOM's bytes, each value narrowed to a {@code byte}
     */
    public byte[] getBytes() {
        final byte[] copy = new byte[bytes.length];
        for (int i = 0; i < bytes.length; i++) {
            copy[i] = (byte) bytes[i];
        }
        return copy;
    }

    /**
     * Indicates if this BOM's bytes equals another.
     * <p>
     * Only the byte sequences are compared; the charset names are ignored.
     *
     * @param obj The object to compare to
     * @return true if the bom's bytes are equal, otherwise
     * false
     */
    @Override
    public boolean equals(final Object obj) {
        if (!(obj instanceof ByteOrderMark)) {
            return false;
        }
        final ByteOrderMark bom = (ByteOrderMark) obj;
        if (bytes.length != bom.length()) {
            return false;
        }
        for (int i = 0; i < bytes.length; i++) {
            if (bytes[i] != bom.get(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Return the hashcode for this BOM.
     * <p>
     * The value is derived from the byte sequence only, so it agrees with
     * {@link #equals(Object)}.
     *
     * @return the hashcode for this BOM.
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        // Deliberately not seeded with getClass().hashCode(): equals() accepts any
        // ByteOrderMark subclass, so equal instances must hash alike whatever their
        // runtime type. The 31-multiplier keeps the result order-sensitive, so
        // byte-swapped marks (e.g. UTF-16BE vs UTF-16LE) do not collide.
        int hashCode = 1;
        for (final int b : bytes) {
            hashCode = 31 * hashCode + b;
        }
        return hashCode;
    }

    /**
     * Provide a String representation of the BOM.
     *
     * @return the simple class name followed by the charset name and the bytes as
     * upper-case hexadecimal, for example {@code ByteOrderMark[UTF-8: 0xEF,0xBB,0xBF]}
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        builder.append(getClass().getSimpleName());
        builder.append('[');
        builder.append(charsetName);
        builder.append(": ");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("0x");
            // Mask to the low 8 bits so only a single byte's worth of hex is printed.
            builder.append(Integer.toHexString(0xFF & bytes[i]).toUpperCase(Locale.ROOT));
        }
        builder.append(']');
        return builder.toString();
    }

}