001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.extensions.util.encoding; 018 019import java.io.File; 020import java.io.FileInputStream; 021import java.io.IOException; 022import java.io.InputStream; 023import java.util.ArrayList; 024import java.util.HashMap; 025import java.util.Hashtable; 026import java.util.List; 027import java.util.Locale; 028import java.util.Map; 029import java.util.Properties; 030 031/** 032 * This class maintains a set of mappers defining mappings between locales and the corresponding 033 * charsets. The mappings are defined as properties between locale and charset names. The 034 * definitions can be listed in property files located in user's home directory, Java home directory 035 * or the current class jar. In addition, this class maintains static default mappings and 036 * constructors support application specific mappings. 037 * 038 * This source has originally been taken from the jakarta Turbine project. 039 * 040 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha </a> 041 */ 042public final class CharSetMap 043{ 044 /** 045 * The default charset when nothing else is applicable. 046 */ 047 public static final String DEFAULT_CHARSET = "ISO-8859-1"; 048 049 /** 050 * The name for charset mapper resources. 051 */ 052 public static final String CHARSET_RESOURCE = "charset.properties"; 053 054 /** 055 * Priorities of available mappers. 056 */ 057 private static final int MAP_CACHE = 0; 058 private static final int MAP_PROG = 1; 059 private static final int MAP_HOME = 2; 060 private static final int MAP_SYS = 3; 061 private static final int MAP_JAR = 4; 062 private static final int MAP_COM = 5; 063 064 /** 065 * A common charset mapper for languages. 066 */ 067 private static final Map<String, String> commonMapper = new HashMap<>(); 068 069 static 070 { 071 commonMapper.put("ar", "ISO-8859-6"); 072 commonMapper.put("be", "ISO-8859-5"); 073 commonMapper.put("bg", "ISO-8859-5"); 074 commonMapper.put("ca", "ISO-8859-1"); 075 commonMapper.put("cs", "ISO-8859-2"); 076 commonMapper.put("da", "ISO-8859-1"); 077 commonMapper.put("de", "ISO-8859-1"); 078 commonMapper.put("el", "ISO-8859-7"); 079 commonMapper.put("en", "ISO-8859-1"); 080 commonMapper.put("es", "ISO-8859-1"); 081 commonMapper.put("et", "ISO-8859-1"); 082 commonMapper.put("fi", "ISO-8859-1"); 083 commonMapper.put("fr", "ISO-8859-1"); 084 commonMapper.put("hr", "ISO-8859-2"); 085 commonMapper.put("hu", "ISO-8859-2"); 086 commonMapper.put("is", "ISO-8859-1"); 087 commonMapper.put("it", "ISO-8859-1"); 088 commonMapper.put("iw", "ISO-8859-8"); 089 commonMapper.put("ja", "Shift_JIS"); 090 commonMapper.put("ko", "EUC-KR"); 091 commonMapper.put("lt", "ISO-8859-2"); 092 commonMapper.put("lv", "ISO-8859-2"); 093 commonMapper.put("mk", "ISO-8859-5"); 094 commonMapper.put("nl", "ISO-8859-1"); 095 commonMapper.put("no", "ISO-8859-1"); 096 commonMapper.put("pl", "ISO-8859-2"); 097 commonMapper.put("pt", "ISO-8859-1"); 098 commonMapper.put("ro", "ISO-8859-2"); 099 commonMapper.put("ru", "ISO-8859-5"); 100 commonMapper.put("sh", "ISO-8859-5"); 101 commonMapper.put("sk", "ISO-8859-2"); 102 commonMapper.put("sl", "ISO-8859-2"); 103 commonMapper.put("sq", "ISO-8859-2"); 104 commonMapper.put("sr", "ISO-8859-5"); 105 commonMapper.put("sv", "ISO-8859-1"); 106 commonMapper.put("tr", "ISO-8859-9"); 107 commonMapper.put("uk", "ISO-8859-5"); 108 commonMapper.put("zh", "GB2312"); 109 commonMapper.put("zh_TW", "Big5"); 110 } 111 112 /** 113 * A collection of available charset mappers. 114 */ 115 private final List<Map<String, String>> mappers = new ArrayList<>(); 116 { 117 for (int i = 0; i < MAP_COM; i++) 118 { 119 mappers.add(null); 120 } 121 } 122 123 /** 124 * Loads mappings from a stream. 125 * 126 * @param input 127 * an input stream. 128 * @return the mappings. 129 * @throws IOException 130 * for an incorrect stream. 131 */ 132 protected static Map<String, String> loadStream(final InputStream input) 133 throws IOException 134 { 135 return createMap(input); 136 } 137 138 private static Map<String, String> createMap(final InputStream input) throws IOException 139 { 140 final Properties props = new Properties(); 141 props.load(input); 142 return createMap(props); 143 } 144 145 private static Map<String, String> createMap(final Properties props) 146 { 147 HashMap<String, String> map = new HashMap<>(); 148 for (Object key : props.keySet()) 149 { 150 String keyString = (String)key; 151 map.put(keyString, props.getProperty(keyString)); 152 } 153 return map; 154 } 155 156 /** 157 * Loads mappings from a file. 158 * 159 * @param file 160 * a file. 161 * @return the mappings. 162 * @throws IOException 163 * for an incorrect file. 164 */ 165 protected static Map<String, String> loadFile(final File file) throws IOException 166 { 167 return loadStream(new FileInputStream(file)); 168 } 169 170 /** 171 * Loads mappings from a file path. 172 * 173 * @param path 174 * a file path. 175 * @return the mappings. 176 * @throws IOException 177 * for an incorrect file. 178 */ 179 protected static Map<String, String> loadPath(final String path) throws IOException 180 { 181 return loadFile(new File(path)); 182 } 183 184 /** 185 * Loads mappings from a resource. 186 * 187 * @param name 188 * a resource name. 189 * @return the mappings. 190 */ 191 protected static Map<String, String> loadResource(final String name) 192 { 193 final InputStream input = CharSetMap.class.getResourceAsStream(name); 194 if (input != null) 195 { 196 try 197 { 198 return loadStream(input); 199 } 200 catch (IOException ex) 201 { 202 return null; 203 } 204 } 205 206 return null; 207 } 208 209 /** 210 * Constructs a new charset map with default mappers. 211 */ 212 public CharSetMap() 213 { 214 String path; 215 try 216 { 217 // Check whether the user directory contains mappings. 218 path = System.getProperty("user.home"); 219 if (path != null) 220 { 221 path = path + File.separator + CHARSET_RESOURCE; 222 mappers.add(MAP_HOME, loadPath(path)); 223 } 224 } 225 catch (Exception ex) 226 { 227 // ignore 228 } 229 230 try 231 { 232 // Check whether the system directory contains mappings. 233 path = System.getProperty("java.home") + File.separator + "lib" + File.separator + 234 CHARSET_RESOURCE; 235 236 mappers.add(MAP_SYS, loadPath(path)); 237 } 238 catch (Exception ex) 239 { 240 // ignore 241 } 242 243 // Check whether the current class jar contains mappings. 244 mappers.add(MAP_JAR, loadResource("/META-INF/" + CHARSET_RESOURCE)); 245 246 // Set the common mapper to have the lowest priority. 247 mappers.add(MAP_COM, commonMapper); 248 249 // Set the cache mapper to have the highest priority. 250 mappers.add(MAP_CACHE, new Hashtable<String, String>()); 251 } 252 253 /** 254 * Constructs a charset map from properties. 255 * 256 * @param props 257 * charset mapping properties. 258 */ 259 public CharSetMap(final Properties props) 260 { 261 this(); 262 mappers.add(MAP_PROG, createMap(props)); 263 } 264 265 /** 266 * Constructs a charset map read from a stream. 267 * 268 * @param input 269 * an input stream. 270 * @throws IOException 271 * for an incorrect stream. 272 */ 273 public CharSetMap(final InputStream input) throws IOException 274 { 275 this(); 276 mappers.add(MAP_PROG, loadStream(input)); 277 } 278 279 /** 280 * Constructs a charset map read from a property file. 281 * 282 * @param file 283 * a property file. 284 * @throws IOException 285 * for an incorrect property file. 286 */ 287 public CharSetMap(final File file) throws IOException 288 { 289 this(); 290 mappers.add(MAP_PROG, loadFile(file)); 291 } 292 293 /** 294 * Constructs a charset map read from a property file path. 295 * 296 * @param path 297 * a property file path. 298 * @throws IOException 299 * for an incorrect property file. 300 */ 301 public CharSetMap(final String path) throws IOException 302 { 303 this(); 304 mappers.add(MAP_PROG, loadPath(path)); 305 } 306 307 /** 308 * Sets a locale-charset mapping. 309 * 310 * @param key 311 * the key for the charset. 312 * @param charset 313 * the corresponding charset. 314 */ 315 @SuppressWarnings({ "unchecked" }) 316 public final synchronized void setCharSet(final String key, final String charset) 317 { 318 HashMap<String, String> mapper = (HashMap<String, String>)mappers.get(MAP_PROG); 319 if (mapper != null) 320 { 321 mapper = (HashMap<String, String>)mapper.clone(); 322 } 323 else 324 { 325 mapper = new HashMap<>(); 326 } 327 mapper.put(key, charset); 328 mappers.add(MAP_PROG, mapper); 329 mappers.get(MAP_CACHE).clear(); 330 } 331 332 /** 333 * Gets the charset for a locale. First a locale specific charset is searched for, then a 334 * country specific one and lastly a language specific one. If none is found, the default 335 * charset is returned. 336 * 337 * @param locale 338 * the locale. 339 * @return the charset. 340 */ 341 public final String getCharSet(final Locale locale) 342 { 343 // Check the cache first. 344 String key = locale.toString(); 345 if (key.length() == 0) 346 { 347 key = "__" + locale.getVariant(); 348 if (key.length() == 2) 349 { 350 return DEFAULT_CHARSET; 351 } 352 } 353 354 String charset = searchCharSet(key); 355 if (charset.length() == 0) 356 { 357 // Not found, perform a full search and update the cache. 358 String[] items = new String[3]; 359 items[2] = locale.getVariant(); 360 items[1] = locale.getCountry(); 361 items[0] = locale.getLanguage(); 362 363 charset = searchCharSet(items); 364 if (charset.length() == 0) 365 { 366 charset = DEFAULT_CHARSET; 367 } 368 369 mappers.get(MAP_CACHE).put(key, charset); 370 } 371 372 return charset; 373 } 374 375 /** 376 * Gets the charset for a locale with a variant. The search is performed in the following order: 377 * "lang"_"country"_"variant"="charset", _"country"_"variant"="charset", 378 * "lang"__"variant"="charset", __"variant"="charset", "lang"_"country"="charset", 379 * _"country"="charset", "lang"="charset". If nothing of the above is found, the default charset 380 * is returned. 381 * 382 * @param locale 383 * the locale. 384 * @param variant 385 * a variant field. 386 * @return the charset. 387 */ 388 public final String getCharSet(final Locale locale, final String variant) 389 { 390 // Check the cache first. 391 if ((variant != null) && (variant.length() > 0)) 392 { 393 String key = locale.toString(); 394 if (key.length() == 0) 395 { 396 key = "__" + locale.getVariant(); 397 if (key.length() > 2) 398 { 399 key += '_' + variant; 400 } 401 else 402 { 403 key += variant; 404 } 405 } 406 else if (locale.getCountry().length() == 0) 407 { 408 key += "__" + variant; 409 } 410 else 411 { 412 key += '_' + variant; 413 } 414 415 String charset = searchCharSet(key); 416 if (charset.length() == 0) 417 { 418 // Not found, perform a full search and update the cache. 419 String[] items = new String[4]; 420 items[3] = variant; 421 items[2] = locale.getVariant(); 422 items[1] = locale.getCountry(); 423 items[0] = locale.getLanguage(); 424 425 charset = searchCharSet(items); 426 if (charset.length() == 0) 427 { 428 charset = DEFAULT_CHARSET; 429 } 430 431 mappers.get(MAP_CACHE).put(key, charset); 432 } 433 434 return charset; 435 } 436 437 return getCharSet(locale); 438 } 439 440 /** 441 * Gets the charset for a specified key. 442 * 443 * @param key 444 * the key for the charset. 445 * @return the found charset or the default one. 446 */ 447 public final String getCharSet(final String key) 448 { 449 final String charset = searchCharSet(key); 450 return charset.length() > 0 ? charset : DEFAULT_CHARSET; 451 } 452 453 /** 454 * Gets the charset for a specified key. 455 * 456 * @param key 457 * the key for the charset. 458 * @param def 459 * the default charset if none is found. 460 * @return the found charset or the given default. 461 */ 462 public final String getCharSet(final String key, final String def) 463 { 464 String charset = searchCharSet(key); 465 return charset.length() > 0 ? charset : def; 466 } 467 468 /** 469 * Searches for a charset for a specified locale. 470 * 471 * @param items 472 * an array of locale items. 473 * @return the found charset or an empty string. 474 */ 475 private final String searchCharSet(final String[] items) 476 { 477 String charset; 478 final StringBuilder sb = new StringBuilder(); 479 for (int i = items.length; i > 0; i--) 480 { 481 charset = searchCharSet(items, sb, i); 482 if (charset.length() > 0) 483 { 484 return charset; 485 } 486 487 sb.setLength(0); 488 } 489 490 return ""; 491 } 492 493 /** 494 * Searches recursively for a charset for a specified locale. 495 * 496 * @param items 497 * an array of locale items. 498 * @param base 499 * a buffer of base items. 500 * @param count 501 * the number of items to go through. 502 * @return the found charset or an empty string. 503 */ 504 private final String searchCharSet(final String[] items, final StringBuilder base, int count) 505 { 506 if ((--count >= 0) && (items[count] != null) && (items[count].length() > 0)) 507 { 508 String charset; 509 base.insert(0, items[count]); 510 int length = base.length(); 511 512 for (int i = count; i > 0; i--) 513 { 514 if ((i == count) || (i <= 1)) 515 { 516 base.insert(0, '_'); 517 length++; 518 } 519 520 charset = searchCharSet(items, base, i); 521 if (charset.length() > 0) 522 { 523 return charset; 524 } 525 526 base.delete(0, base.length() - length); 527 } 528 529 return searchCharSet(base.toString()); 530 } 531 532 return ""; 533 } 534 535 /** 536 * Searches for a charset for a specified key. 537 * 538 * @param key 539 * the key for the charset. 540 * @return the found charset or an empty string. 541 */ 542 private final String searchCharSet(final String key) 543 { 544 if ((key != null) && (key.length() > 0)) 545 { 546 // Go through mappers. 547 Map<String, String> mapper; 548 String charset; 549 550 for (int i = 0; i < mappers.size(); i++) 551 { 552 mapper = mappers.get(i); 553 if (mapper != null) 554 { 555 charset = mapper.get(key); 556 if (charset != null) 557 { 558 // Update the cache. 559 if (i > MAP_CACHE) 560 { 561 mappers.get(MAP_CACHE).put(key, charset); 562 } 563 564 return charset; 565 } 566 } 567 } 568 569 // Not found, add an empty string to the cache. 570 mappers.get(MAP_CACHE).put(key, ""); 571 } 572 573 return ""; 574 } 575 576 /** 577 * Sets a common locale-charset mapping. 578 * 579 * @param key 580 * the key for the charset. 581 * @param charset 582 * the corresponding charset. 583 */ 584 @SuppressWarnings({ "unchecked" }) 585 protected final synchronized void setCommonCharSet(final String key, final String charset) 586 { 587 HashMap<String, String> map = (HashMap<String, String>)mappers.get(MAP_COM); 588 final HashMap<String, String> mapper = (HashMap<String, String>)map.clone(); 589 mapper.put(key, charset); 590 mappers.add(MAP_COM, mapper); 591 mappers.get(MAP_CACHE).clear(); 592 } 593}