001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.extensions.util.encoding;
018
019import java.io.File;
020import java.io.FileInputStream;
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.ArrayList;
024import java.util.HashMap;
025import java.util.Hashtable;
026import java.util.List;
027import java.util.Locale;
028import java.util.Map;
029import java.util.Properties;
030
031/**
032 * This class maintains a set of mappers defining mappings between locales and the corresponding
033 * charsets. The mappings are defined as properties between locale and charset names. The
034 * definitions can be listed in property files located in user's home directory, Java home directory
035 * or the current class jar. In addition, this class maintains static default mappings and
036 * constructors support application specific mappings.
037 * 
038 * This source has originally been taken from the jakarta Turbine project.
039 * 
040 * @author <a href="mailto:ilkka.priha@simsoft.fi">Ilkka Priha </a>
041 */
public final class CharSetMap
{
    /**
     * The default charset when nothing else is applicable.
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * The name for charset mapper resources.
     */
    public static final String CHARSET_RESOURCE = "charset.properties";

    /*
     * Slot indexes of the mappers inside the mappers list. Lookups walk the slots in ascending
     * order, so a lower index means a higher priority.
     */
    private static final int MAP_CACHE = 0;
    private static final int MAP_PROG = 1;
    private static final int MAP_HOME = 2;
    private static final int MAP_SYS = 3;
    private static final int MAP_JAR = 4;
    private static final int MAP_COM = 5;

    /**
     * Total number of mapper slots (one per MAP_* constant).
     */
    private static final int MAP_COUNT = MAP_COM + 1;

    /**
     * A common charset mapper for languages. It is only read after class initialization;
     * {@link #setCommonCharSet(String, String)} works on a per-instance copy.
     */
    private static final Map<String, String> commonMapper = new HashMap<>();

    static
    {
        commonMapper.put("ar", "ISO-8859-6");
        commonMapper.put("be", "ISO-8859-5");
        commonMapper.put("bg", "ISO-8859-5");
        commonMapper.put("ca", "ISO-8859-1");
        commonMapper.put("cs", "ISO-8859-2");
        commonMapper.put("da", "ISO-8859-1");
        commonMapper.put("de", "ISO-8859-1");
        commonMapper.put("el", "ISO-8859-7");
        commonMapper.put("en", "ISO-8859-1");
        commonMapper.put("es", "ISO-8859-1");
        commonMapper.put("et", "ISO-8859-1");
        commonMapper.put("fi", "ISO-8859-1");
        commonMapper.put("fr", "ISO-8859-1");
        commonMapper.put("hr", "ISO-8859-2");
        commonMapper.put("hu", "ISO-8859-2");
        commonMapper.put("is", "ISO-8859-1");
        commonMapper.put("it", "ISO-8859-1");
        commonMapper.put("iw", "ISO-8859-8");
        commonMapper.put("ja", "Shift_JIS");
        commonMapper.put("ko", "EUC-KR");
        commonMapper.put("lt", "ISO-8859-2");
        commonMapper.put("lv", "ISO-8859-2");
        commonMapper.put("mk", "ISO-8859-5");
        commonMapper.put("nl", "ISO-8859-1");
        commonMapper.put("no", "ISO-8859-1");
        commonMapper.put("pl", "ISO-8859-2");
        commonMapper.put("pt", "ISO-8859-1");
        commonMapper.put("ro", "ISO-8859-2");
        commonMapper.put("ru", "ISO-8859-5");
        commonMapper.put("sh", "ISO-8859-5");
        commonMapper.put("sk", "ISO-8859-2");
        commonMapper.put("sl", "ISO-8859-2");
        commonMapper.put("sq", "ISO-8859-2");
        commonMapper.put("sr", "ISO-8859-5");
        commonMapper.put("sv", "ISO-8859-1");
        commonMapper.put("tr", "ISO-8859-9");
        commonMapper.put("uk", "ISO-8859-5");
        commonMapper.put("zh", "GB2312");
        commonMapper.put("zh_TW", "Big5");
    }

    /**
     * The available charset mappers, indexed by the MAP_* constants. A slot holds
     * <code>null</code> when the corresponding source supplied no mappings.
     */
    private final List<Map<String, String>> mappers = new ArrayList<>(MAP_COUNT);
    {
        // Pre-fill every slot so that the constructors can assign by index with List.set().
        for (int i = 0; i < MAP_COUNT; i++)
        {
            mappers.add(null);
        }
    }

    /**
     * Loads mappings from a stream. The stream is read but not closed; closing it remains the
     * caller's responsibility.
     * 
     * @param input
     *            an input stream in properties format.
     * @return the mappings.
     * @throws IOException
     *             for an incorrect stream.
     */
    protected static Map<String, String> loadStream(final InputStream input)
        throws IOException
    {
        final Properties props = new Properties();
        props.load(input);
        return createMap(props);
    }

    /**
     * Copies the string-valued entries of a property set into a new map. Entries inherited from
     * the property set's default list are included; entries whose key or value is not a string
     * are skipped.
     * 
     * @param props
     *            the properties to copy.
     * @return a new mutable map holding the mappings.
     */
    private static Map<String, String> createMap(final Properties props)
    {
        final Map<String, String> map = new HashMap<>();
        for (String name : props.stringPropertyNames())
        {
            map.put(name, props.getProperty(name));
        }
        return map;
    }

    /**
     * Loads mappings from a file. The file is closed again before this method returns.
     * 
     * @param file
     *            a file.
     * @return the mappings.
     * @throws IOException
     *             for an incorrect file.
     */
    protected static Map<String, String> loadFile(final File file) throws IOException
    {
        try (InputStream input = new FileInputStream(file))
        {
            return loadStream(input);
        }
    }

    /**
     * Loads mappings from a file path.
     * 
     * @param path
     *            a file path.
     * @return the mappings.
     * @throws IOException
     *             for an incorrect file.
     */
    protected static Map<String, String> loadPath(final String path) throws IOException
    {
        return loadFile(new File(path));
    }

    /**
     * Loads mappings from a resource that is resolved relative to this class.
     * 
     * @param name
     *            a resource name.
     * @return the mappings, or <code>null</code> if the resource does not exist or cannot be
     *         read.
     */
    protected static Map<String, String> loadResource(final String name)
    {
        // A null resource is legal in try-with-resources; it is simply not closed.
        try (InputStream input = CharSetMap.class.getResourceAsStream(name))
        {
            return (input != null) ? loadStream(input) : null;
        }
        catch (IOException ignored)
        {
            // Best effort: an unreadable resource is treated like a missing one.
            return null;
        }
    }

    /**
     * Tries to load {@link #CHARSET_RESOURCE} from below the directory named by a system
     * property. Any failure (property not set, file missing or unreadable, access denied) is
     * deliberately ignored because these mapping files are optional.
     * 
     * @param directoryProperty
     *            the name of the system property holding the base directory.
     * @param subDirectories
     *            directory names to descend into below the base directory.
     * @return the mappings, or <code>null</code> if none could be loaded.
     */
    private static Map<String, String> loadOptionalFile(final String directoryProperty,
        final String... subDirectories)
    {
        try
        {
            final String base = System.getProperty(directoryProperty);
            if (base == null)
            {
                return null;
            }

            final StringBuilder path = new StringBuilder(base);
            for (String directory : subDirectories)
            {
                path.append(File.separator).append(directory);
            }
            path.append(File.separator).append(CHARSET_RESOURCE);

            return loadPath(path.toString());
        }
        catch (IOException | RuntimeException ignored)
        {
            // Optional source; fall through to the lower priority mappers.
            return null;
        }
    }

    /**
     * Constructs a new charset map with default mappers: an empty lookup cache, the optional
     * property files in the user's home directory, in the <code>lib</code> directory of the Java
     * home and in <code>/META-INF/</code> on the class path, and finally the built-in common
     * mappings.
     */
    public CharSetMap()
    {
        // Each source has a fixed slot; set() replaces the placeholder instead of shifting it.

        // The cache mapper has the highest priority.
        mappers.set(MAP_CACHE, new Hashtable<String, String>());

        // Check whether the user directory contains mappings.
        mappers.set(MAP_HOME, loadOptionalFile("user.home"));

        // Check whether the system directory contains mappings.
        mappers.set(MAP_SYS, loadOptionalFile("java.home", "lib"));

        // Check whether the current class jar contains mappings.
        mappers.set(MAP_JAR, loadResource("/META-INF/" + CHARSET_RESOURCE));

        // The common mapper has the lowest priority.
        mappers.set(MAP_COM, commonMapper);
    }

    /**
     * Constructs a charset map from properties.
     * 
     * @param props
     *            charset mapping properties.
     */
    public CharSetMap(final Properties props)
    {
        this();
        mappers.set(MAP_PROG, createMap(props));
    }

    /**
     * Constructs a charset map read from a stream. The stream is not closed.
     * 
     * @param input
     *            an input stream.
     * @throws IOException
     *             for an incorrect stream.
     */
    public CharSetMap(final InputStream input) throws IOException
    {
        this();
        mappers.set(MAP_PROG, loadStream(input));
    }

    /**
     * Constructs a charset map read from a property file.
     * 
     * @param file
     *            a property file.
     * @throws IOException
     *             for an incorrect property file.
     */
    public CharSetMap(final File file) throws IOException
    {
        this();
        mappers.set(MAP_PROG, loadFile(file));
    }

    /**
     * Constructs a charset map read from a property file path.
     * 
     * @param path
     *            a property file path.
     * @throws IOException
     *             for an incorrect property file.
     */
    public CharSetMap(final String path) throws IOException
    {
        this();
        mappers.set(MAP_PROG, loadPath(path));
    }

    /**
     * Sets a locale-charset mapping in the application specific mapper and clears the lookup
     * cache. The mapper is replaced by an updated copy rather than modified in place.
     * 
     * @param key
     *            the key for the charset.
     * @param charset
     *            the corresponding charset.
     */
    public final synchronized void setCharSet(final String key, final String charset)
    {
        replaceMapping(MAP_PROG, key, charset);
    }

    /**
     * Gets the charset for a locale. First a locale specific charset is searched for, then a
     * country specific one and lastly a language specific one. If none is found, the default
     * charset is returned.
     * 
     * @param locale
     *            the locale.
     * @return the charset.
     */
    public final String getCharSet(final Locale locale)
    {
        String key = locale.toString();
        if (key.length() == 0)
        {
            // Neither language nor country: fall back to a variant-only key.
            key = "__" + locale.getVariant();
            if (key.length() == 2)
            {
                return DEFAULT_CHARSET;
            }
        }

        return resolve(key, locale.getLanguage(), locale.getCountry(), locale.getVariant());
    }

    /**
     * Gets the charset for a locale with a variant. The search is performed in the following order:
     * "lang"_"country"_"variant"="charset", _"country"_"variant"="charset",
     * "lang"__"variant"="charset", __"variant"="charset", "lang"_"country"="charset",
     * _"country"="charset", "lang"="charset". If nothing of the above is found, the default charset
     * is returned.
     * 
     * @param locale
     *            the locale.
     * @param variant
     *            a variant field; when <code>null</code> or empty this behaves like
     *            {@link #getCharSet(Locale)}.
     * @return the charset.
     */
    public final String getCharSet(final Locale locale, final String variant)
    {
        if ((variant == null) || (variant.length() == 0))
        {
            return getCharSet(locale);
        }

        // Build the cache key for the locale/variant combination.
        String key = locale.toString();
        if (key.length() == 0)
        {
            key = "__" + locale.getVariant();
            if (key.length() > 2)
            {
                key = key + "_" + variant;
            }
            else
            {
                key = key + variant;
            }
        }
        else if (locale.getCountry().length() == 0)
        {
            key = key + "__" + variant;
        }
        else
        {
            key = key + "_" + variant;
        }

        return resolve(key, locale.getLanguage(), locale.getCountry(), locale.getVariant(),
            variant);
    }

    /**
     * Gets the charset for a specified key.
     * 
     * @param key
     *            the key for the charset.
     * @return the found charset or the default one.
     */
    public final String getCharSet(final String key)
    {
        return getCharSet(key, DEFAULT_CHARSET);
    }

    /**
     * Gets the charset for a specified key.
     * 
     * @param key
     *            the key for the charset.
     * @param def
     *            the default charset if none is found.
     * @return the found charset or the given default.
     */
    public final String getCharSet(final String key, final String def)
    {
        final String charset = searchCharSet(key);
        return charset.length() > 0 ? charset : def;
    }

    /**
     * Resolves a charset for a cache key: tries the key directly and, if that yields nothing,
     * runs the full search over the locale items and stores the outcome in the cache under the
     * key.
     * 
     * @param key
     *            the cache key.
     * @param items
     *            the locale items, ordered from language to the most specific item.
     * @return the resolved charset, {@link #DEFAULT_CHARSET} if nothing matches.
     */
    private String resolve(final String key, final String... items)
    {
        String charset = searchCharSet(key);
        if (charset.length() == 0)
        {
            // Not found, perform a full search and update the cache.
            charset = searchCharSet(items);
            if (charset.length() == 0)
            {
                charset = DEFAULT_CHARSET;
            }

            // NOTE(review): a default stored here is later returned by the key based
            // getCharSet(String, String) as well, taking precedence over its "def" argument.
            mappers.get(MAP_CACHE).put(key, charset);
        }

        return charset;
    }

    /**
     * Searches for a charset for a specified locale, starting with the most specific item and
     * dropping one trailing item per round.
     * 
     * @param items
     *            an array of locale items.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(final String[] items)
    {
        final StringBuilder sb = new StringBuilder();
        for (int i = items.length; i > 0; i--)
        {
            final String charset = searchCharSet(items, sb, i);
            if (charset.length() > 0)
            {
                return charset;
            }

            sb.setLength(0);
        }

        return "";
    }

    /**
     * Searches recursively for a charset for a specified locale. The item at index
     * <code>count - 1</code> is prepended to the base buffer; keys that additionally start with
     * lower indexed items are tried first and the buffer content on its own is tried last.
     * 
     * @param items
     *            an array of locale items.
     * @param base
     *            a buffer of base items.
     * @param count
     *            the number of items to go through.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(final String[] items, final StringBuilder base, int count)
    {
        if ((--count >= 0) && (items[count] != null) && (items[count].length() > 0))
        {
            String charset;
            base.insert(0, items[count]);

            // Number of trailing characters that must survive the roll-back below.
            int length = base.length();

            for (int i = count; i > 0; i--)
            {
                // A separator is added on the first round and again on the last one.
                if ((i == count) || (i <= 1))
                {
                    base.insert(0, '_');
                    length++;
                }

                charset = searchCharSet(items, base, i);
                if (charset.length() > 0)
                {
                    return charset;
                }

                // Strip whatever the recursive call prepended.
                base.delete(0, base.length() - length);
            }

            return searchCharSet(base.toString());
        }

        return "";
    }

    /**
     * Searches for a charset for a specified key by asking the mappers in priority order. A hit
     * in a mapper other than the cache is copied into the cache; a miss is cached as an empty
     * string.
     * 
     * @param key
     *            the key for the charset.
     * @return the found charset or an empty string.
     */
    private String searchCharSet(final String key)
    {
        if ((key == null) || (key.length() == 0))
        {
            return "";
        }

        final Map<String, String> cache = mappers.get(MAP_CACHE);

        // Go through mappers.
        for (int i = 0; i < mappers.size(); i++)
        {
            final Map<String, String> mapper = mappers.get(i);
            if (mapper == null)
            {
                continue;
            }

            final String charset = mapper.get(key);
            if (charset != null)
            {
                // Update the cache.
                if (i > MAP_CACHE)
                {
                    cache.put(key, charset);
                }

                return charset;
            }
        }

        // Not found, add an empty string to the cache.
        cache.put(key, "");
        return "";
    }

    /**
     * Sets a common locale-charset mapping for this instance and clears the lookup cache. The
     * shared static defaults are not modified; this instance's common mapper is replaced by an
     * updated copy.
     * 
     * @param key
     *            the key for the charset.
     * @param charset
     *            the corresponding charset.
     */
    protected final synchronized void setCommonCharSet(final String key, final String charset)
    {
        replaceMapping(MAP_COM, key, charset);
    }

    /**
     * Replaces the mapper in a slot with a copy that contains one additional or changed mapping,
     * then clears the cache so that stale lookups are not served.
     * 
     * @param slot
     *            the index of the mapper to update.
     * @param key
     *            the key for the charset.
     * @param charset
     *            the corresponding charset.
     */
    private void replaceMapping(final int slot, final String key, final String charset)
    {
        final Map<String, String> current = mappers.get(slot);
        final Map<String, String> updated;
        if (current != null)
        {
            updated = new HashMap<>(current);
        }
        else
        {
            updated = new HashMap<>();
        }

        updated.put(key, charset);
        mappers.set(slot, updated);
        mappers.get(MAP_CACHE).clear();
    }
}