001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.util.encoding;
018
019import java.io.ByteArrayOutputStream;
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022import java.nio.charset.IllegalCharsetNameException;
023import java.nio.charset.UnsupportedCharsetException;
024
025import org.apache.wicket.util.lang.Args;
026import org.slf4j.Logger;
027import org.slf4j.LoggerFactory;
028
029/**
030 * Adapted from Spring Framework's UriUtils class, but defines instances for query string decoding versus URL path
031 * component decoding.
032 * <p/>
033 * The difference is important because a space is encoded as a + in a query string, but this is a
034 * valid value in a path component (and is therefore not decode back to a space).
035 *
036 * @author Thomas Heigl
037 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a>
038 */
039public class UrlDecoder
040{
041        private static final Logger LOG = LoggerFactory.getLogger(UrlDecoder.class);
042
043        private final boolean decodePlus;
044
045        /**
046         * Encoder used to decode name or value components of a query string.<br/>
047         * <br/>
048         *
049         * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&amp;asis=thispart
050         */
051        public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true);
052
053        /**
054         * Encoder used to decode components of a path.<br/>
055         * <br/>
056         *
057         * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart
058         */
059        public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false);
060
061        /**
062         * Create decoder
063         *
064         * @param decodePlus
065         *            - whether to decode + to space
066         */
067        private UrlDecoder(final boolean decodePlus)
068        {
069                this.decodePlus = decodePlus;
070        }
071
072        /**
073         * @param s
074         *            string to decode
075         * @param enc
076         *            encoding to decode with
077         * @return decoded string
078         */
079        public String decode(final String s, final String enc)
080        {
081                Args.notNull(enc, "enc");
082
083                try
084                {
085                        return decode(s, Charset.forName(enc));
086                }
087                catch (IllegalCharsetNameException | UnsupportedCharsetException e)
088                {
089                        throw new RuntimeException(new UnsupportedEncodingException(enc));
090                }
091        }
092
093        /**
094         * @param source
095         *            string to decode
096         * @param charset
097         *            encoding to decode with
098         * @return decoded string
099         */
100        public String decode(final String source, final Charset charset)
101        {
102                if (source == null || source.isEmpty())
103                {
104                        return source;
105                }
106
107                Args.notNull(charset, "charset");
108
109                final int length = source.length();
110                final ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
111                boolean changed = false;
112                for (int i = 0; i < length; i++)
113                {
114                        final int ch = source.charAt(i);
115                        if (ch == '%')
116                        {
117                                if (i + 2 < length)
118                                {
119                                        final char hex1 = source.charAt(i + 1);
120                                        final char hex2 = source.charAt(i + 2);
121                                        final int u = Character.digit(hex1, 16);
122                                        final int l = Character.digit(hex2, 16);
123                                        if (u != -1 && l != -1)
124                                        {
125                                                bos.write((char)((u << 4) + l));
126                                                i += 2;
127                                        }
128                                        changed = true;
129                                }
130                                else
131                                {
132                                        LOG.info(
133                                                "Incomplete trailing escape (%) pattern in '{}'. The escape character (%) will be ignored.",
134                                                source);
135                                        changed = true;
136                                }
137                        }
138                        else if (ch == '+')
139                        {
140                                if (decodePlus)
141                                {
142                                        bos.write(' ');
143                                        changed = true;
144                                }
145                                else
146                                {
147                                        bos.write(ch);
148                                }
149                        }
150                        else
151                        {
152                                bos.write(ch);
153                        }
154                }
155                final String result = changed ? new String(bos.toByteArray(), charset) : source;
156                // no trying to filter out bad escapes beforehand, just kill all null bytes here at the end,
157                // that way none will come through
158                return result.replace("\0", "NULL");
159        }
160}