001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.encoding; 018 019import java.io.ByteArrayOutputStream; 020import java.io.UnsupportedEncodingException; 021import java.nio.charset.Charset; 022import java.nio.charset.IllegalCharsetNameException; 023import java.nio.charset.UnsupportedCharsetException; 024 025import org.apache.wicket.util.lang.Args; 026import org.slf4j.Logger; 027import org.slf4j.LoggerFactory; 028 029/** 030 * Adapted from Spring Framework's UriUtils class, but defines instances for query string decoding versus URL path 031 * component decoding. 032 * <p/> 033 * The difference is important because a space is encoded as a + in a query string, but this is a 034 * valid value in a path component (and is therefore not decode back to a space). 035 * 036 * @author Thomas Heigl 037 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> 038 */ 039public class UrlDecoder 040{ 041 private static final Logger LOG = LoggerFactory.getLogger(UrlDecoder.class); 042 043 private final boolean decodePlus; 044 045 /** 046 * Encoder used to decode name or value components of a query string.<br/> 047 * <br/> 048 * 049 * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart 050 */ 051 public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true); 052 053 /** 054 * Encoder used to decode components of a path.<br/> 055 * <br/> 056 * 057 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart 058 */ 059 public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false); 060 061 /** 062 * Create decoder 063 * 064 * @param decodePlus 065 * - whether to decode + to space 066 */ 067 private UrlDecoder(final boolean decodePlus) 068 { 069 this.decodePlus = decodePlus; 070 } 071 072 /** 073 * @param s 074 * string to decode 075 * @param enc 076 * encoding to decode with 077 * @return decoded string 078 */ 079 public String decode(final String s, final String enc) 080 { 081 Args.notNull(enc, "enc"); 082 083 try 084 { 085 return decode(s, Charset.forName(enc)); 086 } 087 catch (IllegalCharsetNameException | UnsupportedCharsetException e) 088 { 089 throw new RuntimeException(new UnsupportedEncodingException(enc)); 090 } 091 } 092 093 /** 094 * @param source 095 * string to decode 096 * @param charset 097 * encoding to decode with 098 * @return decoded string 099 */ 100 public String decode(final String source, final Charset charset) 101 { 102 if (source == null || source.isEmpty()) 103 { 104 return source; 105 } 106 107 Args.notNull(charset, "charset"); 108 109 final int length = source.length(); 110 final ByteArrayOutputStream bos = new ByteArrayOutputStream(length); 111 boolean changed = false; 112 for (int i = 0; i < length; i++) 113 { 114 final int ch = source.charAt(i); 115 if (ch == '%') 116 { 117 if (i + 2 < length) 118 { 119 final char hex1 = source.charAt(i + 1); 120 final char hex2 = source.charAt(i + 2); 121 final int u = Character.digit(hex1, 16); 122 final int l = Character.digit(hex2, 16); 123 if (u != -1 && l != -1) 124 { 125 bos.write((char)((u << 4) + l)); 126 i += 2; 127 } 128 changed = true; 129 } 130 else 131 { 132 LOG.info( 133 "Incomplete trailing escape (%) pattern in '{}'. The escape character (%) will be ignored.", 134 source); 135 changed = true; 136 } 137 } 138 else if (ch == '+') 139 { 140 if (decodePlus) 141 { 142 bos.write(' '); 143 changed = true; 144 } 145 else 146 { 147 bos.write(ch); 148 } 149 } 150 else 151 { 152 bos.write(ch); 153 } 154 } 155 final String result = changed ? new String(bos.toByteArray(), charset) : source; 156 // no trying to filter out bad escapes beforehand, just kill all null bytes here at the end, 157 // that way none will come through 158 return result.replace("\0", "NULL"); 159 } 160}