001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.encoding; 018 019import java.io.ByteArrayOutputStream; 020import java.io.UnsupportedEncodingException; 021import java.nio.charset.Charset; 022import java.nio.charset.IllegalCharsetNameException; 023import java.nio.charset.UnsupportedCharsetException; 024 025import org.apache.wicket.util.lang.Args; 026 027/** 028 * Adapted from Spring Framework's UriUtils class, but defines instances for query string encoding versus URL path 029 * component encoding. 030 * <p/> 031 * The difference is important because a space is encoded as a + in a query string, but this is a 032 * valid value in a path component (and is therefore not decode back to a space). 033 * 034 * @author Thomas Heigl 035 * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> 036 */ 037public class UrlEncoder 038{ 039 040 enum Type { 041 //@formatter:off 042 QUERY { 043 @Override 044 public boolean isAllowed(int c) 045 { 046 return isPchar(c) || 047 ' ' == c || // encoding a space to a + is done in the encode() method 048 '*' == c || 049 '/' == c || // to allow direct passing of URL in query 050 ',' == c || 051 ':' == c || // allowed and used in wicket interface 052 '@' == c ; 053 } 054 }, 055 PATH { 056 @Override 057 public boolean isAllowed(int c) 058 { 059 return isPchar(c) || 060 '*' == c || 061 '&' == c || 062 '+' == c || 063 ',' == c || 064 ';' == c || // semicolon is used in ;jsessionid= 065 '=' == c || 066 ':' == c || // allowed and used in wicket interface 067 '@' == c ; 068 069 } 070 }, 071 HEADER { 072 @Override 073 public boolean isAllowed(int c) 074 { 075 return isPchar(c) || 076 '#' == c || 077 '&' == c || 078 '+' == c || 079 '^' == c || 080 '`' == c || 081 '|' ==c; 082 } 083 }; 084 //@formatter:on 085 086 /** 087 * Indicates whether the given character is allowed in this URI component. 088 * @return {@code true} if the character is allowed; {@code false} otherwise 089 */ 090 public abstract boolean isAllowed(int c); 091 092 /** 093 * Indicates whether the given character is in the {@code ALPHA} set. 094 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 095 */ 096 protected boolean isAlpha(int c) 097 { 098 return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'); 099 } 100 101 /** 102 * Indicates whether the given character is in the {@code DIGIT} set. 103 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 104 */ 105 protected boolean isDigit(int c) 106 { 107 return (c >= '0' && c <= '9'); 108 } 109 110 /** 111 * Indicates whether the given character is in the {@code sub-delims} set. 112 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 113 */ 114 protected boolean isSubDelimiter(int c) 115 { 116 return ('!' == c || '$' == c); 117 } 118 119 /** 120 * Indicates whether the given character is in the {@code unreserved} set. 121 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 122 */ 123 protected boolean isUnreserved(int c) 124 { 125 return (isAlpha(c) || isDigit(c) || '-' == c || '.' == c || '_' == c || '~' == c); 126 } 127 128 /** 129 * Indicates whether the given character is in the {@code pchar} set. 130 * @see <a href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986, appendix A</a> 131 */ 132 protected boolean isPchar(int c) 133 { 134 return (isUnreserved(c) || isSubDelimiter(c)); 135 } 136 } 137 138 private final Type type; 139 140 /** 141 * Encoder used to encode name or value components of a query string.<br/> 142 * <br/> 143 * 144 * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart 145 */ 146 public static final UrlEncoder QUERY_INSTANCE = new UrlEncoder(Type.QUERY); 147 148 /** 149 * Encoder used to encode segments of a path.<br/> 150 * <br/> 151 * 152 * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart 153 */ 154 public static final UrlEncoder PATH_INSTANCE = new UrlEncoder(Type.PATH); 155 156 /** 157 * Encoder used to encode a header. 158 */ 159 public static final UrlEncoder HEADER_INSTANCE = new UrlEncoder(Type.HEADER); 160 161 /** 162 * Allow subclass to call constructor. 163 * 164 * @param type 165 * encoder type 166 */ 167 protected UrlEncoder(final Type type) 168 { 169 this.type = type; 170 } 171 172 /** 173 * @param s 174 * string to encode 175 * @param charsetName 176 * charset to use for encoding 177 * @return encoded string 178 */ 179 public String encode(final String s, final String charsetName) 180 { 181 Args.notNull(charsetName, "charsetName"); 182 183 try 184 { 185 return encode(s, Charset.forName(charsetName)); 186 } 187 catch (IllegalCharsetNameException | UnsupportedCharsetException e) 188 { 189 throw new RuntimeException(new UnsupportedEncodingException(charsetName)); 190 } 191 } 192 193 /** 194 * @param unsafeInput 195 * string to encode 196 * @param charset 197 * encoding to use 198 * @return encoded string 199 */ 200 public String encode(final String unsafeInput, final Charset charset) 201 { 202 if (unsafeInput == null || unsafeInput.isEmpty()) 203 { 204 return unsafeInput; 205 } 206 207 Args.notNull(charset, "charset"); 208 209 final byte[] bytes = unsafeInput.getBytes(charset); 210 boolean original = true; 211 for (final byte b : bytes) 212 { 213 if (!type.isAllowed(b) || b == ' ' || b == '\0') 214 { 215 original = false; 216 break; 217 } 218 } 219 if (original) 220 { 221 return unsafeInput; 222 } 223 224 final ByteArrayOutputStream bos = new ByteArrayOutputStream(bytes.length); 225 for (final byte b : bytes) 226 { 227 if (type.isAllowed(b)) 228 { 229 if (b == ' ') 230 { 231 bos.write('+'); 232 } 233 else 234 { 235 bos.write(b); 236 } 237 } 238 else 239 { 240 if (b == '\0') 241 { 242 bos.writeBytes("NULL".getBytes(charset)); 243 } 244 else 245 { 246 bos.write('%'); 247 bos.write(Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16))); 248 bos.write(Character.toUpperCase(Character.forDigit(b & 0xF, 16))); 249 } 250 } 251 } 252 return bos.toString(charset); 253 } 254 255}