001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.application; 018 019import java.util.HashMap; 020import java.util.Map; 021 022 023/** 024 * This class is an utility class that perform wildcard-patterns matching and isolation. 025 * 026 * @version $Id: WildcardMatcherHelper.java 420832 2006-07-11 13:11:02Z cziegeler $ 027 */ 028public class WildcardMatcherHelper 029{ 030 // ~ Static fields/initializers 031 // ----------------------------------------------------------------- 032 033 /** Escape character */ 034 public static final char ESC = '\\'; 035 036 /** Default path separator: "." */ 037 public static final char PATHSEP = '.'; 038 039 /** any char */ 040 public static final char STAR = '*'; 041 042 // ~ Methods 043 // ------------------------------------------------------------------------------------ 044 045 /** 046 * Match a pattern against a string and isolates wildcard replacement into a <code>Map</code>. <br> 047 * Here is how the matching algorithm works: 048 * 049 * <ul> 050 * <li>The '*' character, meaning that zero or more characters (excluding the path separator 051 * '/') are to be matched.</li> 052 * <li>The '**' sequence, meaning that zero or more characters (including the path separator 053 * '/') are to be matched.</li> 054 * <li>The '\*' sequence is honored as a literal '*' character, not a wildcard</li> 055 * </ul> 056 * <br> 057 * When more than two '*' characters, not separated by another character, are found their value 058 * is considered as '**' and immediate succeeding '*' are skipped. <br> 059 * The '**' wildcard is greedy and thus the following sample cannot match: 060 * <dl> 061 * <dt>pattern</dt> 062 * <dd>STAR,STAR,PATHSEP,STAR,PATHSEP,STAR,STAR (why can't I express it literally?)</dt> 063 * <dt>string</dt> 064 * <dd>foo/bar/baz/bug</dt> 065 * </dl> 066 * The first '**' in the pattern will suck up until the last '/' in string and thus will not 067 * match. <br> 068 * A more advance algorithm could certainly check whether there is an other literal later in the 069 * pattern to ev. match in string but I'll leave this exercise to someone else ATM if one needs 070 * such. 071 * 072 * @param pat 073 * The pattern string. 074 * @param str 075 * The string to math against the pattern 076 * 077 * @return a <code>Map</code> containing the representation of the extracted pattern. The 078 * extracted patterns are keys in the <code>Map</code> from left to right beginning with 079 * "1" for the left most, "2" for the next, a.s.o. The key "0" is the string itself. If 080 * the return value is null, string does not match to the pattern . 081 */ 082 public static Map<String, String> match(final String pat, final String str) 083 { 084 final Matcher map = new Matcher(pat, str); 085 086 if (map.isMatch()) 087 { 088 return map.getMap(); 089 } 090 091 return null; 092 } 093 094 // ~ Inner Classes 095 // ------------------------------------------------------------------------------ 096 097 /** 098 * The private matcher class 099 */ 100 private static class Matcher 101 { 102 // ~ Instance fields 103 // ------------------------------------------------------------------------ 104 105 /** The character array of the pattern */ 106 private final char[] apat; 107 108 /** The length of the character array of the pattern */ 109 private final int lpat; 110 111 /** The character array of the string */ 112 private final char[] astr; 113 114 /** The length of the character array of the string */ 115 private final int lstr; 116 117 /** The <code>Map</code> to be filled */ 118 private final Map<String, String> map = new HashMap<String, String>(); 119 120 /** Whether string matched to pattern */ 121 private final boolean matched; 122 123 /** map index */ 124 private int idx = 0; 125 126 /** index into pattern */ 127 private int ipat = 0; 128 129 /** index into string */ 130 private int istr = 0; 131 132 // ~ Constructors 133 // --------------------------------------------------------------------------- 134 135 /** 136 * Creates a new Matcher object. 137 * 138 * @param pat 139 * The pattern 140 * @param str 141 * The string 142 */ 143 public Matcher(final String pat, final String str) 144 { 145 apat = pat.toCharArray(); 146 lpat = apat.length; 147 astr = str.toCharArray(); 148 lstr = astr.length; 149 add(str); 150 matched = match(); 151 } 152 153 // ~ Methods 154 // -------------------------------------------------------------------------------- 155 156 /** 157 * DOCUMENT ME! 158 * 159 * @return DOCUMENT ME! 160 */ 161 public Map<String, String> getMap() 162 { 163 return map; 164 } 165 166 /** 167 * Has it matched? 168 * 169 * @return whether it has matched 170 */ 171 public boolean isMatch() 172 { 173 return matched; 174 } 175 176 /** 177 * Add a extracted substring to the map 178 * 179 * @param aStr 180 * The extracted substring 181 */ 182 private void add(final String aStr) 183 { 184 String key = String.valueOf(idx++).intern(); 185 map.put(key, aStr); 186 } 187 188 /** 189 * Scans the pattern and the search string from the start toward the end 190 * 191 * @return whether the string matches the pattern 192 */ 193 private boolean match() 194 { 195 // scan a common literal prefix 196 scanLiteralPrefix(); 197 198 // if we are already at the end of both strings 199 // than the pattern matched 200 if (ipat >= lpat && istr >= lstr) 201 { 202 return true; 203 } 204 205 // if hole string has matched the pattern so far and the rest of the pattern only has 206 // wildcard(s) 207 // we match too otherwise we clearly don't match 208 if (ipat < lpat && istr >= lstr) 209 { 210 while (ipat < lpat && apat[ipat] == STAR) 211 { 212 ipat++; 213 } 214 215 if (ipat >= lpat) 216 { 217 add(""); 218 219 return true; 220 } 221 else 222 { 223 return false; 224 } 225 } 226 227 // if hole pattern has matched the string so far but the string has more characters left 228 // we don't match 229 if (ipat >= lpat && istr < lstr) 230 { 231 return false; 232 } 233 234 // if we have not stopped at a wildcard character 235 // a character doesn't match and thus we do not match at all 236 if (apat[ipat] != STAR) 237 { 238 return false; 239 } 240 241 // if it is a double (or more) wildcard pattern 242 if (ipat < lpat - 1 && apat[ipat + 1] == STAR) 243 { 244 // skip to first non star character in the pattern 245 while (++ipat < lpat && apat[ipat] == STAR) 246 { 247 ; // noop 248 } 249 250 // if we are at the end of the pattern we've matched and are finish scanning 251 if (ipat >= lpat) 252 { 253 add(new String(astr, istr, lstr - istr)); 254 255 return true; 256 } 257 258 // Now we need to scan for the end of the literal characters in the pattern 259 final int sipat = ipat; // start position of a literal character used for substring 260 // operations 261 262 while (ipat < lpat && (apat[ipat] != STAR || (ipat > 0 && apat[ipat - 1] == ESC))) 263 { 264 ipat++; 265 } 266 267 // if we reached the end of the pattern just do a string compare with the 268 // corresponding part from 269 // the end of the string 270 if (ipat >= lpat) 271 { 272 return checkEnds(sipat, false); 273 } 274 275 // Now we need to check whether the literal substring of the pattern 276 // is contained in the string somewhere 277 final int l = ipat - sipat; 278 int eistr = lstr - l; 279 280 // because the '**' wildcard need to be greedy we scan from the end of the string 281 // for a match 282 while (istr < eistr && !strncmp(apat, sipat, astr, eistr, l)) 283 { 284 eistr--; 285 } 286 287 if (istr >= eistr) 288 { 289 return false; 290 } 291 292 add(new String(astr, istr, eistr - istr)); 293 istr = eistr + l; 294 } 295 else 296 {// if it is a single star pattern 297 // skip the star 298 ++ipat; 299 300 // if we are at the beginning of the pattern we have to check there is no PATH_SEP 301 // in string 302 if (ipat >= lpat) 303 { 304 final int sistr = istr; 305 306 while (istr < lstr && (astr[istr] != PATHSEP)) 307 { 308 istr++; 309 } 310 311 if (istr >= lstr) 312 { 313 add(new String(astr, sistr, lstr - sistr)); 314 315 return true; 316 } 317 318 // otherwise we do not match 319 return false; 320 } 321 322 // Now we need to search for the start of either a path separator or another 323 // wildcard characters 324 // in the pattern 325 final int sipat = ipat; 326 327 while (ipat < lpat && apat[ipat] != STAR && 328 (apat[ipat] != ESC || ipat < lpat - 1 && apat[ipat + 1] != STAR) && 329 apat[ipat] != PATHSEP) 330 { 331 ipat++; 332 } 333 334 // if we reached the end of the pattern just do a string compare with the 335 // corresponding part from 336 // the end of the string 337 if (ipat >= lpat) 338 { 339 return checkEnds(sipat, true); 340 } 341 342 // If we stopped at an other wildcard 343 // we exclude it from being compared 344 if (apat[ipat] == STAR) 345 { 346 ipat--; 347 } 348 349 // Now we need to check whether the literal substring of the pattern 350 // is contained in the string somewhere 351 final int l = ipat - sipat + 1; 352 final int sistr = istr; 353 354 while (istr < lstr && !strncmp(apat, sipat, astr, istr, l)) 355 { 356 istr++; 357 } 358 359 if (istr >= lstr) 360 { 361 return false; 362 } 363 364 add(new String(astr, sistr, istr - sistr)); 365 ipat++; 366 istr += l; 367 } 368 369 return match(); 370 } 371 372 /** 373 * Scan a possible common suffix 374 */ 375 private void scanLiteralPrefix() 376 { 377 // scan a common literal suffix 378 while (ipat < lpat && 379 istr < lstr && 380 (apat[ipat] == ESC && ipat < lpat - 1 && apat[ipat + 1] == STAR && 381 apat[++ipat] == astr[istr] || apat[ipat] != STAR && apat[ipat] == astr[istr])) 382 { 383 ipat++; 384 istr++; 385 } 386 } 387 388 /** 389 * Compare two character array from individual offsets 390 * 391 * @param a1 392 * The first character array 393 * @param o1 394 * The offset into the first character array 395 * @param a2 396 * The second character array 397 * @param o2 398 * The offset into the second character array 399 * @param l 400 * The length to compare 401 * 402 * @return Whether the all the mentioned characters match each other 403 */ 404 private boolean strncmp(final char[] a1, final int o1, final char[] a2, final int o2, 405 final int l) 406 { 407 int i = 0; 408 409 while (i < l && o1 + i < a1.length && o2 + i < a2.length && a1[o1 + i] == a2[o2 + i]) 410 { 411 i++; 412 } 413 414 return i == l; 415 } 416 417 private boolean checkEnds(final int sipat, final boolean isSingleStart) 418 { 419 // if the remaining length of the string isn't the same as that found in the pattern 420 // we do not match 421 final int l = lpat - sipat; // calculate length of comparison 422 final int ostr = lstr - l; // calculate offset into string 423 if (ostr >= 0 && strncmp(apat, sipat, astr, ostr, l)) 424 { 425 if (isSingleStart) 426 { 427 // if the ends matches make sure there isn't a PATHSEP in the candidate string 428 // part 429 int i = ostr - istr; 430 while (i > istr) 431 { 432 if (astr[--i] == PATHSEP) 433 { 434 return false; // we cannot match because of a PATHSEP in the matched 435 // part 436 } 437 } 438 } 439 add(new String(astr, istr, ostr - istr)); 440 441 return true; 442 } 443 444 // otherwise we do not match 445 return false; 446 } 447 } 448}