001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.application;
018
019import java.util.HashMap;
020import java.util.Map;
021
022
023/**
024 * This class is an utility class that perform wildcard-patterns matching and isolation.
025 * 
026 * @version $Id: WildcardMatcherHelper.java 420832 2006-07-11 13:11:02Z cziegeler $
027 */
028public class WildcardMatcherHelper
029{
030        // ~ Static fields/initializers
031        // -----------------------------------------------------------------
032
033        /** Escape character */
034        public static final char ESC = '\\';
035
036        /** Default path separator: "." */
037        public static final char PATHSEP = '.';
038
039        /** any char */
040        public static final char STAR = '*';
041
042        // ~ Methods
043        // ------------------------------------------------------------------------------------
044
045        /**
046         * Match a pattern against a string and isolates wildcard replacement into a <code>Map</code>. <br>
047         * Here is how the matching algorithm works:
048         * 
049         * <ul>
050         * <li>The '*' character, meaning that zero or more characters (excluding the path separator
051         * '/') are to be matched.</li>
052         * <li>The '**' sequence, meaning that zero or more characters (including the path separator
053         * '/') are to be matched.</li>
054         * <li>The '\*' sequence is honored as a literal '*' character, not a wildcard</li>
055         * </ul>
056         * <br>
057         * When more than two '*' characters, not separated by another character, are found their value
058         * is considered as '**' and immediate succeeding '*' are skipped. <br>
059         * The '**' wildcard is greedy and thus the following sample cannot match:
060         * <dl>
061         * <dt>pattern</dt>
062         * <dd>STAR,STAR,PATHSEP,STAR,PATHSEP,STAR,STAR (why can't I express it literally?)</dt>
063         * <dt>string</dt>
064         * <dd>foo/bar/baz/bug</dt>
065         * </dl>
066         * The first '**' in the pattern will suck up until the last '/' in string and thus will not
067         * match. <br>
068         * A more advance algorithm could certainly check whether there is an other literal later in the
069         * pattern to ev. match in string but I'll leave this exercise to someone else ATM if one needs
070         * such.
071         * 
072         * @param pat
073         *            The pattern string.
074         * @param str
075         *            The string to math against the pattern
076         * 
077         * @return a <code>Map</code> containing the representation of the extracted pattern. The
078         *         extracted patterns are keys in the <code>Map</code> from left to right beginning with
079         *         "1" for the left most, "2" for the next, a.s.o. The key "0" is the string itself. If
080         *         the return value is null, string does not match to the pattern .
081         */
082        public static Map<String, String> match(final String pat, final String str)
083        {
084                final Matcher map = new Matcher(pat, str);
085
086                if (map.isMatch())
087                {
088                        return map.getMap();
089                }
090
091                return null;
092        }
093
094        // ~ Inner Classes
095        // ------------------------------------------------------------------------------
096
097        /**
098         * The private matcher class
099         */
100        private static class Matcher
101        {
102                // ~ Instance fields
103                // ------------------------------------------------------------------------
104
105                /** The character array of the pattern */
106                private final char[] apat;
107
108                /** The length of the character array of the pattern */
109                private final int lpat;
110
111                /** The character array of the string */
112                private final char[] astr;
113
114                /** The length of the character array of the string */
115                private final int lstr;
116
117                /** The <code>Map</code> to be filled */
118                private final Map<String, String> map = new HashMap<String, String>();
119
120                /** Whether string matched to pattern */
121                private final boolean matched;
122
123                /** map index */
124                private int idx = 0;
125
126                /** index into pattern */
127                private int ipat = 0;
128
129                /** index into string */
130                private int istr = 0;
131
132                // ~ Constructors
133                // ---------------------------------------------------------------------------
134
135                /**
136                 * Creates a new Matcher object.
137                 * 
138                 * @param pat
139                 *            The pattern
140                 * @param str
141                 *            The string
142                 */
143                public Matcher(final String pat, final String str)
144                {
145                        apat = pat.toCharArray();
146                        lpat = apat.length;
147                        astr = str.toCharArray();
148                        lstr = astr.length;
149                        add(str);
150                        matched = match();
151                }
152
153                // ~ Methods
154                // --------------------------------------------------------------------------------
155
156                /**
157                 * DOCUMENT ME!
158                 * 
159                 * @return DOCUMENT ME!
160                 */
161                public Map<String, String> getMap()
162                {
163                        return map;
164                }
165
166                /**
167                 * Has it matched?
168                 * 
169                 * @return whether it has matched
170                 */
171                public boolean isMatch()
172                {
173                        return matched;
174                }
175
176                /**
177                 * Add a extracted substring to the map
178                 * 
179                 * @param aStr
180                 *            The extracted substring
181                 */
182                private void add(final String aStr)
183                {
184                        String key = String.valueOf(idx++).intern();
185                        map.put(key, aStr);
186                }
187
188                /**
189                 * Scans the pattern and the search string from the start toward the end
190                 * 
191                 * @return whether the string matches the pattern
192                 */
193                private boolean match()
194                {
195                        // scan a common literal prefix
196                        scanLiteralPrefix();
197
198                        // if we are already at the end of both strings
199                        // than the pattern matched
200                        if (ipat >= lpat && istr >= lstr)
201                        {
202                                return true;
203                        }
204
205                        // if hole string has matched the pattern so far and the rest of the pattern only has
206                        // wildcard(s)
207                        // we match too otherwise we clearly don't match
208                        if (ipat < lpat && istr >= lstr)
209                        {
210                                while (ipat < lpat && apat[ipat] == STAR)
211                                {
212                                        ipat++;
213                                }
214
215                                if (ipat >= lpat)
216                                {
217                                        add("");
218
219                                        return true;
220                                }
221                                else
222                                {
223                                        return false;
224                                }
225                        }
226
227                        // if hole pattern has matched the string so far but the string has more characters left
228                        // we don't match
229                        if (ipat >= lpat && istr < lstr)
230                        {
231                                return false;
232                        }
233
234                        // if we have not stopped at a wildcard character
235                        // a character doesn't match and thus we do not match at all
236                        if (apat[ipat] != STAR)
237                        {
238                                return false;
239                        }
240
241                        // if it is a double (or more) wildcard pattern
242                        if (ipat < lpat - 1 && apat[ipat + 1] == STAR)
243                        {
244                                // skip to first non star character in the pattern
245                                while (++ipat < lpat && apat[ipat] == STAR)
246                                {
247                                        ; // noop
248                                }
249
250                                // if we are at the end of the pattern we've matched and are finish scanning
251                                if (ipat >= lpat)
252                                {
253                                        add(new String(astr, istr, lstr - istr));
254
255                                        return true;
256                                }
257
258                                // Now we need to scan for the end of the literal characters in the pattern
259                                final int sipat = ipat; // start position of a literal character used for substring
260                                // operations
261
262                                while (ipat < lpat && (apat[ipat] != STAR || (ipat > 0 && apat[ipat - 1] == ESC)))
263                                {
264                                        ipat++;
265                                }
266
267                                // if we reached the end of the pattern just do a string compare with the
268                                // corresponding part from
269                                // the end of the string
270                                if (ipat >= lpat)
271                                {
272                                        return checkEnds(sipat, false);
273                                }
274
275                                // Now we need to check whether the literal substring of the pattern
276                                // is contained in the string somewhere
277                                final int l = ipat - sipat;
278                                int eistr = lstr - l;
279
280                                // because the '**' wildcard need to be greedy we scan from the end of the string
281                                // for a match
282                                while (istr < eistr && !strncmp(apat, sipat, astr, eistr, l))
283                                {
284                                        eistr--;
285                                }
286
287                                if (istr >= eistr)
288                                {
289                                        return false;
290                                }
291
292                                add(new String(astr, istr, eistr - istr));
293                                istr = eistr + l;
294                        }
295                        else
296                        {// if it is a single star pattern
297                                // skip the star
298                                ++ipat;
299
300                                // if we are at the beginning of the pattern we have to check there is no PATH_SEP
301                                // in string
302                                if (ipat >= lpat)
303                                {
304                                        final int sistr = istr;
305
306                                        while (istr < lstr && (astr[istr] != PATHSEP))
307                                        {
308                                                istr++;
309                                        }
310
311                                        if (istr >= lstr)
312                                        {
313                                                add(new String(astr, sistr, lstr - sistr));
314
315                                                return true;
316                                        }
317
318                                        // otherwise we do not match
319                                        return false;
320                                }
321
322                                // Now we need to search for the start of either a path separator or another
323                                // wildcard characters
324                                // in the pattern
325                                final int sipat = ipat;
326
327                                while (ipat < lpat && apat[ipat] != STAR &&
328                                        (apat[ipat] != ESC || ipat < lpat - 1 && apat[ipat + 1] != STAR) &&
329                                        apat[ipat] != PATHSEP)
330                                {
331                                        ipat++;
332                                }
333
334                                // if we reached the end of the pattern just do a string compare with the
335                                // corresponding part from
336                                // the end of the string
337                                if (ipat >= lpat)
338                                {
339                                        return checkEnds(sipat, true);
340                                }
341
342                                // If we stopped at an other wildcard
343                                // we exclude it from being compared
344                                if (apat[ipat] == STAR)
345                                {
346                                        ipat--;
347                                }
348
349                                // Now we need to check whether the literal substring of the pattern
350                                // is contained in the string somewhere
351                                final int l = ipat - sipat + 1;
352                                final int sistr = istr;
353
354                                while (istr < lstr && !strncmp(apat, sipat, astr, istr, l))
355                                {
356                                        istr++;
357                                }
358
359                                if (istr >= lstr)
360                                {
361                                        return false;
362                                }
363
364                                add(new String(astr, sistr, istr - sistr));
365                                ipat++;
366                                istr += l;
367                        }
368
369                        return match();
370                }
371
372                /**
373                 * Scan a possible common suffix
374                 */
375                private void scanLiteralPrefix()
376                {
377                        // scan a common literal suffix
378                        while (ipat < lpat &&
379                                istr < lstr &&
380                                (apat[ipat] == ESC && ipat < lpat - 1 && apat[ipat + 1] == STAR &&
381                                        apat[++ipat] == astr[istr] || apat[ipat] != STAR && apat[ipat] == astr[istr]))
382                        {
383                                ipat++;
384                                istr++;
385                        }
386                }
387
388                /**
389                 * Compare two character array from individual offsets
390                 * 
391                 * @param a1
392                 *            The first character array
393                 * @param o1
394                 *            The offset into the first character array
395                 * @param a2
396                 *            The second character array
397                 * @param o2
398                 *            The offset into the second character array
399                 * @param l
400                 *            The length to compare
401                 * 
402                 * @return Whether the all the mentioned characters match each other
403                 */
404                private boolean strncmp(final char[] a1, final int o1, final char[] a2, final int o2,
405                        final int l)
406                {
407                        int i = 0;
408
409                        while (i < l && o1 + i < a1.length && o2 + i < a2.length && a1[o1 + i] == a2[o2 + i])
410                        {
411                                i++;
412                        }
413
414                        return i == l;
415                }
416
417                private boolean checkEnds(final int sipat, final boolean isSingleStart)
418                {
419                        // if the remaining length of the string isn't the same as that found in the pattern
420                        // we do not match
421                        final int l = lpat - sipat; // calculate length of comparison
422                        final int ostr = lstr - l; // calculate offset into string
423                        if (ostr >= 0 && strncmp(apat, sipat, astr, ostr, l))
424                        {
425                                if (isSingleStart)
426                                {
427                                        // if the ends matches make sure there isn't a PATHSEP in the candidate string
428                                        // part
429                                        int i = ostr - istr;
430                                        while (i > istr)
431                                        {
432                                                if (astr[--i] == PATHSEP)
433                                                {
434                                                        return false; // we cannot match because of a PATHSEP in the matched
435                                                        // part
436                                                }
437                                        }
438                                }
439                                add(new String(astr, istr, ostr - istr));
440
441                                return true;
442                        }
443
444                        // otherwise we do not match
445                        return false;
446                }
447        }
448}