001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.util.parse.metapattern;
018
019import java.util.Arrays;
020import java.util.List;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.apache.wicket.util.io.IClusterable;
025
026
027/**
028 * Useful class for constructing readable and reusable regular expressions.
029 * <p>
030 * MetaPatterns can be constructed from a simple regular expression String, from other MetaPatterns
031 * (copy constructor), from a list of MetaPatterns or from an array of MetaPatterns. In this way, it
032 * is easy to build up larger patterns while transparently binding the capturing groups of each
033 * MetaPattern for easy object oriented access to capturing group matches.
034 * <p>
035 * A given MetaPattern can be converted to a Matcher or Pattern. Groups within the MetaPattern can
036 * be used to automatically reference capturing group values when a match is made with a Matcher
037 * object.
038 * <p>
039 * A variety of static constants are provided for use in constructing compound MetaPatterns. Also, a
040 * number of simple parsers have been constructed using MetaPatterns in the parsers subpackage.
041 * 
042 * @author Jonathan Locke
043 */
044public class MetaPattern implements IClusterable
045{
046        private static final long serialVersionUID = 1L;
047
048        /**
049         * Compiled regular expression pattern, or null if patterns variable is valid instead
050         */
051        private Pattern pattern;
052
053        /** List of patterns, or null if pattern variable is valid instead */
054        private List<MetaPattern> patterns;
055
056        /** The compiled MetaPattern */
057        private Pattern compiledPattern;
058
059        // Regexps that are used multiple times in defining meta patterns
060        private static final String _DOUBLE_QUOTED_STRING = "\"[^\"]*?\"";
061        private static final String _SINGLE_QUOTED_STRING = "'[^']*?\'";
062        private static final String _STRING = "(?:[\\w\\-\\.]+|" + _DOUBLE_QUOTED_STRING + "|" +
063                _SINGLE_QUOTED_STRING + ")";
064        private static final String _OPTIONAL_STRING = _STRING + "?";
065        private static final String _VARIABLE_NAME = "[A-Za-z_][A-Za-z0-9_-]*";
066
067        // '@' char is not allowed by https://www.w3.org/TR/REC-xml/#NT-NameStartChar
068        // but we allow it to be friendlier with VueJS
069        private static final String _XML_NAME = "[A-Za-z_:@][A-Za-z0-9_.-]*";
070
071        // Delimiters and punctuation
072        /** Constant for whitespace. */
073        public static final MetaPattern WHITESPACE = new MetaPattern("\\s+");
074
075        /** Constant for optional whitespace. */
076        public static final MetaPattern OPTIONAL_WHITESPACE = new MetaPattern("\\s*");
077
078        /** Constant for non-word. */
079        public static final MetaPattern NON_WORD = new MetaPattern("\\W+");
080
081        /** Constant for comma. */
082        public static final MetaPattern COMMA = new MetaPattern(",");
083
084        /** Constant for colon. */
085        public static final MetaPattern COLON = new MetaPattern(":");
086
087        /** Constant for semicolon. */
088        public static final MetaPattern SEMICOLON = new MetaPattern(";");
089
090        /** Constant for slash. */
091        public static final MetaPattern SLASH = new MetaPattern("/");
092
093        /** Constant for backslash. */
094        public static final MetaPattern BACKSLASH = new MetaPattern("\\\\");
095
096        /** Constant for dot. */
097        public static final MetaPattern DOT = new MetaPattern("\\.");
098
099        /** Constant for plus. */
100        public static final MetaPattern PLUS = new MetaPattern("\\+");
101
102        /** Constant for minus. */
103        public static final MetaPattern MINUS = new MetaPattern("-");
104
105        /** Constant for dash. */
106        public static final MetaPattern DASH = new MetaPattern("-");
107
108        /** Constant for underscore. */
109        public static final MetaPattern UNDERSCORE = new MetaPattern("_");
110
111        /** Constant for ampersand. */
112        public static final MetaPattern AMPERSAND = new MetaPattern("&");
113
114        /** Constant for percent. */
115        public static final MetaPattern PERCENT = new MetaPattern("%");
116
117        /** Constant for dollar. */
118        public static final MetaPattern DOLLAR_SIGN = new MetaPattern("$");
119
120        /** Constant for pound. */
121        public static final MetaPattern POUND_SIGN = new MetaPattern("#");
122
123        /** Constant for at. */
124        public static final MetaPattern AT_SIGN = new MetaPattern("@");
125
126        /** Constant for excl. */
127        public static final MetaPattern EXCLAMATION_POINT = new MetaPattern("!");
128
129        /** Constant for tilde. */
130        public static final MetaPattern TILDE = new MetaPattern("~");
131
132        /** Constant for equals. */
133        public static final MetaPattern EQUALS = new MetaPattern("=");
134
135        /** Constant for star. */
136        public static final MetaPattern STAR = new MetaPattern("\\*");
137
138        /** Constant for pipe. */
139        public static final MetaPattern PIPE = new MetaPattern("\\|");
140
141        /** Constant for left paren. */
142        public static final MetaPattern LEFT_PAREN = new MetaPattern("\\(");
143
144        /** Constant for right paren. */
145        public static final MetaPattern RIGHT_PAREN = new MetaPattern("\\)");
146
147        /** Constant for left curly braces. */
148        public static final MetaPattern LEFT_CURLY = new MetaPattern("\\{");
149
150        /** Constant for right curly braces. */
151        public static final MetaPattern RIGHT_CURLY = new MetaPattern("\\}");
152
153        /** Constant for left square bracket. */
154        public static final MetaPattern LEFT_SQUARE = new MetaPattern("\\[");
155
156        /** Constant for right square bracket. */
157        public static final MetaPattern RIGHT_SQUARE = new MetaPattern("\\]");
158
159        /** Constant for digit. */
160        public static final MetaPattern DIGIT = new MetaPattern("\\d");
161
162        /** Constant for digits. */
163        public static final MetaPattern DIGITS = new MetaPattern("\\d+");
164
165        /** Constant for an integer (of any size). */
166        public static final MetaPattern INTEGER = new MetaPattern("-?\\d+");
167
168        /** Constant for a floating point number. */
169        public static final MetaPattern FLOATING_POINT_NUMBER = new MetaPattern(
170                "-?\\d+\\.?\\d*|-?\\.\\d+");
171
172        /** Constant for a positive integer. */
173        public static final MetaPattern POSITIVE_INTEGER = new MetaPattern("\\d+");
174
175        /** Constant for hex digit. */
176        public static final MetaPattern HEXADECIMAL_DIGIT = new MetaPattern("[0-9a-fA-F]");
177
178        /** Constant for hex digits. */
179        public static final MetaPattern HEXADECIMAL_DIGITS = new MetaPattern("[0-9a-fA-F]+");
180
181        /** Constant for anything (string). */
182        public static final MetaPattern ANYTHING = new MetaPattern(".*");
183
184        /** Constant for anything non-empty (string). */
185        public static final MetaPattern ANYTHING_NON_EMPTY = new MetaPattern(".+");
186
187        /** Constant for a word. */
188        public static final MetaPattern WORD = new MetaPattern("\\w+");
189
190        /** Constant for an optional word. */
191        public static final MetaPattern OPTIONAL_WORD = new MetaPattern("\\w*");
192
193        /** Constant for a variable name. */
194        public static final MetaPattern VARIABLE_NAME = new MetaPattern(_VARIABLE_NAME);
195
196        /** Constant for an XML element name. */
197        public static final MetaPattern XML_ELEMENT_NAME = new MetaPattern(_XML_NAME);
198
199        /** Constant for an XML attribute name. */
200        public static final MetaPattern XML_ATTRIBUTE_NAME = new MetaPattern(_XML_NAME);
201
202        /** Constant for perl interpolation. */
203        public static final MetaPattern PERL_INTERPOLATION = new MetaPattern("$\\{" + _VARIABLE_NAME +
204                "\\}");
205
206        /** Constant for a double quoted string. */
207        public static final MetaPattern DOUBLE_QUOTED_STRING = new MetaPattern(_DOUBLE_QUOTED_STRING);
208
209        /** Constant for a string. */
210        public static final MetaPattern STRING = new MetaPattern(_STRING);
211
212        /** Constant for an optional string. */
213        public static final MetaPattern OPTIONAL_STRING = new MetaPattern(_OPTIONAL_STRING);
214
215        /**
216         * Constructor for a simple pattern.
217         * 
218         * @param pattern
219         *            The regular expression pattern to compile
220         */
221        public MetaPattern(final String pattern)
222        {
223                this.pattern = Pattern.compile(pattern);
224        }
225
226        /**
227         * Copy constructor.
228         * 
229         * @param pattern
230         *            The meta pattern to copy
231         */
232        public MetaPattern(final MetaPattern pattern)
233        {
234                this.pattern = pattern.pattern;
235                patterns = pattern.patterns;
236                compiledPattern = pattern.compiledPattern;
237        }
238
239        /**
240         * Constructs from an array of MetaPatterns.
241         * 
242         * @param patterns
243         *            Array of MetaPatterns
244         */
245        public MetaPattern(final MetaPattern... patterns)
246        {
247                this(Arrays.asList(patterns));
248        }
249
250        /**
251         * Constructs from a list of MetaPatterns
252         * 
253         * @param patterns
254         *            List of MetaPatterns
255         */
256        public MetaPattern(final List<MetaPattern> patterns)
257        {
258                this.patterns = patterns;
259        }
260
261        /**
262         * Creates a matcher against a given input character sequence.
263         * 
264         * @param input
265         *            The input to match against
266         * @return The matcher
267         */
268        public final Matcher matcher(final CharSequence input)
269        {
270                return matcher(input, 0);
271        }
272
273        /**
274         * Creates a matcher with the given regexp compile flags. Once you call this method with a given
275         * regexp compile flag value, the pattern will be compiled. Calling it again with a different
276         * value for flags will not recompile the pattern.
277         * 
278         * @param input
279         *            The input to match
280         * @param flags
281         *            One or more of the standard Java regular expression compile flags (see
282         *            {@link Pattern#compile(String, int)})
283         * @return The matcher
284         */
285        public final Matcher matcher(final CharSequence input, final int flags)
286        {
287                compile(flags);
288                return compiledPattern.matcher(input);
289        }
290
291        /**
292         * Gets the regular expression Pattern for this MetaPattern by compiling it.
293         * 
294         * @return Pattern compiled with default Java regular expression compile flags
295         */
296        public final Pattern pattern()
297        {
298                return pattern(0);
299        }
300
301        /**
302         * Gets the regular expression Pattern for this MetaPattern by compiling it using the given
303         * flags.
304         * 
305         * @param flags
306         *            One or more of the standard Java regular expression compile flags (see
307         *            {@link Pattern#compile(String, int)})
308         * @return Equivalent Java regular expression Pattern compiled with the given flags
309         */
310        public final Pattern pattern(final int flags)
311        {
312                compile(flags);
313                return compiledPattern;
314        }
315
316        /**
317         * Converts this MetaPattern to a String.
318         * 
319         * @return A String representing this MetaPattern
320         * @see java.lang.Object#toString()
321         */
322        @Override
323        public String toString()
324        {
325                if (pattern != null)
326                {
327                        return pattern.pattern();
328                }
329                else
330                {
331                        final StringBuilder buffer = new StringBuilder();
332                        for (MetaPattern metaPattern : patterns)
333                        {
334                                buffer.append(metaPattern);
335                        }
336                        return buffer.toString();
337                }
338        }
339
340        /**
341         * Compiles this MetaPattern with the given Java regular expression flags.
342         * 
343         * @param flags
344         *            One or more of the standard Java regular expression compile flags (see
345         *            {@link Pattern#compile(String, int)})
346         */
347        private synchronized void compile(final int flags)
348        {
349                if (compiledPattern == null)
350                {
351                        bind(1);
352                        compiledPattern = Pattern.compile(toString(), flags);
353                }
354        }
355
356        /**
357         * Binds this MetaPattern to one or more capturing groups. Since MetaPatterns can nest, the
358         * binding process can recurse.
359         * 
360         * @param group
361         *            The initial capturing group number
362         * @return The final capturing group (for use in recursion)
363         */
364        private int bind(int group)
365        {
366                if (this instanceof Group)
367                {
368                        ((Group)this).bind(group++);
369                }
370
371                if (patterns != null)
372                {
373                        for (MetaPattern metaPattern : patterns)
374                        {
375                                group = metaPattern.bind(group);
376                        }
377                }
378
379                return group;
380        }
381}