001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.markup;
018
019import java.io.IOException;
020import java.text.ParseException;
021import java.util.List;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import org.apache.wicket.Application;
026import org.apache.wicket.markup.parser.IMarkupFilter;
027import org.apache.wicket.markup.parser.IXmlPullParser;
028import org.apache.wicket.markup.parser.XmlPullParser;
029import org.apache.wicket.markup.parser.filter.RootMarkupFilter;
030import org.apache.wicket.settings.MarkupSettings;
031import org.apache.wicket.util.resource.ResourceStreamNotFoundException;
032import org.apache.wicket.util.resource.StringResourceStream;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * This is a base MarkupParser specifically for (X)HTML. It makes use of a streaming XML parser to
038 * read the markup and IMarkupFilters to remove comments, identify Wicket relevant tags, apply html
039 * specific treatments etc.. Please see WicketMarkupParser for a parser preconfigured for Wicket.
040 * <p>
041 * The result will be an Markup object, which is basically a list, containing Wicket relevant tags
042 * and RawMarkup.
043 * 
044 * @see IMarkupFilter
045 * @see MarkupFactory
046 * @see MarkupSettings
047 * 
048 * @author Jonathan Locke
049 * @author Juergen Donnerstag
050 */
051public abstract class AbstractMarkupParser
052{
053        /** Log for reporting. */
054        private static final Logger log = LoggerFactory.getLogger(AbstractMarkupParser.class);
055
056        /** Opening a conditional comment section, which is NOT treated as a comment section */
057        public static final Pattern CONDITIONAL_COMMENT_OPENING = Pattern.compile("(s?)^[^>]*?<!--\\[if.*?\\]>(-->)?(<!.*?-->)?");
058
059        private static final Pattern PRE_BLOCK = Pattern.compile("<pre>.*?</pre>", Pattern.DOTALL | Pattern.MULTILINE);
060        private static final Pattern SPACE_OR_TAB_PATTERN = Pattern.compile("[ \\t]+");
061        private static final Pattern NEW_LINE_PATTERN = Pattern.compile("( ?[\\r\\n] ?)+");
062
063        /** The XML parser to use */
064        private final IXmlPullParser xmlParser;
065
066        /** The markup handler chain: each filter has a specific task */
067        private IMarkupFilter markupFilterChain;
068
069        /** The markup created by reading the markup file */
070        private final Markup markup;
071
072        /** Temporary variable: Application.get().getMarkupSettings() */
073        private final MarkupSettings markupSettings;
074
075        private final List<IMarkupFilter> filters;
076
077        /**
078         * Constructor.
079         * 
080         * @param resource
081         *            The markup resource (file)
082         */
083        public AbstractMarkupParser(final MarkupResourceStream resource)
084        {
085                this(new XmlPullParser(), resource);
086        }
087
088        /**
089         * Constructor. Usually for testing purposes only
090         * 
091         * @param markup
092         *            The markup resource.
093         */
094        public AbstractMarkupParser(final String markup)
095        {
096                this(new XmlPullParser(), new MarkupResourceStream(new StringResourceStream(markup)));
097        }
098
099        /**
100         * Constructor.
101         * 
102         * @param xmlParser
103         *            The streaming xml parser to read and parse the markup
104         * @param resource
105         *            The markup resource (file)
106         */
107        public AbstractMarkupParser(final IXmlPullParser xmlParser, final MarkupResourceStream resource)
108        {
109                this.xmlParser = xmlParser;
110                markupSettings = Application.get().getMarkupSettings();
111
112                markup = new Markup(resource);
113
114                // The root of all filters is the xml parser
115                markupFilterChain = new RootMarkupFilter(xmlParser, resource);
116
117                // Initialize the markup filter chain
118                filters = initializeMarkupFilters(markup);
119        }
120
121        /**
122         * @return Gets the list of markup filters
123         */
124        public List<IMarkupFilter> getMarkupFilters()
125        {
126                return filters;
127        }
128
129        /**
130         * In case you want to analyze markup which BY DEFAULT does not use "wicket" to find relevant
131         * tags.
132         * 
133         * @param namespace
134         */
135        public final void setWicketNamespace(final String namespace)
136        {
137                markup.getMarkupResourceStream().setWicketNamespace(namespace);
138        }
139
140        /**
141         * Applications which subclass initFilterChain() might also wish to access the markup resource
142         * stream.
143         * 
144         * @return The markup resource stream
145         */
146        protected MarkupResourceStream getMarkupResourceStream()
147        {
148                return markup.getMarkupResourceStream();
149        }
150
151        /**
152         * Create a new markup filter chain and initialize with all default filters required.
153         * 
154         * @param markup
155         * @return The list of markup filters to be considered by the markup parser
156         */
157        protected abstract List<IMarkupFilter> initializeMarkupFilters(final Markup markup);
158
159        /**
160         * Reads and parses markup from a file.
161         * 
162         * @return The markup
163         * @throws IOException
164         * @throws ResourceStreamNotFoundException
165         */
166        public final Markup parse() throws IOException, ResourceStreamNotFoundException
167        {
168                // The root of all markup filters is the xml parser
169                markupFilterChain = new RootMarkupFilter(xmlParser, markup.getMarkupResourceStream());
170
171                // Convert the list of markup filters into a chain
172                for (IMarkupFilter filter : getMarkupFilters())
173                {
174                        filter.setNextFilter(markupFilterChain);
175                        markupFilterChain = filter;
176                }
177
178                // Initialize the xml parser
179                MarkupResourceStream markupResourceStream = markup.getMarkupResourceStream();
180                xmlParser.parse(markupResourceStream.getResource().getInputStream(),
181                        markupSettings.getDefaultMarkupEncoding());
182
183                // parse the xml markup and tokenize it into wicket relevant markup
184                // elements
185                parseMarkup();
186
187                markupResourceStream.setEncoding(xmlParser.getEncoding());
188                markupResourceStream.setDoctype(xmlParser.getDoctype());
189
190                if (xmlParser.getEncoding() == null)
191                {
192                        String a = "The markup file does not have a XML declaration prolog with 'encoding' attribute";
193                        String b = ". E.g. <?xml version=\"1.0\" encoding=\"UTF-8\" ?>";
194
195                        if (markupSettings.getThrowExceptionOnMissingXmlDeclaration())
196                        {
197                                throw new MarkupException(markupResourceStream.getResource(), a + b);
198                        }
199                        else
200                        {
201                                log.debug(a + ":" + markupResourceStream.getResource() + ". It is safer to use it" +
202                                        b);
203                        }
204                }
205
206                return markup;
207        }
208
209        /**
210         * Get the next tag from the markup file
211         * 
212         * @return The next tag
213         * @throws ParseException
214         */
215        private MarkupElement getNextTag() throws ParseException
216        {
217                return markupFilterChain.nextElement();
218        }
219
220        /**
221         * Scans the given markup and extracts balancing tags.
222         */
223        private void parseMarkup()
224        {
225                try
226                {
227                        // always remember the latest index (size)
228                        int size = markup.size();
229
230                        // Loop through tags
231                        MarkupElement elem;
232                        while (null != (elem = getNextTag()))
233                        {
234                                if (elem instanceof HtmlSpecialTag)
235                                {
236                                        elem = new ComponentTag(((HtmlSpecialTag)elem).getXmlTag());
237                                }
238
239                                if (elem instanceof ComponentTag)
240                                {
241                                        ComponentTag tag = (ComponentTag)elem;
242
243                                        boolean add = (tag.getId() != null);
244                                        if (!add && tag.isClose())
245                                        {
246                                                add = ((tag.getOpenTag() != null) && (tag.getOpenTag().getId() != null));
247                                        }
248
249                                        // Add tag to list?
250                                        if (add || tag.isModified() || (markup.size() != size))
251                                        {
252                                                // Add text from last position to the current tag position
253                                                CharSequence text = xmlParser.getInputFromPositionMarker(tag.getPos());
254                                                if (text.length() > 0)
255                                                {
256                                                        text = handleRawText(text.toString());
257
258                                                        // Make sure you add it at the correct location.
259                                                        // IMarkupFilters might have added elements as well.
260                                                        markup.addMarkupElement(size, new RawMarkup(text));
261                                                }
262
263                                                xmlParser.setPositionMarker();
264
265                                                if (add)
266                                                {
267                                                        // Add to the markup unless the tag has been flagged as
268                                                        // to be removed from the markup. (e.g. <wicket:remove>
269                                                        if (tag.isIgnore() == false)
270                                                        {
271                                                                markup.addMarkupElement(tag);
272                                                        }
273                                                }
274                                                else if (tag.isModified())
275                                                {
276                                                        markup.addMarkupElement(new RawMarkup(tag.toCharSequence()));
277                                                }
278                                                else
279                                                {
280                                                        xmlParser.setPositionMarker(tag.getPos());
281                                                }
282                                        }
283
284                                        // always remember the latest index (size)
285                                        size = markup.size();
286                                }
287                        }
288                }
289                catch (final ParseException ex)
290                {
291                        // Add remaining input string
292                        final CharSequence text = xmlParser.getInputFromPositionMarker(-1);
293                        if (text.length() > 0)
294                        {
295                                markup.addMarkupElement(new RawMarkup(text));
296                        }
297
298                        markup.getMarkupResourceStream().setEncoding(xmlParser.getEncoding());
299                        markup.getMarkupResourceStream().setDoctype(xmlParser.getDoctype());
300
301                        final MarkupStream markupStream = new MarkupStream(markup);
302                        markupStream.setCurrentIndex(markup.size() - 1);
303                        throw new MarkupException(markupStream, ex.getMessage(), ex);
304                }
305
306                // Add tail?
307                CharSequence text = xmlParser.getInputFromPositionMarker(-1);
308                if (text.length() > 0)
309                {
310                        text = handleRawText(text.toString());
311
312                        // Make sure you add it at the correct location.
313                        // IMarkupFilters might have added elements as well.
314                        markup.addMarkupElement(new RawMarkup(text));
315                }
316
317                postProcess(markup);
318
319                // Make all tags immutable and the list of elements unmodifiable
320                markup.makeImmutable();
321        }
322
323        /**
324         * 
325         * @param markup
326         */
327        protected void postProcess(final Markup markup)
328        {
329                IMarkupFilter filter = markupFilterChain;
330                while (filter != null)
331                {
332                        filter.postProcess(markup);
333                        filter = filter.getNextFilter();
334                }
335        }
336
337        /**
338         * 
339         * @param rawMarkup
340         * @return The modified raw markup
341         */
342        protected CharSequence handleRawText(String rawMarkup)
343        {
344                // Get relevant settings from the Application
345                final boolean stripComments = markupSettings.getStripComments();
346                final boolean compressWhitespace = markupSettings.getCompressWhitespace();
347
348                if (stripComments)
349                {
350                        rawMarkup = removeComment(rawMarkup);
351                }
352
353                if (compressWhitespace)
354                {
355                        rawMarkup = compressWhitespace(rawMarkup);
356                }
357
358                return rawMarkup;
359        }
360
361        /**
362         * Remove whitespace from the raw markup
363         * 
364         * @param rawMarkup
365         * @return rawMarkup
366         */
367        protected String compressWhitespace(String rawMarkup)
368        {
369                // We don't want to compress whitespace inside <pre> tags, so we look
370                // for matches and:
371                // - Do whitespace compression on everything before the first match.
372                // - Append the <pre>.*?</pre> match with no compression.
373                // - Loop to find the next match.
374                // - Append with compression everything between the two matches.
375                // - Repeat until no match, then special-case the fragment after the
376                // last <pre>.
377                Matcher m = PRE_BLOCK.matcher(rawMarkup);
378                int lastend = 0;
379                StringBuilder sb = null;
380                while (true)
381                {
382                        boolean matched = m.find();
383                        String nonPre = matched ? rawMarkup.substring(lastend, m.start())
384                                : rawMarkup.substring(lastend);
385                        nonPre = SPACE_OR_TAB_PATTERN.matcher(nonPre).replaceAll(" ");
386                        nonPre = NEW_LINE_PATTERN.matcher(nonPre).replaceAll("\n");
387
388                        // Don't create a StringBuilder if we don't actually need one.
389                        // This optimizes the trivial common case where there is no <pre>
390                        // tag at all down to just doing the replaceAlls above.
391                        if (lastend == 0)
392                        {
393                                if (matched)
394                                {
395                                        sb = new StringBuilder(rawMarkup.length());
396                                }
397                                else
398                                {
399                                        return nonPre;
400                                }
401                        }
402                        sb.append(nonPre);
403                        if (matched)
404                        {
405                                sb.append(m.group());
406                                lastend = m.end();
407                        }
408                        else
409                        {
410                                break;
411                        }
412                }
413                return sb.toString();
414        }
415
416
417        /**
418         * Remove all comment sections (&lt;!-- .. --&gt;) from the raw markup.
419         * 
420         * @param rawMarkup
421         * @return raw markup
422         */
423        private static String removeComment(String rawMarkup)
424        {
425                int pos1 = rawMarkup.indexOf("<!--");
426                while (pos1 != -1)
427                {
428                        final StringBuilder buf = new StringBuilder(rawMarkup.length());
429                        final String possibleComment = rawMarkup.substring(pos1);
430                        Matcher matcher = CONDITIONAL_COMMENT_OPENING.matcher(possibleComment);
431                        if (matcher.find())
432                        {
433                                pos1 = pos1 + matcher.end();
434                        }
435                        else
436                        {
437                                int pos2 = rawMarkup.indexOf("-->", pos1 + 4);
438                                buf.append(rawMarkup.substring(0, pos1));
439                                if (rawMarkup.length() >= pos2 + 3)
440                                {
441                                        buf.append(rawMarkup.substring(pos2 + 3));
442                                }
443                                rawMarkup = buf.toString();
444                        }
445                        pos1 = rawMarkup.indexOf("<!--", pos1);
446                }
447                return rawMarkup;
448        }
449
450        /**
451         * @see java.lang.Object#toString()
452         */
453        @Override
454        public String toString()
455        {
456                return markup.toString();
457        }
458}