/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.markup;

import java.io.IOException;
import java.text.ParseException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.wicket.Application;
import org.apache.wicket.markup.parser.IMarkupFilter;
import org.apache.wicket.markup.parser.IXmlPullParser;
import org.apache.wicket.markup.parser.XmlPullParser;
import org.apache.wicket.markup.parser.filter.RootMarkupFilter;
import org.apache.wicket.settings.MarkupSettings;
import org.apache.wicket.util.resource.ResourceStreamNotFoundException;
import org.apache.wicket.util.resource.StringResourceStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is a base MarkupParser specifically for (X)HTML. It makes use of a streaming XML parser to
 * read the markup and IMarkupFilters to remove comments, identify Wicket relevant tags, apply html
 * specific treatments etc.. Please see WicketMarkupParser for a parser preconfigured for Wicket.
 * <p>
 * The result will be a Markup object, which is basically a list, containing Wicket relevant tags
 * and RawMarkup.
 *
 * @see IMarkupFilter
 * @see MarkupFactory
 * @see MarkupSettings
 *
 * @author Jonathan Locke
 * @author Juergen Donnerstag
 */
public abstract class AbstractMarkupParser
{
	/** Log for reporting. */
	private static final Logger log = LoggerFactory.getLogger(AbstractMarkupParser.class);

	/**
	 * Opening a conditional comment section, which is NOT treated as a comment section.
	 * <p>
	 * NOTE(review): the leading {@code (s?)} is a capturing group for an optional literal 's'; it
	 * looks like a mistyped {@code (?s)} flag. The pattern is deliberately left untouched because
	 * this constant is public and changing it would alter what it matches - confirm before fixing.
	 */
	public static final Pattern CONDITIONAL_COMMENT_OPENING = Pattern.compile("(s?)^[^>]*?<!--\\[if.*?\\]>(-->)?(<!.*?-->)?");

	/** A complete {@code <pre>...</pre>} section; its content is exempt from whitespace compression */
	private static final Pattern PRE_BLOCK = Pattern.compile("<pre>.*?</pre>", Pattern.DOTALL | Pattern.MULTILINE);

	/** One or more consecutive blanks or tabs */
	private static final Pattern SPACE_OR_TAB_PATTERN = Pattern.compile("[ \\t]+");

	/** One or more consecutive line breaks, each optionally surrounded by a single blank */
	private static final Pattern NEW_LINE_PATTERN = Pattern.compile("( ?[\\r\\n] ?)+");

	/** The XML parser to use */
	private final IXmlPullParser xmlParser;

	/** The markup handler chain: each filter has a specific task */
	private IMarkupFilter markupFilterChain;

	/** The markup created by reading the markup file */
	private final Markup markup;

	/** Temporary variable: Application.get().getMarkupSettings() */
	private final MarkupSettings markupSettings;

	/** The filters returned by {@link #initializeMarkupFilters(Markup)}; chained in {@link #parse()} */
	private final List<IMarkupFilter> filters;

	/**
	 * Constructor.
	 *
	 * @param resource
	 *            The markup resource (file)
	 */
	public AbstractMarkupParser(final MarkupResourceStream resource)
	{
		this(new XmlPullParser(), resource);
	}

	/**
	 * Constructor. Usually for testing purposes only
	 *
	 * @param markup
	 *            The markup resource.
	 */
	public AbstractMarkupParser(final String markup)
	{
		this(new XmlPullParser(), new MarkupResourceStream(new StringResourceStream(markup)));
	}

	/**
	 * Constructor.
	 * <p>
	 * Note that {@link #initializeMarkupFilters(Markup)} is invoked from within this constructor,
	 * i.e. before any subclass constructor body has run.
	 *
	 * @param xmlParser
	 *            The streaming xml parser to read and parse the markup
	 * @param resource
	 *            The markup resource (file)
	 */
	public AbstractMarkupParser(final IXmlPullParser xmlParser, final MarkupResourceStream resource)
	{
		this.xmlParser = xmlParser;
		markupSettings = Application.get().getMarkupSettings();

		markup = new Markup(resource);

		// The root of all filters is the xml parser
		markupFilterChain = new RootMarkupFilter(xmlParser, resource);

		// Initialize the markup filter chain
		filters = initializeMarkupFilters(markup);
	}

	/**
	 * @return Gets the list of markup filters. This is the parser's own list instance, not a copy.
	 */
	public List<IMarkupFilter> getMarkupFilters()
	{
		return filters;
	}

	/**
	 * In case you want to analyze markup which BY DEFAULT does not use "wicket" to find relevant
	 * tags.
	 *
	 * @param namespace
	 *            The namespace to set on the markup resource stream
	 */
	public final void setWicketNamespace(final String namespace)
	{
		markup.getMarkupResourceStream().setWicketNamespace(namespace);
	}

	/**
	 * Applications which subclass {@link #initializeMarkupFilters(Markup)} might also wish to
	 * access the markup resource stream.
	 *
	 * @return The markup resource stream
	 */
	protected MarkupResourceStream getMarkupResourceStream()
	{
		return markup.getMarkupResourceStream();
	}

	/**
	 * Create a new markup filter chain and initialize with all default filters required.
	 *
	 * @param markup
	 *            The markup object which will receive the parsed elements
	 * @return The list of markup filters to be considered by the markup parser
	 */
	protected abstract List<IMarkupFilter> initializeMarkupFilters(final Markup markup);

	/**
	 * Reads and parses markup from a file.
	 *
	 * @return The markup
	 * @throws IOException
	 *             propagated from opening or reading the markup resource
	 * @throws ResourceStreamNotFoundException
	 *             propagated from opening the markup resource
	 * @throws MarkupException
	 *             if the markup cannot be parsed, or if it has no XML declaration with an encoding
	 *             and the markup settings request an exception in that case
	 */
	public final Markup parse() throws IOException, ResourceStreamNotFoundException
	{
		// The root of all markup filters is the xml parser
		markupFilterChain = new RootMarkupFilter(xmlParser, markup.getMarkupResourceStream());

		// Convert the list of markup filters into a chain. Each filter is put in front of the
		// chain built so far, hence the last filter in the list is the first one to be asked.
		for (IMarkupFilter filter : getMarkupFilters())
		{
			filter.setNextFilter(markupFilterChain);
			markupFilterChain = filter;
		}

		// Initialize the xml parser
		MarkupResourceStream markupResourceStream = markup.getMarkupResourceStream();
		xmlParser.parse(markupResourceStream.getResource().getInputStream(),
			markupSettings.getDefaultMarkupEncoding());

		// parse the xml markup and tokenize it into wicket relevant markup
		// elements
		parseMarkup();

		markupResourceStream.setEncoding(xmlParser.getEncoding());
		markupResourceStream.setDoctype(xmlParser.getDoctype());

		if (xmlParser.getEncoding() == null)
		{
			final String problem = "The markup file does not have a XML declaration prolog with 'encoding' attribute";
			final String example = ". E.g. <?xml version=\"1.0\" encoding=\"UTF-8\" ?>";

			if (markupSettings.getThrowExceptionOnMissingXmlDeclaration())
			{
				throw new MarkupException(markupResourceStream.getResource(), problem + example);
			}
			else
			{
				// Parameterized so that the message is only assembled when debug is enabled
				log.debug("{}:{}. It is safer to use it{}", problem,
					markupResourceStream.getResource(), example);
			}
		}

		return markup;
	}

	/**
	 * Get the next tag from the markup file
	 *
	 * @return The next tag, or null if the filter chain has no more elements
	 * @throws ParseException
	 *             propagated from the markup filter chain
	 */
	private MarkupElement getNextTag() throws ParseException
	{
		return markupFilterChain.nextElement();
	}

	/**
	 * Scans the given markup and extracts balancing tags.
	 * <p>
	 * Tags which carry an id (or close a tag which carries one) are added to the markup as they
	 * are; modified tags without an id are added as raw markup. The text in between is added as
	 * raw markup after it went through {@link #handleRawText(String)}. Finally the filters are
	 * given the chance to post-process the markup, which is then made immutable.
	 *
	 * @throws MarkupException
	 *             if the filter chain reports a ParseException
	 */
	private void parseMarkup()
	{
		try
		{
			// always remember the latest index (size)
			int size = markup.size();

			// Loop through tags
			MarkupElement elem;
			while (null != (elem = getNextTag()))
			{
				if (elem instanceof HtmlSpecialTag)
				{
					elem = new ComponentTag(((HtmlSpecialTag)elem).getXmlTag());
				}

				if (elem instanceof ComponentTag)
				{
					ComponentTag tag = (ComponentTag)elem;

					boolean add = (tag.getId() != null);
					if (!add && tag.isClose())
					{
						add = ((tag.getOpenTag() != null) && (tag.getOpenTag().getId() != null));
					}

					// Add tag to list? (markup.size() != size) means that a filter has added
					// elements on its own since the previous tag was processed.
					if (add || tag.isModified() || (markup.size() != size))
					{
						// Add text from last position to the current tag position
						CharSequence text = xmlParser.getInputFromPositionMarker(tag.getPos());
						if (text.length() > 0)
						{
							text = handleRawText(text.toString());

							// Make sure you add it at the correct location.
							// IMarkupFilters might have added elements as well.
							markup.addMarkupElement(size, new RawMarkup(text));
						}

						xmlParser.setPositionMarker();

						if (add)
						{
							// Add to the markup unless the tag has been flagged as
							// to be removed from the markup. (e.g. <wicket:remove>
							if (!tag.isIgnore())
							{
								markup.addMarkupElement(tag);
							}
						}
						else if (tag.isModified())
						{
							markup.addMarkupElement(new RawMarkup(tag.toCharSequence()));
						}
						else
						{
							// The tag itself is not added: move the marker back to the tag's
							// position so that its text becomes part of the next raw markup.
							xmlParser.setPositionMarker(tag.getPos());
						}
					}

					// always remember the latest index (size)
					size = markup.size();
				}
			}
		}
		catch (final ParseException ex)
		{
			// Add remaining input string (unprocessed, i.e. without handleRawText)
			final CharSequence text = xmlParser.getInputFromPositionMarker(-1);
			if (text.length() > 0)
			{
				markup.addMarkupElement(new RawMarkup(text));
			}

			markup.getMarkupResourceStream().setEncoding(xmlParser.getEncoding());
			markup.getMarkupResourceStream().setDoctype(xmlParser.getDoctype());

			// Position the stream at the last element added before reporting the error
			final MarkupStream markupStream = new MarkupStream(markup);
			markupStream.setCurrentIndex(markup.size() - 1);
			throw new MarkupException(markupStream, ex.getMessage(), ex);
		}

		// Add tail?
		CharSequence text = xmlParser.getInputFromPositionMarker(-1);
		if (text.length() > 0)
		{
			text = handleRawText(text.toString());

			// Append whatever raw text follows the last position marker
			markup.addMarkupElement(new RawMarkup(text));
		}

		postProcess(markup);

		// Make all tags immutable and the list of elements unmodifiable
		markup.makeImmutable();
	}

	/**
	 * Calls {@link IMarkupFilter#postProcess(Markup)} on every filter of the chain, starting with
	 * the head of the chain and following {@link IMarkupFilter#getNextFilter()} until it returns
	 * null.
	 *
	 * @param markup
	 *            The markup handed over to each filter
	 */
	protected void postProcess(final Markup markup)
	{
		IMarkupFilter filter = markupFilterChain;
		while (filter != null)
		{
			filter.postProcess(markup);
			filter = filter.getNextFilter();
		}
	}

	/**
	 * Applies the treatments enabled in the markup settings to a piece of raw markup: comments
	 * are removed first (if strip-comments is enabled), then whitespace is compressed (if
	 * compress-whitespace is enabled).
	 *
	 * @param rawMarkup
	 *            The raw markup text
	 * @return The modified raw markup
	 */
	protected CharSequence handleRawText(String rawMarkup)
	{
		// Get relevant settings from the Application
		final boolean stripComments = markupSettings.getStripComments();
		final boolean compressWhitespace = markupSettings.getCompressWhitespace();

		if (stripComments)
		{
			rawMarkup = removeComment(rawMarkup);
		}

		if (compressWhitespace)
		{
			rawMarkup = compressWhitespace(rawMarkup);
		}

		return rawMarkup;
	}

	/**
	 * Remove whitespace from the raw markup. Runs of blanks/tabs are replaced by a single blank
	 * and runs of line breaks by a single line feed. Text inside {@code <pre>...</pre>} sections
	 * is copied unchanged.
	 *
	 * @param rawMarkup
	 *            The raw markup text
	 * @return rawMarkup with its whitespace compressed
	 */
	protected String compressWhitespace(String rawMarkup)
	{
		Matcher m = PRE_BLOCK.matcher(rawMarkup);

		// Trivial common case: no <pre> section at all, hence no StringBuilder needed
		if (!m.find())
		{
			return compressNonPreText(rawMarkup);
		}

		final StringBuilder sb = new StringBuilder(rawMarkup.length());
		int lastend = 0;
		do
		{
			// Compress everything between the previous <pre> section (or the beginning of the
			// text) and the current one, then copy the <pre> section as it is.
			sb.append(compressNonPreText(rawMarkup.substring(lastend, m.start())));
			sb.append(m.group());
			lastend = m.end();
		}
		while (m.find());

		// The fragment after the last <pre> section
		sb.append(compressNonPreText(rawMarkup.substring(lastend)));
		return sb.toString();
	}

	/**
	 * Compresses the whitespace of a text fragment which is known to be outside of any
	 * {@code <pre>} section.
	 *
	 * @param text
	 *            The text fragment
	 * @return The text with blank/tab runs and line-break runs collapsed
	 */
	private static String compressNonPreText(final String text)
	{
		final String collapsedBlanks = SPACE_OR_TAB_PATTERN.matcher(text).replaceAll(" ");
		return NEW_LINE_PATTERN.matcher(collapsedBlanks).replaceAll("\n");
	}

	/**
	 * Remove all comment sections ({@code <!-- .. -->}) from the raw markup. Sections whose
	 * opening matches {@link #CONDITIONAL_COMMENT_OPENING} are kept.
	 * <p>
	 * A comment which is opened but never closed is left untouched and ends the scan. Without
	 * that guard the "not found" result of the search for the closing marker would be used as an
	 * index, which corrupts the text and may loop forever.
	 *
	 * @param rawMarkup
	 *            The raw markup text
	 * @return raw markup without the comment sections
	 */
	private static String removeComment(String rawMarkup)
	{
		int pos1 = rawMarkup.indexOf("<!--");
		while (pos1 != -1)
		{
			final String possibleComment = rawMarkup.substring(pos1);
			Matcher matcher = CONDITIONAL_COMMENT_OPENING.matcher(possibleComment);
			if (matcher.find())
			{
				// Conditional comment: keep it and continue searching right behind its opening
				pos1 = pos1 + matcher.end();
			}
			else
			{
				int pos2 = rawMarkup.indexOf("-->", pos1 + 4);
				if (pos2 == -1)
				{
					// Unterminated comment: nothing can safely be removed
					break;
				}

				// Cut out the comment including its opening and closing markers
				rawMarkup = new StringBuilder(rawMarkup.length()).append(rawMarkup, 0, pos1)
					.append(rawMarkup, pos2 + 3, rawMarkup.length())
					.toString();
			}
			pos1 = rawMarkup.indexOf("<!--", pos1);
		}
		return rawMarkup;
	}

	/**
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString()
	{
		return markup.toString();
	}
}