001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.response.filter; 018 019import org.apache.wicket.page.XmlPartialPageUpdate; 020import org.apache.wicket.util.string.AppendingStringBuffer; 021import org.apache.wicket.util.string.Strings; 022import org.slf4j.Logger; 023import org.slf4j.LoggerFactory; 024 025/** 026 * An IResponseFilter that removes all invalid XML characters. 027 * By default it is used only for Wicket <em>Ajax</em> responses. 028 * 029 * <p>If the application needs to use it for other use cases then it can either override 030 * {@linkplain #shouldFilter(AppendingStringBuffer)} in the case it is used as IResponseFilter or 031 * {@linkplain #stripNonValidXMLCharacters(AppendingStringBuffer)} can be used directly. 032 * </p> 033 * 034 * <p>Usage: 035 * 036 * MyApplication.java 037 * <code><pre> 038 * public void init() { 039 * super.init(); 040 * 041 * getRequestCycleSettings().addResponseFilter(new XmlCleaningResponseFilter()); 042 * } 043 * </pre></code> 044 * </p> 045 */ 046public class XmlCleaningResponseFilter implements IResponseFilter 047{ 048 private static final Logger LOG = LoggerFactory.getLogger(XmlCleaningResponseFilter.class); 049 050 @Override 051 public AppendingStringBuffer filter(AppendingStringBuffer responseBuffer) 052 { 053 AppendingStringBuffer result = responseBuffer; 054 if (shouldFilter(responseBuffer)) 055 { 056 result = stripNonValidXMLCharacters(responseBuffer); 057 } 058 return result; 059 } 060 061 /** 062 * Decides whether the filter should be applied. 063 * 064 * @param responseBuffer The buffer to filter 065 * @return {@code true} if the buffer brings Ajax response 066 */ 067 protected boolean shouldFilter(AppendingStringBuffer responseBuffer) 068 { 069 // To avoid reading the whole buffer for non-Ajax responses 070 // read just the first N chars. A candidate can start with: 071 // <?xml version="1.0" encoding="UTF-8" standalone="yes"?><ajax-response> 072 int min = Math.min(150, responseBuffer.length()); 073 String firstNChars = responseBuffer.substring(0, min); 074 return firstNChars.contains(XmlPartialPageUpdate.START_ROOT_ELEMENT); 075 } 076 077 /** 078 * This method ensures that the output String has only 079 * valid XML unicode characters as specified by the 080 * XML 1.0 standard. For reference, please see 081 * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the 082 * standard</a>. This method will return an empty 083 * String if the input is null or empty. 084 * 085 * @param input The StringBuffer whose non-valid characters we want to remove. 086 * @return The in String, stripped of non-valid characters. 087 */ 088 public AppendingStringBuffer stripNonValidXMLCharacters(AppendingStringBuffer input) 089 { 090 if (input == null) { 091 return new AppendingStringBuffer(); 092 } 093 094 char[] chars = input.getValue(); 095 AppendingStringBuffer out = null; 096 boolean isDebugEnabled = LOG.isDebugEnabled(); 097 098 int codePoint; 099 100 int i = 0; 101 102 while (i < input.length()) 103 { 104 codePoint = Character.codePointAt(chars, i, chars.length); 105 106 if (!isValidXmlChar(codePoint)) 107 { 108 if (out == null) 109 { 110 out = new AppendingStringBuffer(chars.length); 111 out.append(input.subSequence(0, i)); 112 113 if (isDebugEnabled) 114 { 115 LOG.debug("An invalid character '{}' found at position '{}' in '{}'", 116 String.format("0x%X", codePoint), i, new String(chars)); 117 } 118 } 119 else if (isDebugEnabled) 120 { 121 LOG.debug(String.format("Dropping character for codePoint '0x%X' at position '%d'", 122 codePoint, i)); 123 } 124 } 125 else if (out != null) 126 { 127 out.append(Character.toChars(codePoint)); 128 } 129 130 // Increment with the number of code units(java chars) needed to represent a Unicode char. 131 i += Character.charCount(codePoint); 132 } 133 134 return out != null ? out : input; 135 } 136 137 /** 138 * Checks whether the character represented by this codePoint is 139 * a valid in XML documents. 140 * 141 * @param codePoint The codePoint for the checked character 142 * @return {@code true} if the character can be used in XML documents 143 */ 144 protected boolean isValidXmlChar(int codePoint) 145 { 146 return (codePoint == 0x9) || 147 (codePoint == 0xA) || 148 (codePoint == 0xD) || 149 ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) || 150 ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) || 151 ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF)); 152 } 153}