001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.response.filter;
018
019import org.apache.wicket.page.XmlPartialPageUpdate;
020import org.apache.wicket.util.string.AppendingStringBuffer;
021import org.apache.wicket.util.string.Strings;
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024
025/**
026 * An IResponseFilter that removes all invalid XML characters.
027 * By default it is used only for Wicket <em>Ajax</em> responses.
028 *
029 * <p>If the application needs to use it for other use cases then it can either override
030 * {@linkplain #shouldFilter(AppendingStringBuffer)} in the case it is used as IResponseFilter or
031 * {@linkplain #stripNonValidXMLCharacters(AppendingStringBuffer)} can be used directly.
032 * </p>
033 *
034 * <p>Usage:
035 *
036 *     MyApplication.java
037 *     <code><pre>
038 *         public void init() {
039 *             super.init();
040 *
041 *             getRequestCycleSettings().addResponseFilter(new XmlCleaningResponseFilter());
042 *         }
043 *     </pre></code>
044 * </p>
045 */
046public class XmlCleaningResponseFilter implements IResponseFilter
047{
048        private static final Logger LOG = LoggerFactory.getLogger(XmlCleaningResponseFilter.class);
049
050        @Override
051        public AppendingStringBuffer filter(AppendingStringBuffer responseBuffer)
052        {
053                AppendingStringBuffer result = responseBuffer;
054                if (shouldFilter(responseBuffer))
055                {
056                        result = stripNonValidXMLCharacters(responseBuffer);
057                }
058                return result;
059        }
060
061        /**
062         * Decides whether the filter should be applied.
063         *
064         * @param responseBuffer The buffer to filter
065         * @return {@code true} if the buffer brings Ajax response
066         */
067        protected boolean shouldFilter(AppendingStringBuffer responseBuffer)
068        {
069                // To avoid reading the whole buffer for non-Ajax responses
070                // read just the first N chars. A candidate can start with:
071                // <?xml version="1.0" encoding="UTF-8" standalone="yes"?><ajax-response>
072                int min = Math.min(150, responseBuffer.length());
073                String firstNChars = responseBuffer.substring(0, min);
074                return firstNChars.contains(XmlPartialPageUpdate.START_ROOT_ELEMENT);
075        }
076
077        /**
078         * This method ensures that the output String has only
079         * valid XML unicode characters as specified by the
080         * XML 1.0 standard. For reference, please see
081         * <a href="http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char">the
082         * standard</a>. This method will return an empty
083         * String if the input is null or empty.
084         *
085         * @param input The StringBuffer whose non-valid characters we want to remove.
086         * @return The in String, stripped of non-valid characters.
087         */
088        public AppendingStringBuffer stripNonValidXMLCharacters(AppendingStringBuffer input)
089        {
090                if (input == null) {
091                        return new AppendingStringBuffer();
092                }
093
094                char[] chars = input.getValue();
095                AppendingStringBuffer out = null;
096                boolean isDebugEnabled = LOG.isDebugEnabled();
097
098                int codePoint;
099
100                int i = 0;
101
102                while (i < input.length())
103                {
104                        codePoint = Character.codePointAt(chars, i, chars.length);
105
106                        if (!isValidXmlChar(codePoint))
107                        {
108                                if (out == null)
109                                {
110                                        out = new AppendingStringBuffer(chars.length);
111                                        out.append(input.subSequence(0, i));
112
113                                        if (isDebugEnabled)
114                                        {
115                                                LOG.debug("An invalid character '{}' found at position '{}' in '{}'",
116                                                                String.format("0x%X", codePoint), i, new String(chars));
117                                        }
118                                }
119                                else if (isDebugEnabled)
120                                {
121                                        LOG.debug(String.format("Dropping character for codePoint '0x%X' at position '%d'",
122                                                        codePoint, i));
123                                }
124                        }
125                        else if (out != null)
126                        {
127                                out.append(Character.toChars(codePoint));
128                        }
129
130                        // Increment with the number of code units(java chars) needed to represent a Unicode char.
131                        i += Character.charCount(codePoint);
132                }
133
134                return out != null ? out : input;
135        }
136
137        /**
138         * Checks whether the character represented by this codePoint is
139         * a valid in XML documents.
140         *
141         * @param codePoint The codePoint for the checked character
142         * @return {@code true} if the character can be used in XML documents
143         */
144        protected boolean isValidXmlChar(int codePoint)
145        {
146                return (codePoint == 0x9) ||
147                        (codePoint == 0xA) ||
148                        (codePoint == 0xD) ||
149                        ((codePoint >= 0x20) && (codePoint <= 0xD7FF)) ||
150                        ((codePoint >= 0xE000) && (codePoint <= 0xFFFD)) ||
151                        ((codePoint >= 0x10000) && (codePoint <= 0x10FFFF));
152        }
153}