001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.wicket.core.util.string;
018
019
020import org.apache.wicket.util.string.AppendingStringBuffer;
021
/**
 * Strips comments and redundant whitespace from JavaScript source text.
 * <p>
 * The input is scanned once, character by character, by a small state machine that tracks
 * whether the current position is inside regular code, a string literal, a template literal, a
 * regular expression literal, a comment or a run of whitespace. Comments are dropped, runs of
 * two or more whitespace characters are shortened (keeping at most one line break per run), and
 * the contents of string, template and regular expression literals are copied verbatim.
 * <p>
 * Regular expression literals are recognised heuristically from the output that precedes a
 * slash; this class is not a full JavaScript parser.
 *
 * @author Matej Knopp
 */
public class JavaScriptStripper
{
    /**
     * Determines the state of script processing.
     */
    private enum State
    {
        /** Inside regular text */
        REGULAR_TEXT,

        /** String started with single quote (') */
        STRING_SINGLE_QUOTE,

        /** String started with double quotes (") */
        STRING_DOUBLE_QUOTES,

        /** Inside two or more whitespace characters */
        WHITE_SPACE,

        /** Inside a line comment (// ) */
        LINE_COMMENT,

        /** Inside a multi line comment */
        MULTILINE_COMMENT,

        /** Inside a regular expression */
        REG_EXP,

        /** Inside a template literal */
        TEMPLATE_LITERAL
    }

    /** Function "return" keyword, useful to identify if we are inside a regexp */
    private static final String RETURN_KEYWORD = "return";

    /**
     * Counts the consecutive occurrences of {@code c} that immediately precede
     * {@code fromIndex} in {@code s}.
     *
     * @param s
     *            string to inspect
     * @param fromIndex
     *            index whose predecessors are examined (the character at this index itself is
     *            not counted)
     * @param c
     *            character to count
     * @return length of the unbroken run of {@code c} ending at {@code fromIndex - 1}
     */
    private static int getPrevCount(String s, int fromIndex, char c)
    {
        int count = 0;
        for (int idx = fromIndex - 1; idx >= 0 && s.charAt(idx) == c; idx--)
        {
            ++count;
        }
        return count;
    }

    /**
     * Tells whether the character at {@code index} is free of a backslash escape, i.e. whether
     * it is preceded by an even (or zero) number of backslashes.
     *
     * @param s
     *            string to inspect
     * @param index
     *            position of the character in question
     * @return {@code true} if the character is not escaped
     */
    private static boolean isUnescaped(String s, int index)
    {
        return getPrevCount(s, index, '\\') % 2 == 0;
    }

    /**
     * Decides whether a slash about to be emitted starts a regular expression literal rather
     * than a division.
     * <p>
     * The last non-whitespace character already written to the output is examined: a slash is
     * taken as the start of a regular expression when that character is one of
     * {@code = ( { : , [ ; !} or when the output ends with the {@code return} keyword. The scan
     * reaches all the way back to the first output character.
     *
     * @param output
     *            everything emitted so far
     * @return {@code true} if the slash opens a regular expression; {@code false} otherwise,
     *         including when the output holds nothing but whitespace
     */
    private static boolean startsRegExp(CharSequence output)
    {
        int idx = output.length() - 1;
        while (idx >= 0 && Character.isWhitespace(output.charAt(idx)))
        {
            idx--;
        }
        if (idx < 0)
        {
            return false;
        }

        switch (output.charAt(idx))
        {
            case '=' :
            case '(' :
            case '{' :
            case ':' :
            case ',' :
            case '[' :
            case ';' :
            case '!' :
                return true;
            default :
                return endsWithReturnKeyword(output, idx);
        }
    }

    /**
     * Checks whether the characters of {@code output} ending at index {@code end} (inclusive)
     * spell the {@code return} keyword as a whole word.
     * <p>
     * The comparison is done in place so that the output buffer is not copied for every slash.
     *
     * @param output
     *            everything emitted so far
     * @param end
     *            index of the last significant character
     * @return {@code true} if {@code return} ends at {@code end} and is not merely the tail of a
     *         longer identifier such as {@code noreturn}
     */
    private static boolean endsWithReturnKeyword(CharSequence output, int end)
    {
        int start = end - RETURN_KEYWORD.length() + 1;
        if (start < 0)
        {
            return false;
        }
        for (int k = 0; k < RETURN_KEYWORD.length(); k++)
        {
            if (output.charAt(start + k) != RETURN_KEYWORD.charAt(k))
            {
                return false;
            }
        }
        // "return" must stand alone; "$", "_", letters and digits would make it an identifier
        return start == 0 || !Character.isJavaIdentifierPart(output.charAt(start - 1));
    }

    /**
     * Removes javascript comments and whitespace from specified string.
     *
     * @param original
     *            Source string, must not be {@code null}
     * @return String with removed comments and whitespace
     */
    public String stripCommentsAndWhitespace(String original)
    {
        // let's be optimistic
        StringBuilder result = new StringBuilder(original.length() / 2);
        State state = State.REGULAR_TEXT;
        boolean wasNewLineInWhitespace = false;

        for (int i = 0; i < original.length(); ++i)
        {
            char c = original.charAt(i);
            char next = (i < original.length() - 1) ? original.charAt(i + 1) : 0;
            char prev = (i > 0) ? original.charAt(i - 1) : 0;

            switch (state)
            {
                case WHITE_SPACE :
                    // WICKET-2060: keep one line break per whitespace run
                    if (c == '\n' && !wasNewLineInWhitespace)
                    {
                        result.append('\n');
                        wasNewLineInWhitespace = true;
                    }
                    if (!Character.isWhitespace(next))
                    {
                        state = State.REGULAR_TEXT;
                    }
                    break;

                case REGULAR_TEXT :
                    if (c == '/' && next == '/' && prev != '\\')
                    {
                        // the second slash is swallowed by the LINE_COMMENT state
                        state = State.LINE_COMMENT;
                        break;
                    }
                    if (c == '/' && next == '*')
                    {
                        state = State.MULTILINE_COMMENT;
                        // skip the '*' so that "/*/" is not read as an already closed comment
                        ++i;
                        break;
                    }

                    if (c == '/')
                    {
                        // This might be a divide operator, or it might be a regular expression.
                        if (startsRegExp(result))
                        {
                            state = State.REG_EXP;
                        }
                    }
                    else if (Character.isWhitespace(c) && Character.isWhitespace(next))
                    {
                        // WICKET-2060: a run containing a line break collapses to that line break
                        if (c == '\n' || next == '\n')
                        {
                            c = '\n';
                            wasNewLineInWhitespace = true;
                        }
                        else
                        {
                            c = ' ';
                            wasNewLineInWhitespace = false;
                        }
                        // ignore all whitespace characters after this one
                        state = State.WHITE_SPACE;
                    }
                    else if (c == '\'')
                    {
                        state = State.STRING_SINGLE_QUOTE;
                    }
                    else if (c == '"')
                    {
                        state = State.STRING_DOUBLE_QUOTES;
                    }
                    else if (c == '`')
                    {
                        state = State.TEMPLATE_LITERAL;
                    }
                    result.append(c);
                    break;

                case LINE_COMMENT :
                    // the comment text is dropped, the terminating line break is kept
                    if (c == '\n' || c == '\r')
                    {
                        state = State.REGULAR_TEXT;
                        result.append(c);
                    }
                    break;

                case MULTILINE_COMMENT :
                    if (c == '*' && next == '/')
                    {
                        state = State.REGULAR_TEXT;
                        ++i;
                    }
                    break;

                case STRING_SINGLE_QUOTE :
                    // to leave a string expression we need even (or zero) number of backslashes
                    if (c == '\'' && isUnescaped(original, i))
                    {
                        state = State.REGULAR_TEXT;
                    }
                    result.append(c);
                    break;

                case STRING_DOUBLE_QUOTES :
                    // to leave a string expression we need even (or zero) number of backslashes
                    if (c == '"' && isUnescaped(original, i))
                    {
                        state = State.REGULAR_TEXT;
                    }
                    result.append(c);
                    break;

                case REG_EXP :
                    // to leave regular expression we need even (or zero) number of backslashes
                    if (c == '/' && isUnescaped(original, i))
                    {
                        state = State.REGULAR_TEXT;
                    }
                    result.append(c);
                    break;

                case TEMPLATE_LITERAL :
                    // to leave a template literal we need even (or zero) number of backslashes
                    if (c == '`' && isUnescaped(original, i))
                    {
                        state = State.REGULAR_TEXT;
                    }
                    result.append(c);
                    break;
            }
        }

        return result.toString();
    }
}