001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.core.util.string; 018 019 020import org.apache.wicket.util.string.AppendingStringBuffer; 021 022/** 023 * Strips comments and whitespace from javascript 024 * 025 * @author Matej Knopp 026 */ 027public class JavaScriptStripper 028{ 029 /* 030 * Determines the state of script processing. 031 */ 032 /** Inside regular text */ 033 private final static int REGULAR_TEXT = 1; 034 035 /** String started with single quote (') */ 036 private final static int STRING_SINGLE_QUOTE = 2; 037 038 /** String started with double quotes (") */ 039 private final static int STRING_DOUBLE_QUOTES = 3; 040 041 /** Inside two or more whitespace characters */ 042 private final static int WHITE_SPACE = 4; 043 044 /** Inside a line comment (// ) */ 045 private final static int LINE_COMMENT = 5; 046 047 /** Inside a multi line comment */ 048 private final static int MULTILINE_COMMENT = 6; 049 050 /** Inside a regular expression */ 051 private final static int REG_EXP = 7; 052 053 /** Inside a template literal */ 054 private final static int TEMPLATE_LITERAL = 8; 055 056 /** Function "return" keyword, useful to identify if we are inside a regexp */ 057 private final static String RETURN_KEYWORD = "return"; 058 059 private int getPrevCount(String s, int fromIndex, char c) 060 { 061 int count = 0; 062 --fromIndex; 063 while (fromIndex >= 0) 064 { 065 if (s.charAt(fromIndex--) == c) 066 { 067 ++count; 068 } 069 else 070 { 071 break; 072 } 073 } 074 return count; 075 } 076 077 /** 078 * Removes javascript comments and whitespace from specified string. 079 * 080 * @param original 081 * Source string 082 * @return String with removed comments and whitespace 083 */ 084 public String stripCommentsAndWhitespace(String original) 085 { 086 // let's be optimistic 087 AppendingStringBuffer result = new AppendingStringBuffer(original.length() / 2); 088 int state = REGULAR_TEXT; 089 boolean wasNewLineInWhitespace = false; 090 091 for (int i = 0; i < original.length(); ++i) 092 { 093 char c = original.charAt(i); 094 char next = (i < original.length() - 1) ? original.charAt(i + 1) : 0; 095 char prev = (i > 0) ? original.charAt(i - 1) : 0; 096 097 if (state == WHITE_SPACE) 098 { 099 // WICKET 2060 100 if (c == '\n' && !wasNewLineInWhitespace) 101 { 102 result.append("\n"); 103 wasNewLineInWhitespace = true; 104 } 105 if (Character.isWhitespace(next) == false) 106 { 107 state = REGULAR_TEXT; 108 } 109 continue; 110 } 111 112 if (state == REGULAR_TEXT) 113 { 114 if (c == '/' && next == '/' && prev != '\\') 115 { 116 state = LINE_COMMENT; 117 continue; 118 } 119 else if (c == '/' && next == '*') 120 { 121 state = MULTILINE_COMMENT; 122 ++i; 123 continue; 124 } 125 else if (c == '/') 126 { 127 // This might be a divide operator, or it might be a regular expression. 128 // Work out if it's a regular expression by finding the previous non-whitespace 129 // char, which 130 // will be either '=' or '('. If it's not, it's just a divide operator. 131 int idx = result.length() - 1; 132 String trimmedResult = result.toString().trim(); 133 while (idx > 0) 134 { 135 char tmp = result.charAt(idx); 136 if (Character.isWhitespace(tmp)) 137 { 138 idx--; 139 continue; 140 } 141 if (tmp == '=' || tmp == '(' || tmp == '{' || tmp == ':' || tmp == ',' || 142 tmp == '[' || tmp == ';' || tmp == '!' || trimmedResult.endsWith(RETURN_KEYWORD)) 143 { 144 state = REG_EXP; 145 break; 146 } 147 break; 148 } 149 } 150 else if (Character.isWhitespace(c) && Character.isWhitespace(next)) 151 { 152 // WICKET-2060 153 if (c == '\n' || next == '\n') 154 { 155 c = '\n'; 156 wasNewLineInWhitespace = true; 157 } 158 else 159 { 160 c = ' '; 161 wasNewLineInWhitespace = false; 162 } 163 // ignore all whitespace characters after this one 164 state = WHITE_SPACE; 165 } 166 else if (c == '\'') 167 { 168 state = STRING_SINGLE_QUOTE; 169 } 170 else if (c == '"') 171 { 172 state = STRING_DOUBLE_QUOTES; 173 } 174 else if (c == '`') 175 { 176 state = TEMPLATE_LITERAL; 177 } 178 result.append(c); 179 continue; 180 } 181 182 if (state == LINE_COMMENT) 183 { 184 if (c == '\n' || c == '\r') 185 { 186 state = REGULAR_TEXT; 187 result.append(c); 188 continue; 189 } 190 } 191 192 if (state == MULTILINE_COMMENT) 193 { 194 if (c == '*' && next == '/') 195 { 196 state = REGULAR_TEXT; 197 ++i; 198 continue; 199 } 200 } 201 202 if (state == STRING_SINGLE_QUOTE) 203 { 204 // to leave a string expression we need even (or zero) number of backslashes 205 int count = getPrevCount(original, i, '\\'); 206 if (c == '\'' && count % 2 == 0) 207 { 208 state = REGULAR_TEXT; 209 } 210 result.append(c); 211 continue; 212 } 213 214 if (state == STRING_DOUBLE_QUOTES) 215 { 216 // to leave a string expression we need even (or zero) number of backslashes 217 int count = getPrevCount(original, i, '\\'); 218 if (c == '"' && count % 2 == 0) 219 { 220 state = REGULAR_TEXT; 221 } 222 result.append(c); 223 continue; 224 } 225 226 if (state == REG_EXP) 227 { 228 // to leave regular expression we need even (or zero) number of backslashes 229 int count = getPrevCount(original, i, '\\'); 230 if (c == '/' && count % 2 == 0) 231 { 232 state = REGULAR_TEXT; 233 } 234 result.append(c); 235 } 236 237 if (state == TEMPLATE_LITERAL) { 238 // to leave a template literal expression we need even (or zero) number of backslashes 239 int count = getPrevCount(original, i, '\\'); 240 if (c == '`' && count % 2 == 0) { 241 state = REGULAR_TEXT; 242 } 243 result.append(c); 244 continue; 245 } 246 } 247 248 return result.toString(); 249 } 250}