JarContents.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.catalina.webresources;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

/**
 * This class represents the contents of a jar by determining whether a given resource <b>might</b> be in the cache,
 * based on a bloom filter. This is not a general-purpose bloom filter because it contains logic to strip out characters
 * from the beginning of the key. The hash methods are simple but good enough for this purpose.
 */
public final class JarContents {
    private final BitSet bits1;
    private final BitSet bits2;
    /**
     * Constant used by a typical hashing method.
     */
    private static final int HASH_PRIME_1 = 31;

    /**
     * Constant used by a typical hashing method.
     */
    private static final int HASH_PRIME_2 = 17;

    /**
     * Size of the fixed-length bit table. Larger reduces false positives, smaller saves memory.
     */
    private static final int TABLE_SIZE = 2048;

    /**
     * Parses the passed-in jar and populates the bit array.
     *
     * @param jar the JAR file
     */
    public JarContents(JarFile jar) {
        Enumeration<JarEntry> entries = jar.entries();
        bits1 = new BitSet(TABLE_SIZE);
        bits2 = new BitSet(TABLE_SIZE);

        while (entries.hasMoreElements()) {
            JarEntry entry = entries.nextElement();
            String name = entry.getName();
            int startPos = 0;

            // If the path starts with a slash, that's not useful information.
            // Skipping it increases the significance of our key by
            // removing an insignificant character.
            boolean precedingSlash = name.charAt(0) == '/';
            if (precedingSlash) {
                startPos = 1;
            }

            // Find the correct table slot
            int pathHash1 = hashcode(name, startPos, HASH_PRIME_1);
            int pathHash2 = hashcode(name, startPos, HASH_PRIME_2);

            bits1.set(pathHash1 % TABLE_SIZE);
            bits2.set(pathHash2 % TABLE_SIZE);

            // While directory entry names always end in "/", application code
            // may look them up without the trailing "/". Add this second form.
            if (entry.isDirectory()) {
                pathHash1 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_1);
                pathHash2 = hashcode(name, startPos, name.length() - 1, HASH_PRIME_2);

                bits1.set(pathHash1 % TABLE_SIZE);
                bits2.set(pathHash2 % TABLE_SIZE);
            }
        }
    }

    /**
     * Simple hashcode of a portion of the string. Typically we would use substring, but memory and runtime speed are
     * critical.
     *
     * @param content  Wrapping String.
     * @param startPos First character in the range.
     *
     * @return hashcode of the range.
     */
    private int hashcode(String content, int startPos, int hashPrime) {
        return hashcode(content, startPos, content.length(), hashPrime);
    }

    private int hashcode(String content, int startPos, int endPos, int hashPrime) {
        int h = hashPrime / 2;
        for (int i = startPos; i < endPos; i++) {
            h = hashPrime * h + content.charAt(i);
        }

        if (h < 0) {
            h = h * -1;
        }
        return h;
    }


    /**
     * Method that identifies whether a given path <b>MIGHT</b> be in this jar. Uses the Bloom filter mechanism.
     *
     * @param path       Requested path. Sometimes starts with "/WEB-INF/classes".
     * @param webappRoot The value of the webapp location, which can be stripped from the path. Typically is
     *                       "/WEB-INF/classes".
     *
     * @return Whether the prefix of the path is known to be in this jar.
     */
    public boolean mightContainResource(String path, String webappRoot) {
        int startPos = 0;
        if (path.startsWith(webappRoot)) {
            startPos = webappRoot.length();
        }

        if (path.charAt(startPos) == '/') {
            // ignore leading slash
            startPos++;
        }

        // calculate the hash lazily and return a boolean value for this path
        return (bits1.get(hashcode(path, startPos, HASH_PRIME_1) % TABLE_SIZE) &&
                bits2.get(hashcode(path, startPos, HASH_PRIME_2) % TABLE_SIZE));
    }

}