001package org.jsoup.helper;
002
003import org.jsoup.internal.SharedConstants;
004
005import java.lang.reflect.InvocationTargetException;
006import java.lang.reflect.Method;
007import java.util.regex.Pattern;
008import java.util.regex.PatternSyntaxException;
009
010/**
011 A regular expression abstraction. Allows jsoup to optionally use the re2j regular expression engine (linear time)
012 instead of the JDK's backtracking regex implementation.
013
014 <p>If the {@code com.google.re2j} library is found on the classpath, by default it will be used. You can override this
015 by setting {@code -Djsoup.useRe2j=false} to explicitly disable, and use the JDK regex engine.</p>
016
017 <p>(Currently this a simplified implementation for jsoup's specific use; can extend as required.)</p>
018 */
019public class Regex {
020    private static final boolean hasRe2j = hasRe2j();
021
022    private final Pattern jdkPattern;
023
024    Regex(Pattern jdkPattern) {
025        this.jdkPattern = jdkPattern;
026    }
027
028    /**
029     Compile a regex, using re2j if enabled and available; otherwise JDK regex.
030
031     @param regex the regex to compile
032     @return the compiled regex
033     @throws ValidationException if the regex is invalid
034     */
035    public static Regex compile(String regex) {
036        if (usingRe2j()) {
037            return Re2jRegex.compile(regex);
038        }
039
040        try {
041            return new Regex(Pattern.compile(regex));
042        } catch (PatternSyntaxException e) {
043            throw new ValidationException("Pattern syntax error: " + e.getMessage());
044        }
045    }
046
047    /** Wraps an existing JDK Pattern (for API compat); doesn't switch */
048    public static Regex fromPattern(Pattern pattern) {
049        return new Regex(pattern);
050    }
051
052    /**
053     Checks if re2j is available (on classpath) and enabled (via system property).
054     @return true if re2j is available and enabled
055     */
056    public static boolean usingRe2j() {
057        return hasRe2j && wantsRe2j();
058    }
059
060    static boolean wantsRe2j() {
061        return Boolean.parseBoolean(System.getProperty(SharedConstants.UseRe2j, "true"));
062    }
063
064    static void wantsRe2j(boolean use) {
065        System.setProperty(SharedConstants.UseRe2j, Boolean.toString(use));
066    }
067
068    static boolean hasRe2j() {
069        try {
070            Class<?> re2 = Class.forName("com.google.re2j.Pattern", false, Regex.class.getClassLoader()); // check if re2j is in classpath
071            try {
072                // if it is, and we are on JVM9+, we need to dork around with modules, because re2j doesn't publish a module name.
073                // done via reflection so we can still run on JVM 8.
074                // todo remove if re2j publishes as a module
075                Class<?> moduleCls = Class.forName("java.lang.Module");
076                Method getModule = Class.class.getMethod("getModule");
077                Object jsoupMod = getModule.invoke(Regex.class);
078                Object re2Mod = getModule.invoke(re2);
079                boolean reads = (boolean) moduleCls.getMethod("canRead", moduleCls).invoke(jsoupMod, re2Mod);
080                if (!reads) moduleCls.getMethod("addReads", moduleCls).invoke(jsoupMod, re2Mod);
081            } catch (ClassNotFoundException ignore) {
082                // jvm8 - no Module class; so we can use as-is
083            }
084            return true;
085        } catch (ClassNotFoundException e) {
086            return false; // no re2j
087        } catch (ReflectiveOperationException e) {
088            // unexpectedly couldn’t wire modules on 9+; return false to avoid IllegalAccessError later
089            System.err.println("Warning: (bug? please report) couldn't access re2j from jsoup due to modules: " + e);
090            return false;
091        }
092    }
093
094    public Matcher matcher(CharSequence input) {
095        return new JdkMatcher(jdkPattern.matcher(input));
096    }
097
098    @Override
099    public String toString() {
100        return jdkPattern.toString();
101    }
102
103    public interface Matcher {
104        boolean find();
105    }
106
107    private static final class JdkMatcher implements Matcher {
108        private final java.util.regex.Matcher delegate;
109
110        JdkMatcher(java.util.regex.Matcher delegate) {
111            this.delegate = delegate;
112        }
113
114        @Override
115        public boolean find() {
116            return delegate.find();
117        }
118    }
119}