001package org.jsoup.safety;
002
003/*
004    Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/rgrove/sanitize/, which inspired
005    this safe-list configuration, and the initial defaults.
006 */
007
008import org.jsoup.helper.Validate;
009import org.jsoup.internal.Normalizer;
010import org.jsoup.nodes.Attribute;
011import org.jsoup.nodes.Attributes;
012import org.jsoup.nodes.Element;
013
014import java.util.HashMap;
015import java.util.HashSet;
016import java.util.Iterator;
017import java.util.Map;
018import java.util.Objects;
019import java.util.Set;
020
021import static org.jsoup.internal.Normalizer.lowerCase;
022
023
024/**
025 Safelists define what HTML (elements and attributes) to allow through a {@link Cleaner}. Everything else is removed.
026 <p>
027 Start with one of the defaults:
028 </p>
029 <ul>
030 <li>{@link #none}
031 <li>{@link #simpleText}
032 <li>{@link #basic}
033 <li>{@link #basicWithImages}
034 <li>{@link #relaxed}
035 </ul>
036 <p>
037 If you need to allow more through (please be careful!), tweak a base safelist with:
038 </p>
039 <ul>
040 <li>{@link #addTags(String... tagNames)}
041 <li>{@link #addAttributes(String tagName, String... attributes)}
042 <li>{@link #addEnforcedAttribute(String tagName, String attribute, String value)}
043 <li>{@link #addProtocols(String tagName, String attribute, String... protocols)}
044 </ul>
045 <p>
046 You can remove any setting from an existing safelist with:
047 </p>
048 <ul>
049 <li>{@link #removeTags(String... tagNames)}
050 <li>{@link #removeAttributes(String tagName, String... attributes)}
051 <li>{@link #removeEnforcedAttribute(String tagName, String attribute)}
052 <li>{@link #removeProtocols(String tagName, String attribute, String... removeProtocols)}
053 </ul>
054
055 <p>
056 The {@link Cleaner} and these safelists assume that you want to clean a <code>body</code> fragment of HTML (to add user
057 supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, you could wrap
058 the templated document HTML around the cleaned body HTML.
059 </p>
060 <p>
061 Safelists are mutable. A {@link Cleaner} uses the supplied safelist directly, so later changes affect later cleaning
062 calls. If you want to share a safelist across threads, finish configuring it first and do not mutate it while it is in
063 use. To build a variant from an existing configuration, use {@link #Safelist(Safelist)} to make a copy.
064 </p>
065 <p>
066 If you are going to extend a safelist, please be very careful. Make sure you understand what attributes may lead to
067 XSS attack vectors. URL attributes are particularly vulnerable and require careful validation. See 
068 the <a href="https://owasp.org/www-community/xss-filter-evasion-cheatsheet">XSS Filter Evasion Cheat Sheet</a> for some
069 XSS attack examples (that jsoup will safeguard against with the default Cleaner and Safelist configuration).
070 </p>
071 */
072public class Safelist {
    // Pseudo tag name. Attributes registered under it are consulted as a fallback for every tag (see isSafeAttribute).
    private static final String All = ":all";
    // The ":all" pseudo tag in its normalized TagName form, for comparison against TagName keys.
    private static final TagName AllTag = TagName.valueOf(All);
    private final Set<TagName> tagNames; // tags allowed, lower case. e.g. [p, br, span]
    private final Map<TagName, Set<AttributeKey>> attributes; // tag -> attribute[]. allowed attributes [href] for a tag.
    private final Map<TagName, Map<AttributeKey, AttributeValue>> enforcedAttributes; // always set these attribute values
    // tag -> attribute -> allowed URL protocols. An attribute with no entry here is not protocol-checked.
    private final Map<TagName, Map<AttributeKey, Set<Protocol>>> protocols; // allowed URL protocols for attributes
    private boolean preserveRelativeLinks; // option to preserve relative links
080
081    /**
082     This safelist allows only text nodes: any HTML Element or any Node other than a TextNode will be removed.
083     <p>
084     Note that the output of {@link org.jsoup.Jsoup#clean(String, Safelist)} is still <b>HTML</b> even when using
085     this Safelist, and so any HTML entities in the output will be appropriately escaped. If you want plain text, not
086     HTML, you should use a text method such as {@link Element#text()} instead, after cleaning the document.
087     </p>
088     <p>Example:</p>
089     <pre>{@code
090     String sourceBodyHtml = "<p>5 is &lt; 6.</p>";
091     String html = Jsoup.clean(sourceBodyHtml, Safelist.none());
092
093     Cleaner cleaner = new Cleaner(Safelist.none());
094     String text = cleaner.clean(Jsoup.parse(sourceBodyHtml)).text();
095
096     // html is: 5 is &lt; 6.
097     // text is: 5 is < 6.
098     }</pre>
099
100     @return safelist
101     */
102    public static Safelist none() {
103        return new Safelist();
104    }
105
106    /**
107     This safelist allows only simple text formatting: <code>b, em, i, strong, u</code>. All other HTML (tags and
108     attributes) will be removed.
109
110     @return safelist
111     */
112    public static Safelist simpleText() {
113        return new Safelist()
114                .addTags("b", "em", "i", "strong", "u")
115                ;
116    }
117
118    /**
119     <p>
120     This safelist allows a fuller range of text nodes: <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li,
121     ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul</code>, and appropriate attributes.
122     </p>
123     <p>
124     Links (<code>a</code> elements) can point to <code>http, https, ftp, mailto</code>, and have an enforced
125     <code>rel=nofollow</code> attribute if they link offsite (as indicated by the specified base URI).
126     </p>
127     <p>
128     Does not allow images.
129     </p>
130
131     @return safelist
132     */
133    public static Safelist basic() {
134        return new Safelist()
135                .addTags(
136                        "a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em",
137                        "i", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub",
138                        "sup", "u", "ul")
139
140                .addAttributes("a", "href")
141                .addAttributes("blockquote", "cite")
142                .addAttributes("q", "cite")
143
144                .addProtocols("a", "href", "ftp", "http", "https", "mailto")
145                .addProtocols("blockquote", "cite", "http", "https")
146                .addProtocols("cite", "cite", "http", "https")
147
148                .addEnforcedAttribute("a", "rel", "nofollow") // has special handling for external links, in Cleaner
149                ;
150
151    }
152
153    /**
154     This safelist allows the same text tags as {@link #basic}, and also allows <code>img</code> tags, with appropriate
155     attributes, with <code>src</code> pointing to <code>http</code> or <code>https</code>.
156
157     @return safelist
158     */
159    public static Safelist basicWithImages() {
160        return basic()
161                .addTags("img")
162                .addAttributes("img", "align", "alt", "height", "src", "title", "width")
163                .addProtocols("img", "src", "http", "https")
164                ;
165    }
166
167    /**
168     This safelist allows a full range of text and structural body HTML: <code>a, b, blockquote, br, caption, cite,
169     code, col, colgroup, dd, div, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li, ol, p, pre, q, small, span, strike, strong, sub,
170     sup, table, tbody, td, tfoot, th, thead, tr, u, ul</code>
171     <p>
172     Links do not have an enforced <code>rel=nofollow</code> attribute, but you can add that if desired.
173     </p>
174
175     @return safelist
176     */
177    public static Safelist relaxed() {
178        return new Safelist()
179                .addTags(
180                        "a", "b", "blockquote", "br", "caption", "cite", "code", "col",
181                        "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6",
182                        "i", "img", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong",
183                        "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u",
184                        "ul")
185
186                .addAttributes("a", "href", "title")
187                .addAttributes("blockquote", "cite")
188                .addAttributes("col", "span", "width")
189                .addAttributes("colgroup", "span", "width")
190                .addAttributes("img", "align", "alt", "height", "src", "title", "width")
191                .addAttributes("ol", "start", "type")
192                .addAttributes("q", "cite")
193                .addAttributes("table", "summary", "width")
194                .addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width")
195                .addAttributes(
196                        "th", "abbr", "axis", "colspan", "rowspan", "scope",
197                        "width")
198                .addAttributes("ul", "type")
199
200                .addProtocols("a", "href", "ftp", "http", "https", "mailto")
201                .addProtocols("blockquote", "cite", "http", "https")
202                .addProtocols("cite", "cite", "http", "https")
203                .addProtocols("img", "src", "http", "https")
204                .addProtocols("q", "cite", "http", "https")
205                ;
206    }
207
208    /**
209     Create a new, empty safelist. Generally it will be better to start with a default prepared safelist instead.
210
211     @see #basic()
212     @see #basicWithImages()
213     @see #simpleText()
214     @see #relaxed()
215     */
216    public Safelist() {
217        tagNames = new HashSet<>();
218        attributes = new HashMap<>();
219        enforcedAttributes = new HashMap<>();
220        protocols = new HashMap<>();
221        preserveRelativeLinks = false;
222    }
223
224    /**
225     Deep copy an existing Safelist to a new Safelist.
226     @param copy the Safelist to copy
227     */
228    public Safelist(Safelist copy) {
229        this();
230        tagNames.addAll(copy.tagNames);
231        for (Map.Entry<TagName, Set<AttributeKey>> copyTagAttributes : copy.attributes.entrySet()) {
232            attributes.put(copyTagAttributes.getKey(), new HashSet<>(copyTagAttributes.getValue()));
233        }
234        for (Map.Entry<TagName, Map<AttributeKey, AttributeValue>> enforcedEntry : copy.enforcedAttributes.entrySet()) {
235            enforcedAttributes.put(enforcedEntry.getKey(), new HashMap<>(enforcedEntry.getValue()));
236        }
237        for (Map.Entry<TagName, Map<AttributeKey, Set<Protocol>>> protocolsEntry : copy.protocols.entrySet()) {
238            Map<AttributeKey, Set<Protocol>> attributeProtocolsCopy = new HashMap<>();
239            for (Map.Entry<AttributeKey, Set<Protocol>> attributeProtocols : protocolsEntry.getValue().entrySet()) {
240                attributeProtocolsCopy.put(attributeProtocols.getKey(), new HashSet<>(attributeProtocols.getValue()));
241            }
242            protocols.put(protocolsEntry.getKey(), attributeProtocolsCopy);
243        }
244        preserveRelativeLinks = copy.preserveRelativeLinks;
245    }
246
247    /**
248     Add a list of allowed elements to a safelist. (If a tag is not allowed, it will be removed from the HTML.)
249
250     @param tags tag names to allow
251     @return this (for chaining)
252     */
253    public Safelist addTags(String... tags) {
254        Validate.notNull(tags);
255
256        for (String tagName : tags) {
257            Validate.notEmpty(tagName);
258            Validate.isFalse(tagName.equalsIgnoreCase("noscript"),
259                "noscript is unsupported in Safelists, due to incompatibilities between parsers with and without script-mode enabled");
260            tagNames.add(TagName.valueOf(tagName));
261        }
262        return this;
263    }
264
265    /**
266     Remove a list of allowed elements from a safelist. (If a tag is not allowed, it will be removed from the HTML.)
267
268     @param tags tag names to disallow
269     @return this (for chaining)
270     */
271    public Safelist removeTags(String... tags) {
272        Validate.notNull(tags);
273
274        for(String tag: tags) {
275            Validate.notEmpty(tag);
276            TagName tagName = TagName.valueOf(tag);
277
278            if(tagNames.remove(tagName)) { // Only look in sub-maps if tag was allowed
279                attributes.remove(tagName);
280                enforcedAttributes.remove(tagName);
281                protocols.remove(tagName);
282            }
283        }
284        return this;
285    }
286
287    /**
288     Add a list of allowed attributes to a tag. (If an attribute is not allowed on an element, it will be removed.)
289     <p>
290     E.g.: <code>addAttributes("a", "href", "class")</code> allows <code>href</code> and <code>class</code> attributes
291     on <code>a</code> tags.
292     </p>
293     <p>
294     To make an attribute valid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g.
295     <code>addAttributes(":all", "class")</code>.
296     </p>
297
298     @param tag  The tag the attributes are for. The tag will be added to the allowed tag list if necessary.
299     @param attributes List of valid attributes for the tag
300     @return this (for chaining)
301     */
302    public Safelist addAttributes(String tag, String... attributes) {
303        Validate.notEmpty(tag);
304        Validate.notNull(attributes);
305        Validate.isTrue(attributes.length > 0, "No attribute names supplied.");
306
307        addTags(tag);
308        TagName tagName = TagName.valueOf(tag);
309        Set<AttributeKey> attributeSet = new HashSet<>();
310        for (String key : attributes) {
311            Validate.notEmpty(key);
312            attributeSet.add(AttributeKey.valueOf(key));
313        }
314        Set<AttributeKey> currentSet = this.attributes.computeIfAbsent(tagName, k -> new HashSet<>());
315        currentSet.addAll(attributeSet);
316        return this;
317    }
318
319    /**
320     Remove a list of allowed attributes from a tag. (If an attribute is not allowed on an element, it will be removed.)
321     <p>
322     E.g.: <code>removeAttributes("a", "href", "class")</code> disallows <code>href</code> and <code>class</code>
323     attributes on <code>a</code> tags.
324     </p>
325     <p>
326     To make an attribute invalid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g.
327     <code>removeAttributes(":all", "class")</code>.
328     </p>
329
330     @param tag  The tag the attributes are for.
331     @param attributes List of invalid attributes for the tag
332     @return this (for chaining)
333     */
334    public Safelist removeAttributes(String tag, String... attributes) {
335        Validate.notEmpty(tag);
336        Validate.notNull(attributes);
337        Validate.isTrue(attributes.length > 0, "No attribute names supplied.");
338
339        TagName tagName = TagName.valueOf(tag);
340        Set<AttributeKey> attributeSet = new HashSet<>();
341        for (String key : attributes) {
342            Validate.notEmpty(key);
343            attributeSet.add(AttributeKey.valueOf(key));
344        }
345        if(tagNames.contains(tagName) && this.attributes.containsKey(tagName)) { // Only look in sub-maps if tag was allowed
346            Set<AttributeKey> currentSet = this.attributes.get(tagName);
347            currentSet.removeAll(attributeSet);
348
349            if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag
350                this.attributes.remove(tagName);
351        }
352        if(tag.equals(All)) { // Attribute needs to be removed from all individually set tags
353            Iterator<Map.Entry<TagName, Set<AttributeKey>>> it = this.attributes.entrySet().iterator();
354            while (it.hasNext()) {
355                Map.Entry<TagName, Set<AttributeKey>> entry = it.next();
356                Set<AttributeKey> currentSet = entry.getValue();
357                currentSet.removeAll(attributeSet);
358                if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag
359                    it.remove();
360            }
361        }
362        return this;
363    }
364
365    /**
366     Add an enforced attribute to a tag. An enforced attribute will always be added to the element. If the element
367     already has the attribute set, it will be overridden with this value.
368     <p>
369     E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make all <code>a</code> tags output as
370     <code>&lt;a href="..." rel="nofollow"&gt;</code>
371     </p>
372
373     @param tag   The tag the enforced attribute is for. The tag will be added to the allowed tag list if necessary.
374     @param attribute   The attribute name
375     @param value The enforced attribute value
376     @return this (for chaining)
377     */
378    public Safelist addEnforcedAttribute(String tag, String attribute, String value) {
379        Validate.notEmpty(tag);
380        Validate.notEmpty(attribute);
381        Validate.notEmpty(value);
382
383        TagName tagName = TagName.valueOf(tag);
384        tagNames.add(tagName);
385        AttributeKey attrKey = AttributeKey.valueOf(attribute);
386        AttributeValue attrVal = AttributeValue.valueOf(value);
387
388        Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.computeIfAbsent(tagName, k -> new HashMap<>());
389        attrMap.put(attrKey, attrVal);
390        return this;
391    }
392
393    /**
394     Remove a previously configured enforced attribute from a tag.
395
396     @param tag   The tag the enforced attribute is for.
397     @param attribute   The attribute name
398     @return this (for chaining)
399     */
400    public Safelist removeEnforcedAttribute(String tag, String attribute) {
401        Validate.notEmpty(tag);
402        Validate.notEmpty(attribute);
403
404        TagName tagName = TagName.valueOf(tag);
405        if(tagNames.contains(tagName) && enforcedAttributes.containsKey(tagName)) {
406            AttributeKey attrKey = AttributeKey.valueOf(attribute);
407            Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.get(tagName);
408            attrMap.remove(attrKey);
409
410            if(attrMap.isEmpty()) // Remove tag from enforced attribute map if no enforced attributes are present
411                enforcedAttributes.remove(tagName);
412        }
413        return this;
414    }
415
416    /**
417     * Configure this Safelist to preserve relative links in an element's URL attribute, or convert them to absolute
     * links. By default, this is <b>false</b>: URLs will be made absolute (i.e. start with an allowed protocol, such as
     * {@code http://}).
420     *
421     * @param preserve {@code true} to allow relative links, {@code false} (default) to deny
422     * @return this Safelist, for chaining.
423     * @see #addProtocols
424     */
425    public Safelist preserveRelativeLinks(boolean preserve) {
426        preserveRelativeLinks = preserve;
427        return this;
428    }
429
430    /**
431     * Get the current setting for preserving relative links.
432     * @return {@code true} if relative links are preserved, {@code false} if they are converted to absolute.
433     */
434    public boolean preserveRelativeLinks() {
435        return preserveRelativeLinks;
436    }
437
438    /**
439     Add allowed URL protocols for an element's URL attribute. This restricts the possible values of the attribute to
440     URLs with the defined protocol.
441     <p>
442     E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code>
443     </p>
444     <p>
     To allow a link to an in-page URL anchor (i.e. <code>&lt;a href="#anchor"&gt;</code>), add a <code>#</code>:<br>
446     E.g.: <code>addProtocols("a", "href", "#")</code>
447     </p>
448
449     @param tag       Tag the URL protocol is for
450     @param attribute       Attribute name
451     @param protocols List of valid protocols
452     @return this, for chaining
453     */
454    public Safelist addProtocols(String tag, String attribute, String... protocols) {
455        Validate.notEmpty(tag);
456        Validate.notEmpty(attribute);
457        Validate.notNull(protocols);
458
459        TagName tagName = TagName.valueOf(tag);
460        AttributeKey attrKey = AttributeKey.valueOf(attribute);
461        Map<AttributeKey, Set<Protocol>> attrMap = this.protocols.computeIfAbsent(tagName, k -> new HashMap<>());
462        Set<Protocol> protSet = attrMap.computeIfAbsent(attrKey, k -> new HashSet<>());
463
464        for (String protocol : protocols) {
465            Validate.notEmpty(protocol);
466            Protocol prot = Protocol.valueOf(protocol);
467            protSet.add(prot);
468        }
469        return this;
470    }
471
472    /**
473     Remove allowed URL protocols for an element's URL attribute. If you remove all protocols for an attribute, that
474     attribute will allow any protocol.
475     <p>
476     E.g.: <code>removeProtocols("a", "href", "ftp")</code>
477     </p>
478
479     @param tag Tag the URL protocol is for
480     @param attribute Attribute name
481     @param removeProtocols List of invalid protocols
482     @return this, for chaining
483     */
484    public Safelist removeProtocols(String tag, String attribute, String... removeProtocols) {
485        Validate.notEmpty(tag);
486        Validate.notEmpty(attribute);
487        Validate.notNull(removeProtocols);
488
489        TagName tagName = TagName.valueOf(tag);
490        AttributeKey attr = AttributeKey.valueOf(attribute);
491
492        // make sure that what we're removing actually exists; otherwise can open the tag to any data and that can
493        // be surprising
494        Validate.isTrue(protocols.containsKey(tagName), "Cannot remove a protocol that is not set.");
495        Map<AttributeKey, Set<Protocol>> tagProtocols = protocols.get(tagName);
496        Validate.isTrue(tagProtocols.containsKey(attr), "Cannot remove a protocol that is not set.");
497
498        Set<Protocol> attrProtocols = tagProtocols.get(attr);
499        for (String protocol : removeProtocols) {
500            Validate.notEmpty(protocol);
501            attrProtocols.remove(Protocol.valueOf(protocol));
502        }
503
504        if (attrProtocols.isEmpty()) { // Remove protocol set if empty
505            tagProtocols.remove(attr);
506            if (tagProtocols.isEmpty()) // Remove entry for tag if empty
507                protocols.remove(tagName);
508        }
509        return this;
510    }
511
512    /**
513     * Test if the supplied tag is allowed by this safelist.
514     * @param tag test tag
515     * @return true if allowed
516     */
517    public boolean isSafeTag(String tag) {
518        return tagNames.contains(TagName.valueOf(tag));
519    }
520
521    /**
522     * Test if the supplied attribute is allowed by this safelist for this tag.
523     * <p>This method does not modify the input element or attribute.</p>
524     * @param tagName tag to consider allowing the attribute in
525     * @param el element under test, to confirm protocol
526     * @param attr attribute under test
527     * @return true if allowed
528     */
529    public boolean isSafeAttribute(String tagName, Element el, Attribute attr) {
530        TagName tag = TagName.valueOf(tagName);
531        AttributeKey key = AttributeKey.valueOf(attr.getKey());
532
533        Set<AttributeKey> okSet = attributes.get(tag);
534        if (okSet != null && okSet.contains(key)) {
535            if (protocols.containsKey(tag)) {
536                Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag);
537                // ok if not defined protocol; otherwise test
538                return !attrProts.containsKey(key) || isSafeProtocol(getProtocolValue(el, attr), attrProts.get(key));
539            } else { // attribute found, no protocols defined, so OK
540                return true;
541            }
542        }
543        Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag);
544        if (enforcedSet != null && enforcedSet.containsKey(key)) {
545            // enforced attr key was LCed via AttributeKey.valueOf(attr.getKey()),
546            // if the input already has that exact value, treat it as safe
547            return enforcedSet.get(key).equals(AttributeValue.valueOf(attr.getValue()));
548        }
549        // no attributes defined for tag, try :all tag
550        return !tagName.equals(All) && isSafeAttribute(All, el, attr);
551    }
552
553    private String getProtocolValue(Element el, Attribute attr) {
554        String value = el.absUrl(attr.getKey());
555        if (value.isEmpty())
556            value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols
557        return value;
558    }
559
560    private boolean isSafeProtocol(String value, Set<Protocol> protocols) {
561        for (Protocol protocol : protocols) {
562            String prot = protocol.toString();
563
564            if (prot.equals("#")) { // allows anchor links
565                if (isValidAnchor(value)) {
566                    return true;
567                } else {
568                    continue;
569                }
570            }
571
572            prot += ":";
573
574            if (lowerCase(value).startsWith(prot)) {
575                return true;
576            }
577        }
578        return false;
579    }
580
581    /**
582     Check if a URL attribute should be normalized to an absolute URL in the cleaned output. Uses the configured
583     protocols for that tag+attribute pair, falling back to {@code :all} only if the tag does not define the
584     attribute.
585     */
586    boolean shouldAbsUrl(String tagName, String attrKey) {
587        if (preserveRelativeLinks) return false;
588        return shouldAbsUrl(TagName.valueOf(tagName), AttributeKey.valueOf(attrKey));
589    }
590
591    private boolean shouldAbsUrl(TagName tag, AttributeKey key) {
592        Set<AttributeKey> allowedAttrs = attributes.get(tag);
593        if (allowedAttrs != null && allowedAttrs.contains(key)) {
594            Map<AttributeKey, Set<Protocol>> protocolsByAttr = protocols.get(tag);
595            return protocolsByAttr != null && protocolsByAttr.containsKey(key);
596        }
597
598        Map<AttributeKey, AttributeValue> enforcedAttrs = enforcedAttributes.get(tag);
599        if (enforcedAttrs != null && enforcedAttrs.containsKey(key)) return false;
600
601        return !tag.equals(AllTag) && shouldAbsUrl(AllTag, key);
602    }
603
604    private static boolean isValidAnchor(String value) {
605        return value.startsWith("#") && !value.matches(".*\\s.*");
606    }
607
608    /**
609     Gets the Attributes that should be enforced for a given tag
610     * @param tagName the tag
611     * @return the attributes that will be enforced; empty if none are set for the given tag
612     */
613    public Attributes getEnforcedAttributes(String tagName) {
614        Attributes attrs = new Attributes();
615        TagName tag = TagName.valueOf(tagName);
616        if (enforcedAttributes.containsKey(tag)) {
617            Map<AttributeKey, AttributeValue> keyVals = enforcedAttributes.get(tag);
618            for (Map.Entry<AttributeKey, AttributeValue> entry : keyVals.entrySet()) {
619                attrs.put(entry.getKey().toString(), entry.getValue().toString());
620            }
621        }
622        return attrs;
623    }
624    
625    // named types for config. All just hold strings, but here for my sanity.
626
627    static class TagName extends TypedValue {
628        TagName(String value) {
629            super(value);
630        }
631
632        static TagName valueOf(String value) {
633            return new TagName(Normalizer.lowerCase(value));
634        }
635    }
636
637    static class AttributeKey extends TypedValue {
638        AttributeKey(String value) {
639            super(value);
640        }
641
642        static AttributeKey valueOf(String value) {
643            return new AttributeKey(Normalizer.lowerCase(value));
644        }
645    }
646
647    static class AttributeValue extends TypedValue {
648        AttributeValue(String value) {
649            super(value);
650        }
651
652        static AttributeValue valueOf(String value) {
653            return new AttributeValue(value);
654        }
655    }
656
657    static class Protocol extends TypedValue {
658        Protocol(String value) {
659            super(value);
660        }
661
662        static Protocol valueOf(String value) {
663            return new Protocol(value);
664        }
665    }
666
667    abstract static class TypedValue {
668        private final String value;
669
670        TypedValue(String value) {
671            Validate.notNull(value);
672            this.value = value;
673        }
674
675        @Override
676        public int hashCode() {
677            return value.hashCode();
678        }
679
680        @Override
681        public boolean equals(Object obj) {
682            if (this == obj) return true;
683            if (obj == null || getClass() != obj.getClass()) return false;
684            TypedValue other = (TypedValue) obj;
685            return Objects.equals(value, other.value);
686        }
687
688        @Override
689        public String toString() {
690            return value;
691        }
692    }
693}