001package org.jsoup.safety; 002 003/* 004 Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/rgrove/sanitize/, which inspired 005 this safe-list configuration, and the initial defaults. 006 */ 007 008import org.jsoup.helper.Validate; 009import org.jsoup.internal.Normalizer; 010import org.jsoup.nodes.Attribute; 011import org.jsoup.nodes.Attributes; 012import org.jsoup.nodes.Element; 013 014import java.util.HashMap; 015import java.util.HashSet; 016import java.util.Iterator; 017import java.util.Map; 018import java.util.Objects; 019import java.util.Set; 020 021import static org.jsoup.internal.Normalizer.lowerCase; 022 023 024/** 025 Safelists define what HTML (elements and attributes) to allow through a {@link Cleaner}. Everything else is removed. 026 <p> 027 Start with one of the defaults: 028 </p> 029 <ul> 030 <li>{@link #none} 031 <li>{@link #simpleText} 032 <li>{@link #basic} 033 <li>{@link #basicWithImages} 034 <li>{@link #relaxed} 035 </ul> 036 <p> 037 If you need to allow more through (please be careful!), tweak a base safelist with: 038 </p> 039 <ul> 040 <li>{@link #addTags(String... tagNames)} 041 <li>{@link #addAttributes(String tagName, String... attributes)} 042 <li>{@link #addEnforcedAttribute(String tagName, String attribute, String value)} 043 <li>{@link #addProtocols(String tagName, String attribute, String... protocols)} 044 </ul> 045 <p> 046 You can remove any setting from an existing safelist with: 047 </p> 048 <ul> 049 <li>{@link #removeTags(String... tagNames)} 050 <li>{@link #removeAttributes(String tagName, String... attributes)} 051 <li>{@link #removeEnforcedAttribute(String tagName, String attribute)} 052 <li>{@link #removeProtocols(String tagName, String attribute, String... removeProtocols)} 053 </ul> 054 055 <p> 056 The {@link Cleaner} and these safelists assume that you want to clean a <code>body</code> fragment of HTML (to add user 057 supplied HTML into a templated page), and not to clean a full HTML document. If the latter is the case, you could wrap 058 the templated document HTML around the cleaned body HTML. 059 </p> 060 <p> 061 Safelists are mutable. A {@link Cleaner} uses the supplied safelist directly, so later changes affect later cleaning 062 calls. If you want to share a safelist across threads, finish configuring it first and do not mutate it while it is in 063 use. To build a variant from an existing configuration, use {@link #Safelist(Safelist)} to make a copy. 064 </p> 065 <p> 066 If you are going to extend a safelist, please be very careful. Make sure you understand what attributes may lead to 067 XSS attack vectors. URL attributes are particularly vulnerable and require careful validation. See 068 the <a href="https://owasp.org/www-community/xss-filter-evasion-cheatsheet">XSS Filter Evasion Cheat Sheet</a> for some 069 XSS attack examples (that jsoup will safeguard against with the default Cleaner and Safelist configuration). 070 </p> 071 */ 072public class Safelist { 073 private static final String All = ":all"; 074 private static final TagName AllTag = TagName.valueOf(All); 075 private final Set<TagName> tagNames; // tags allowed, lower case. e.g. [p, br, span] 076 private final Map<TagName, Set<AttributeKey>> attributes; // tag -> attribute[]. allowed attributes [href] for a tag. 077 private final Map<TagName, Map<AttributeKey, AttributeValue>> enforcedAttributes; // always set these attribute values 078 private final Map<TagName, Map<AttributeKey, Set<Protocol>>> protocols; // allowed URL protocols for attributes 079 private boolean preserveRelativeLinks; // option to preserve relative links 080 081 /** 082 This safelist allows only text nodes: any HTML Element or any Node other than a TextNode will be removed. 083 <p> 084 Note that the output of {@link org.jsoup.Jsoup#clean(String, Safelist)} is still <b>HTML</b> even when using 085 this Safelist, and so any HTML entities in the output will be appropriately escaped. If you want plain text, not 086 HTML, you should use a text method such as {@link Element#text()} instead, after cleaning the document. 087 </p> 088 <p>Example:</p> 089 <pre>{@code 090 String sourceBodyHtml = "<p>5 is < 6.</p>"; 091 String html = Jsoup.clean(sourceBodyHtml, Safelist.none()); 092 093 Cleaner cleaner = new Cleaner(Safelist.none()); 094 String text = cleaner.clean(Jsoup.parse(sourceBodyHtml)).text(); 095 096 // html is: 5 is < 6. 097 // text is: 5 is < 6. 098 }</pre> 099 100 @return safelist 101 */ 102 public static Safelist none() { 103 return new Safelist(); 104 } 105 106 /** 107 This safelist allows only simple text formatting: <code>b, em, i, strong, u</code>. All other HTML (tags and 108 attributes) will be removed. 109 110 @return safelist 111 */ 112 public static Safelist simpleText() { 113 return new Safelist() 114 .addTags("b", "em", "i", "strong", "u") 115 ; 116 } 117 118 /** 119 <p> 120 This safelist allows a fuller range of text nodes: <code>a, b, blockquote, br, cite, code, dd, dl, dt, em, i, li, 121 ol, p, pre, q, small, span, strike, strong, sub, sup, u, ul</code>, and appropriate attributes. 122 </p> 123 <p> 124 Links (<code>a</code> elements) can point to <code>http, https, ftp, mailto</code>, and have an enforced 125 <code>rel=nofollow</code> attribute if they link offsite (as indicated by the specified base URI). 126 </p> 127 <p> 128 Does not allow images. 129 </p> 130 131 @return safelist 132 */ 133 public static Safelist basic() { 134 return new Safelist() 135 .addTags( 136 "a", "b", "blockquote", "br", "cite", "code", "dd", "dl", "dt", "em", 137 "i", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", "sub", 138 "sup", "u", "ul") 139 140 .addAttributes("a", "href") 141 .addAttributes("blockquote", "cite") 142 .addAttributes("q", "cite") 143 144 .addProtocols("a", "href", "ftp", "http", "https", "mailto") 145 .addProtocols("blockquote", "cite", "http", "https") 146 .addProtocols("cite", "cite", "http", "https") 147 148 .addEnforcedAttribute("a", "rel", "nofollow") // has special handling for external links, in Cleaner 149 ; 150 151 } 152 153 /** 154 This safelist allows the same text tags as {@link #basic}, and also allows <code>img</code> tags, with appropriate 155 attributes, with <code>src</code> pointing to <code>http</code> or <code>https</code>. 156 157 @return safelist 158 */ 159 public static Safelist basicWithImages() { 160 return basic() 161 .addTags("img") 162 .addAttributes("img", "align", "alt", "height", "src", "title", "width") 163 .addProtocols("img", "src", "http", "https") 164 ; 165 } 166 167 /** 168 This safelist allows a full range of text and structural body HTML: <code>a, b, blockquote, br, caption, cite, 169 code, col, colgroup, dd, div, dl, dt, em, h1, h2, h3, h4, h5, h6, i, img, li, ol, p, pre, q, small, span, strike, strong, sub, 170 sup, table, tbody, td, tfoot, th, thead, tr, u, ul</code> 171 <p> 172 Links do not have an enforced <code>rel=nofollow</code> attribute, but you can add that if desired. 173 </p> 174 175 @return safelist 176 */ 177 public static Safelist relaxed() { 178 return new Safelist() 179 .addTags( 180 "a", "b", "blockquote", "br", "caption", "cite", "code", "col", 181 "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", 182 "i", "img", "li", "ol", "p", "pre", "q", "small", "span", "strike", "strong", 183 "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", 184 "ul") 185 186 .addAttributes("a", "href", "title") 187 .addAttributes("blockquote", "cite") 188 .addAttributes("col", "span", "width") 189 .addAttributes("colgroup", "span", "width") 190 .addAttributes("img", "align", "alt", "height", "src", "title", "width") 191 .addAttributes("ol", "start", "type") 192 .addAttributes("q", "cite") 193 .addAttributes("table", "summary", "width") 194 .addAttributes("td", "abbr", "axis", "colspan", "rowspan", "width") 195 .addAttributes( 196 "th", "abbr", "axis", "colspan", "rowspan", "scope", 197 "width") 198 .addAttributes("ul", "type") 199 200 .addProtocols("a", "href", "ftp", "http", "https", "mailto") 201 .addProtocols("blockquote", "cite", "http", "https") 202 .addProtocols("cite", "cite", "http", "https") 203 .addProtocols("img", "src", "http", "https") 204 .addProtocols("q", "cite", "http", "https") 205 ; 206 } 207 208 /** 209 Create a new, empty safelist. Generally it will be better to start with a default prepared safelist instead. 210 211 @see #basic() 212 @see #basicWithImages() 213 @see #simpleText() 214 @see #relaxed() 215 */ 216 public Safelist() { 217 tagNames = new HashSet<>(); 218 attributes = new HashMap<>(); 219 enforcedAttributes = new HashMap<>(); 220 protocols = new HashMap<>(); 221 preserveRelativeLinks = false; 222 } 223 224 /** 225 Deep copy an existing Safelist to a new Safelist. 226 @param copy the Safelist to copy 227 */ 228 public Safelist(Safelist copy) { 229 this(); 230 tagNames.addAll(copy.tagNames); 231 for (Map.Entry<TagName, Set<AttributeKey>> copyTagAttributes : copy.attributes.entrySet()) { 232 attributes.put(copyTagAttributes.getKey(), new HashSet<>(copyTagAttributes.getValue())); 233 } 234 for (Map.Entry<TagName, Map<AttributeKey, AttributeValue>> enforcedEntry : copy.enforcedAttributes.entrySet()) { 235 enforcedAttributes.put(enforcedEntry.getKey(), new HashMap<>(enforcedEntry.getValue())); 236 } 237 for (Map.Entry<TagName, Map<AttributeKey, Set<Protocol>>> protocolsEntry : copy.protocols.entrySet()) { 238 Map<AttributeKey, Set<Protocol>> attributeProtocolsCopy = new HashMap<>(); 239 for (Map.Entry<AttributeKey, Set<Protocol>> attributeProtocols : protocolsEntry.getValue().entrySet()) { 240 attributeProtocolsCopy.put(attributeProtocols.getKey(), new HashSet<>(attributeProtocols.getValue())); 241 } 242 protocols.put(protocolsEntry.getKey(), attributeProtocolsCopy); 243 } 244 preserveRelativeLinks = copy.preserveRelativeLinks; 245 } 246 247 /** 248 Add a list of allowed elements to a safelist. (If a tag is not allowed, it will be removed from the HTML.) 249 250 @param tags tag names to allow 251 @return this (for chaining) 252 */ 253 public Safelist addTags(String... tags) { 254 Validate.notNull(tags); 255 256 for (String tagName : tags) { 257 Validate.notEmpty(tagName); 258 Validate.isFalse(tagName.equalsIgnoreCase("noscript"), 259 "noscript is unsupported in Safelists, due to incompatibilities between parsers with and without script-mode enabled"); 260 tagNames.add(TagName.valueOf(tagName)); 261 } 262 return this; 263 } 264 265 /** 266 Remove a list of allowed elements from a safelist. (If a tag is not allowed, it will be removed from the HTML.) 267 268 @param tags tag names to disallow 269 @return this (for chaining) 270 */ 271 public Safelist removeTags(String... tags) { 272 Validate.notNull(tags); 273 274 for(String tag: tags) { 275 Validate.notEmpty(tag); 276 TagName tagName = TagName.valueOf(tag); 277 278 if(tagNames.remove(tagName)) { // Only look in sub-maps if tag was allowed 279 attributes.remove(tagName); 280 enforcedAttributes.remove(tagName); 281 protocols.remove(tagName); 282 } 283 } 284 return this; 285 } 286 287 /** 288 Add a list of allowed attributes to a tag. (If an attribute is not allowed on an element, it will be removed.) 289 <p> 290 E.g.: <code>addAttributes("a", "href", "class")</code> allows <code>href</code> and <code>class</code> attributes 291 on <code>a</code> tags. 292 </p> 293 <p> 294 To make an attribute valid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g. 295 <code>addAttributes(":all", "class")</code>. 296 </p> 297 298 @param tag The tag the attributes are for. The tag will be added to the allowed tag list if necessary. 299 @param attributes List of valid attributes for the tag 300 @return this (for chaining) 301 */ 302 public Safelist addAttributes(String tag, String... attributes) { 303 Validate.notEmpty(tag); 304 Validate.notNull(attributes); 305 Validate.isTrue(attributes.length > 0, "No attribute names supplied."); 306 307 addTags(tag); 308 TagName tagName = TagName.valueOf(tag); 309 Set<AttributeKey> attributeSet = new HashSet<>(); 310 for (String key : attributes) { 311 Validate.notEmpty(key); 312 attributeSet.add(AttributeKey.valueOf(key)); 313 } 314 Set<AttributeKey> currentSet = this.attributes.computeIfAbsent(tagName, k -> new HashSet<>()); 315 currentSet.addAll(attributeSet); 316 return this; 317 } 318 319 /** 320 Remove a list of allowed attributes from a tag. (If an attribute is not allowed on an element, it will be removed.) 321 <p> 322 E.g.: <code>removeAttributes("a", "href", "class")</code> disallows <code>href</code> and <code>class</code> 323 attributes on <code>a</code> tags. 324 </p> 325 <p> 326 To make an attribute invalid for <b>all tags</b>, use the pseudo tag <code>:all</code>, e.g. 327 <code>removeAttributes(":all", "class")</code>. 328 </p> 329 330 @param tag The tag the attributes are for. 331 @param attributes List of invalid attributes for the tag 332 @return this (for chaining) 333 */ 334 public Safelist removeAttributes(String tag, String... attributes) { 335 Validate.notEmpty(tag); 336 Validate.notNull(attributes); 337 Validate.isTrue(attributes.length > 0, "No attribute names supplied."); 338 339 TagName tagName = TagName.valueOf(tag); 340 Set<AttributeKey> attributeSet = new HashSet<>(); 341 for (String key : attributes) { 342 Validate.notEmpty(key); 343 attributeSet.add(AttributeKey.valueOf(key)); 344 } 345 if(tagNames.contains(tagName) && this.attributes.containsKey(tagName)) { // Only look in sub-maps if tag was allowed 346 Set<AttributeKey> currentSet = this.attributes.get(tagName); 347 currentSet.removeAll(attributeSet); 348 349 if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag 350 this.attributes.remove(tagName); 351 } 352 if(tag.equals(All)) { // Attribute needs to be removed from all individually set tags 353 Iterator<Map.Entry<TagName, Set<AttributeKey>>> it = this.attributes.entrySet().iterator(); 354 while (it.hasNext()) { 355 Map.Entry<TagName, Set<AttributeKey>> entry = it.next(); 356 Set<AttributeKey> currentSet = entry.getValue(); 357 currentSet.removeAll(attributeSet); 358 if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag 359 it.remove(); 360 } 361 } 362 return this; 363 } 364 365 /** 366 Add an enforced attribute to a tag. An enforced attribute will always be added to the element. If the element 367 already has the attribute set, it will be overridden with this value. 368 <p> 369 E.g.: <code>addEnforcedAttribute("a", "rel", "nofollow")</code> will make all <code>a</code> tags output as 370 <code><a href="..." rel="nofollow"></code> 371 </p> 372 373 @param tag The tag the enforced attribute is for. The tag will be added to the allowed tag list if necessary. 374 @param attribute The attribute name 375 @param value The enforced attribute value 376 @return this (for chaining) 377 */ 378 public Safelist addEnforcedAttribute(String tag, String attribute, String value) { 379 Validate.notEmpty(tag); 380 Validate.notEmpty(attribute); 381 Validate.notEmpty(value); 382 383 TagName tagName = TagName.valueOf(tag); 384 tagNames.add(tagName); 385 AttributeKey attrKey = AttributeKey.valueOf(attribute); 386 AttributeValue attrVal = AttributeValue.valueOf(value); 387 388 Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.computeIfAbsent(tagName, k -> new HashMap<>()); 389 attrMap.put(attrKey, attrVal); 390 return this; 391 } 392 393 /** 394 Remove a previously configured enforced attribute from a tag. 395 396 @param tag The tag the enforced attribute is for. 397 @param attribute The attribute name 398 @return this (for chaining) 399 */ 400 public Safelist removeEnforcedAttribute(String tag, String attribute) { 401 Validate.notEmpty(tag); 402 Validate.notEmpty(attribute); 403 404 TagName tagName = TagName.valueOf(tag); 405 if(tagNames.contains(tagName) && enforcedAttributes.containsKey(tagName)) { 406 AttributeKey attrKey = AttributeKey.valueOf(attribute); 407 Map<AttributeKey, AttributeValue> attrMap = enforcedAttributes.get(tagName); 408 attrMap.remove(attrKey); 409 410 if(attrMap.isEmpty()) // Remove tag from enforced attribute map if no enforced attributes are present 411 enforcedAttributes.remove(tagName); 412 } 413 return this; 414 } 415 416 /** 417 * Configure this Safelist to preserve relative links in an element's URL attribute, or convert them to absolute 418 * links. By default, this is <b>false</b>: URLs will be made absolute (e.g. start with an allowed protocol, like 419 * e.g. {@code http://}. 420 * 421 * @param preserve {@code true} to allow relative links, {@code false} (default) to deny 422 * @return this Safelist, for chaining. 423 * @see #addProtocols 424 */ 425 public Safelist preserveRelativeLinks(boolean preserve) { 426 preserveRelativeLinks = preserve; 427 return this; 428 } 429 430 /** 431 * Get the current setting for preserving relative links. 432 * @return {@code true} if relative links are preserved, {@code false} if they are converted to absolute. 433 */ 434 public boolean preserveRelativeLinks() { 435 return preserveRelativeLinks; 436 } 437 438 /** 439 Add allowed URL protocols for an element's URL attribute. This restricts the possible values of the attribute to 440 URLs with the defined protocol. 441 <p> 442 E.g.: <code>addProtocols("a", "href", "ftp", "http", "https")</code> 443 </p> 444 <p> 445 To allow a link to an in-page URL anchor (i.e. <code><a href="#anchor"></code>, add a <code>#</code>:<br> 446 E.g.: <code>addProtocols("a", "href", "#")</code> 447 </p> 448 449 @param tag Tag the URL protocol is for 450 @param attribute Attribute name 451 @param protocols List of valid protocols 452 @return this, for chaining 453 */ 454 public Safelist addProtocols(String tag, String attribute, String... protocols) { 455 Validate.notEmpty(tag); 456 Validate.notEmpty(attribute); 457 Validate.notNull(protocols); 458 459 TagName tagName = TagName.valueOf(tag); 460 AttributeKey attrKey = AttributeKey.valueOf(attribute); 461 Map<AttributeKey, Set<Protocol>> attrMap = this.protocols.computeIfAbsent(tagName, k -> new HashMap<>()); 462 Set<Protocol> protSet = attrMap.computeIfAbsent(attrKey, k -> new HashSet<>()); 463 464 for (String protocol : protocols) { 465 Validate.notEmpty(protocol); 466 Protocol prot = Protocol.valueOf(protocol); 467 protSet.add(prot); 468 } 469 return this; 470 } 471 472 /** 473 Remove allowed URL protocols for an element's URL attribute. If you remove all protocols for an attribute, that 474 attribute will allow any protocol. 475 <p> 476 E.g.: <code>removeProtocols("a", "href", "ftp")</code> 477 </p> 478 479 @param tag Tag the URL protocol is for 480 @param attribute Attribute name 481 @param removeProtocols List of invalid protocols 482 @return this, for chaining 483 */ 484 public Safelist removeProtocols(String tag, String attribute, String... removeProtocols) { 485 Validate.notEmpty(tag); 486 Validate.notEmpty(attribute); 487 Validate.notNull(removeProtocols); 488 489 TagName tagName = TagName.valueOf(tag); 490 AttributeKey attr = AttributeKey.valueOf(attribute); 491 492 // make sure that what we're removing actually exists; otherwise can open the tag to any data and that can 493 // be surprising 494 Validate.isTrue(protocols.containsKey(tagName), "Cannot remove a protocol that is not set."); 495 Map<AttributeKey, Set<Protocol>> tagProtocols = protocols.get(tagName); 496 Validate.isTrue(tagProtocols.containsKey(attr), "Cannot remove a protocol that is not set."); 497 498 Set<Protocol> attrProtocols = tagProtocols.get(attr); 499 for (String protocol : removeProtocols) { 500 Validate.notEmpty(protocol); 501 attrProtocols.remove(Protocol.valueOf(protocol)); 502 } 503 504 if (attrProtocols.isEmpty()) { // Remove protocol set if empty 505 tagProtocols.remove(attr); 506 if (tagProtocols.isEmpty()) // Remove entry for tag if empty 507 protocols.remove(tagName); 508 } 509 return this; 510 } 511 512 /** 513 * Test if the supplied tag is allowed by this safelist. 514 * @param tag test tag 515 * @return true if allowed 516 */ 517 public boolean isSafeTag(String tag) { 518 return tagNames.contains(TagName.valueOf(tag)); 519 } 520 521 /** 522 * Test if the supplied attribute is allowed by this safelist for this tag. 523 * <p>This method does not modify the input element or attribute.</p> 524 * @param tagName tag to consider allowing the attribute in 525 * @param el element under test, to confirm protocol 526 * @param attr attribute under test 527 * @return true if allowed 528 */ 529 public boolean isSafeAttribute(String tagName, Element el, Attribute attr) { 530 TagName tag = TagName.valueOf(tagName); 531 AttributeKey key = AttributeKey.valueOf(attr.getKey()); 532 533 Set<AttributeKey> okSet = attributes.get(tag); 534 if (okSet != null && okSet.contains(key)) { 535 if (protocols.containsKey(tag)) { 536 Map<AttributeKey, Set<Protocol>> attrProts = protocols.get(tag); 537 // ok if not defined protocol; otherwise test 538 return !attrProts.containsKey(key) || isSafeProtocol(getProtocolValue(el, attr), attrProts.get(key)); 539 } else { // attribute found, no protocols defined, so OK 540 return true; 541 } 542 } 543 Map<AttributeKey, AttributeValue> enforcedSet = enforcedAttributes.get(tag); 544 if (enforcedSet != null && enforcedSet.containsKey(key)) { 545 // enforced attr key was LCed via AttributeKey.valueOf(attr.getKey()), 546 // if the input already has that exact value, treat it as safe 547 return enforcedSet.get(key).equals(AttributeValue.valueOf(attr.getValue())); 548 } 549 // no attributes defined for tag, try :all tag 550 return !tagName.equals(All) && isSafeAttribute(All, el, attr); 551 } 552 553 private String getProtocolValue(Element el, Attribute attr) { 554 String value = el.absUrl(attr.getKey()); 555 if (value.isEmpty()) 556 value = attr.getValue(); // if it could not be made abs, run as-is to allow custom unknown protocols 557 return value; 558 } 559 560 private boolean isSafeProtocol(String value, Set<Protocol> protocols) { 561 for (Protocol protocol : protocols) { 562 String prot = protocol.toString(); 563 564 if (prot.equals("#")) { // allows anchor links 565 if (isValidAnchor(value)) { 566 return true; 567 } else { 568 continue; 569 } 570 } 571 572 prot += ":"; 573 574 if (lowerCase(value).startsWith(prot)) { 575 return true; 576 } 577 } 578 return false; 579 } 580 581 /** 582 Check if a URL attribute should be normalized to an absolute URL in the cleaned output. Uses the configured 583 protocols for that tag+attribute pair, falling back to {@code :all} only if the tag does not define the 584 attribute. 585 */ 586 boolean shouldAbsUrl(String tagName, String attrKey) { 587 if (preserveRelativeLinks) return false; 588 return shouldAbsUrl(TagName.valueOf(tagName), AttributeKey.valueOf(attrKey)); 589 } 590 591 private boolean shouldAbsUrl(TagName tag, AttributeKey key) { 592 Set<AttributeKey> allowedAttrs = attributes.get(tag); 593 if (allowedAttrs != null && allowedAttrs.contains(key)) { 594 Map<AttributeKey, Set<Protocol>> protocolsByAttr = protocols.get(tag); 595 return protocolsByAttr != null && protocolsByAttr.containsKey(key); 596 } 597 598 Map<AttributeKey, AttributeValue> enforcedAttrs = enforcedAttributes.get(tag); 599 if (enforcedAttrs != null && enforcedAttrs.containsKey(key)) return false; 600 601 return !tag.equals(AllTag) && shouldAbsUrl(AllTag, key); 602 } 603 604 private static boolean isValidAnchor(String value) { 605 return value.startsWith("#") && !value.matches(".*\\s.*"); 606 } 607 608 /** 609 Gets the Attributes that should be enforced for a given tag 610 * @param tagName the tag 611 * @return the attributes that will be enforced; empty if none are set for the given tag 612 */ 613 public Attributes getEnforcedAttributes(String tagName) { 614 Attributes attrs = new Attributes(); 615 TagName tag = TagName.valueOf(tagName); 616 if (enforcedAttributes.containsKey(tag)) { 617 Map<AttributeKey, AttributeValue> keyVals = enforcedAttributes.get(tag); 618 for (Map.Entry<AttributeKey, AttributeValue> entry : keyVals.entrySet()) { 619 attrs.put(entry.getKey().toString(), entry.getValue().toString()); 620 } 621 } 622 return attrs; 623 } 624 625 // named types for config. All just hold strings, but here for my sanity. 626 627 static class TagName extends TypedValue { 628 TagName(String value) { 629 super(value); 630 } 631 632 static TagName valueOf(String value) { 633 return new TagName(Normalizer.lowerCase(value)); 634 } 635 } 636 637 static class AttributeKey extends TypedValue { 638 AttributeKey(String value) { 639 super(value); 640 } 641 642 static AttributeKey valueOf(String value) { 643 return new AttributeKey(Normalizer.lowerCase(value)); 644 } 645 } 646 647 static class AttributeValue extends TypedValue { 648 AttributeValue(String value) { 649 super(value); 650 } 651 652 static AttributeValue valueOf(String value) { 653 return new AttributeValue(value); 654 } 655 } 656 657 static class Protocol extends TypedValue { 658 Protocol(String value) { 659 super(value); 660 } 661 662 static Protocol valueOf(String value) { 663 return new Protocol(value); 664 } 665 } 666 667 abstract static class TypedValue { 668 private final String value; 669 670 TypedValue(String value) { 671 Validate.notNull(value); 672 this.value = value; 673 } 674 675 @Override 676 public int hashCode() { 677 return value.hashCode(); 678 } 679 680 @Override 681 public boolean equals(Object obj) { 682 if (this == obj) return true; 683 if (obj == null || getClass() != obj.getClass()) return false; 684 TypedValue other = (TypedValue) obj; 685 return Objects.equals(value, other.value); 686 } 687 688 @Override 689 public String toString() { 690 return value; 691 } 692 } 693}