package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.QuietAppendable;
import org.jsoup.internal.SharedConstants;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jspecify.annotations.Nullable;

import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.SharedConstants.AttrRangeKey;
import static org.jsoup.nodes.Range.AttributeRange.UntrackedAttr;

/**
 * The attributes of an Element.
 * <p>
 * During parsing, attributes with the same name in an element are deduplicated, according to the configured parser's
 * attribute case-sensitive setting. It is possible to have duplicate attributes subsequently if
 * {@link #add(String, String)} vs {@link #put(String, String)} is used.
 * </p>
 * <p>
 * Attribute name and value comparisons are generally <b>case sensitive</b>. By default for HTML, attribute names are
 * normalized to lower-case on parsing. That means you should use lower-case strings when referring to attributes by
 * name.
 * </p>
 *
 * @author Jonathan Hedley, jonathan@hedley.net
 */
public class Attributes implements Iterable<Attribute>, Cloneable {
    // The Attributes object is only created on the first use of an attribute; the Element will just have a null
    // Attribute slot otherwise

    // Indicates an internal key. Can't be set via HTML. (It could be set via accessor, but not too worried about
    // that.) Internal keys are suppressed from list, iter, size.
    static final char InternalPrefix = '/';
    protected static final String dataPrefix = "data-"; // data attributes
    private static final String EmptyString = "";

    // manages the key/val arrays
    private static final int InitialCapacity = 3; // sampling found mean count when attrs present = 1.49; 1.08 overall. 2.6:1 don't have any attrs.
    private static final int GrowthFactor = 2;
    static final int NotFound = -1;

    // the number of instance fields is kept as low as possible giving an object size of 24 bytes
    int size = 0; // number of slots used (not total capacity, which is keys.length). Package visible for actual size (incl internal)
    @Nullable String[] keys = new String[InitialCapacity]; // keys is not null, but contents may be. Same for vals
    @Nullable Object[] vals = new Object[InitialCapacity]; // Genericish: all non-internal attribute values must be Strings and are cast on access.
    // todo - make keys iterable without creating Attribute objects

    /**
     Ensures the backing arrays can hold at least {@code minNewSize} entries, growing them if required. When growth is
     needed, the new capacity is {@code size * GrowthFactor} (or {@code InitialCapacity} if the arrays are currently
     smaller than that), bumped up to {@code minNewSize} if that is larger.
     @param minNewSize the required minimum capacity; must not be less than the current size
     */
    private void checkCapacity(int minNewSize) {
        Validate.isTrue(minNewSize >= size);
        int curCap = keys.length;
        if (curCap >= minNewSize)
            return;
        int newCap = curCap >= InitialCapacity ? size * GrowthFactor : InitialCapacity;
        if (minNewSize > newCap)
            newCap = minNewSize;

        keys = Arrays.copyOf(keys, newCap);
        vals = Arrays.copyOf(vals, newCap);
    }

    /**
     Finds the slot index of the first key that exactly (case-sensitively) matches.
     @param key the key to find; must not be null
     @return the index, or {@link #NotFound}
     */
    int indexOfKey(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equals(keys[i]))
                return i;
        }
        return NotFound;
    }

    /**
     Finds the slot index of the first key that matches, ignoring case.
     @param key the key to find; must not be null
     @return the index, or {@link #NotFound}
     */
    private int indexOfKeyIgnoreCase(String key) {
        Validate.notNull(key);
        for (int i = 0; i < size; i++) {
            if (key.equalsIgnoreCase(keys[i]))
                return i;
        }
        return NotFound;
    }

    // we track boolean attributes as null in values - they're just keys. so returns empty for consumers
    // casts to String, so only for non-internal attributes
    static String checkNotNull(@Nullable Object val) {
        return val == null ? EmptyString : (String) val;
    }

    /**
     Get an attribute value by key.
     @param key the (case-sensitive) attribute key
     @return the attribute value if set; or empty string if not set (or a boolean attribute).
     @see #hasKey(String)
     */
    public String get(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     Get an Attribute by key. The Attribute will remain connected to these Attributes, so changes made via
     {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc will cascade back to these Attributes and
     their owning Element.
     @param key the (case-sensitive) attribute key
     @return the Attribute for this key, or null if not present.
     @since 1.17.2
     */
    @Nullable public Attribute attribute(String key) {
        int i = indexOfKey(key);
        return i == NotFound ? null : new Attribute(key, checkNotNull(vals[i]), this);
    }

    /**
     * Get an attribute's value by case-insensitive key
     * @param key the attribute name
     * @return the first matching attribute value if set; or empty string if not set (or a boolean attribute).
     */
    public String getIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i == NotFound ? EmptyString : checkNotNull(vals[i]);
    }

    /**
     * Adds a new attribute. Will produce duplicates if the key already exists.
     * @param key the attribute key
     * @param value the attribute value (may be null, for a boolean attribute)
     * @return these attributes, for chaining
     * @see Attributes#put(String, String)
     */
    public Attributes add(String key, @Nullable String value) {
        addObject(key, value);
        return this;
    }

    // Appends a key/value pair in the next free slot, without checking for an existing key. The value is an Object so
    // that internal attributes (e.g. the user-data map) can be stored alongside String values.
    private void addObject(String key, @Nullable Object value) {
        checkCapacity(size + 1);
        keys[size] = key;
        vals[size] = value;
        size++;
    }

    /**
     * Set a new attribute, or replace an existing one by key.
     * @param key case sensitive attribute key (not null)
     * @param value attribute value (which can be null, to set a true boolean attribute)
     * @return these attributes, for chaining
     */
    public Attributes put(String key, @Nullable String value) {
        Validate.notNull(key);
        int i = indexOfKey(key);
        if (i != NotFound)
            vals[i] = value;
        else
            addObject(key, value);
        return this;
    }

    /**
     Get the map holding any user-data associated with these Attributes. Will be created empty on first use. Held as
     an internal attribute, not a field member, to reduce the memory footprint of Attributes when not used. Can hold
     arbitrary objects; use for source ranges, connecting W3C nodes to Elements, etc.
     * @return the map holding user-data
     */
    Map<String, Object> userData() {
        final Map<String, Object> userData;
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i == NotFound) {
            userData = new HashMap<>();
            addObject(SharedConstants.UserDataKey, userData);
        } else {
            //noinspection unchecked
            userData = (Map<String, Object>) vals[i];
        }
        assert userData != null;
        return userData;
    }

    /**
     Check if these attributes have any user data associated with them.
     */
    boolean hasUserData() {
        return hasKey(SharedConstants.UserDataKey);
    }

    /**
     Get an arbitrary user-data object by key.
     * @param key case-sensitive key to the object.
     * @return the object associated to this key, or {@code null} if not found.
     * @see #userData(String key, Object val)
     * @since 1.17.1
     */
    @Nullable
    public Object userData(String key) {
        Validate.notNull(key);
        if (!hasUserData()) return null; // no user data exists; don't create the map just to read from it
        Map<String, Object> userData = userData();
        return userData.get(key);
    }

    /**
     Set an arbitrary user-data object by key. Will be treated as an internal attribute, so will not be emitted in HTML.
     * @param key case-sensitive key
     * @param value object value. Providing a {@code null} value has the effect of removing the key from the userData map.
     * @return these attributes
     * @see #userData(String key)
     * @since 1.17.1
     */
    public Attributes userData(String key, @Nullable Object value) {
        Validate.notNull(key);
        if (value == null && !hasUserData()) return this; // no user data exists, so short-circuit
        Map<String, Object> userData = userData();
        if (value == null) userData.remove(key);
        else userData.put(key, value);
        return this;
    }

    // Sets the value of the first attribute whose key matches ignoring case, updating the stored key to the supplied
    // case if it differs; adds a new attribute if there is no match.
    void putIgnoreCase(String key, @Nullable String value) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound) {
            vals[i] = value;
            String old = keys[i];
            assert old != null;
            if (!old.equals(key)) // case changed, update
                keys[i] = key;
        }
        else
            addObject(key, value);
    }

    /**
     * Set a new boolean attribute. Removes the attribute if the value is false.
     * <p>When {@code value} is true, an existing attribute is matched case-<b>insensitively</b> (and its key is updated
     * to the supplied case). When {@code value} is false, removal is delegated to {@link #remove(String)}, which
     * matches case-sensitively.</p>
     * @param key attribute key
     * @param value attribute value
     * @return these attributes, for chaining
     */
    public Attributes put(String key, boolean value) {
        if (value)
            putIgnoreCase(key, null);
        else
            remove(key);
        return this;
    }

    /**
     Set a new attribute, or replace an existing one by key. The supplied Attribute becomes connected to these
     Attributes (its parent is set to this).
     @param attribute attribute with case-sensitive key
     @return these attributes, for chaining
     */
    public Attributes put(Attribute attribute) {
        Validate.notNull(attribute);
        put(attribute.getKey(), attribute.getValue());
        attribute.parent = this;
        return this;
    }

    // removes the entry at index, and shifts the following entries up
    @SuppressWarnings("AssignmentToNull")
    private void remove(int index) {
        Validate.isFalse(index >= size);
        int shifted = size - index - 1;
        if (shifted > 0) {
            System.arraycopy(keys, index + 1, keys, index, shifted);
            System.arraycopy(vals, index + 1, vals, index, shifted);
        }
        size--;
        keys[size] = null; // release hold
        vals[size] = null;
    }

    /**
     Remove an attribute by key. <b>Case sensitive.</b>
     @param key attribute key to remove
     */
    public void remove(String key) {
        int i = indexOfKey(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Remove an attribute by key. <b>Case insensitive.</b>
     @param key attribute key to remove
     */
    public void removeIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        if (i != NotFound)
            remove(i);
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-sensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKey(String key) {
        return indexOfKey(key) != NotFound;
    }

    /**
     Tests if these attributes contain an attribute with this key.
     @param key case-insensitive key to check for
     @return true if key exists, false otherwise
     */
    public boolean hasKeyIgnoreCase(String key) {
        return indexOfKeyIgnoreCase(key) != NotFound;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-sensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKey(String key) {
        int i = indexOfKey(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     * Check if these attributes contain an attribute with a value for this key.
     * @param key case-insensitive key to check for
     * @return true if key exists, and it has a value
     */
    public boolean hasDeclaredValueForKeyIgnoreCase(String key) {
        int i = indexOfKeyIgnoreCase(key);
        return i != NotFound && vals[i] != null;
    }

    /**
     Get the number of attributes in this set, excluding any internal-only attributes (e.g. user data).
     <p>Internal attributes are excluded from the {@link #html()}, {@link #asList()}, and {@link #iterator()}
     methods.</p>

     @return size
     */
    public int size() {
        if (size == 0) return 0;
        int count = 0;
        for (int i = 0; i < size; i++) {
            if (!isInternalKey(keys[i])) count++;
        }
        return count;
    }

    /**
     Test if this Attributes list is empty.
     <p>This does not include internal attributes, such as user data.</p>
     @return true if there are no (non-internal) attributes
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     Add all the attributes from the incoming set to this set. Internal attributes of the incoming set are not copied.
     @param incoming attributes to add to these attributes.
     */
    public void addAll(Attributes incoming) {
        int incomingSize = incoming.size(); // not adding internal
        if (incomingSize == 0) return;
        checkCapacity(size + incomingSize);

        boolean needsPut = size != 0; // if this set is empty, no need to check existing set, so can add() vs put()
        // (and save bashing on the indexOfKey()
        for (Attribute attr : incoming) {
            if (needsPut)
                put(attr);
            else
                addObject(attr.getKey(), attr.getValue());
        }
    }

    /**
     Get the source ranges (start to end position) in the original input source from which this attribute's <b>name</b>
     and <b>value</b> were parsed.
     <p>Position tracking must be enabled prior to parsing the content.</p>
     @param key the attribute name
     @return the ranges for the attribute's name and value, or {@code untracked} if the attribute does not exist or its range
     was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Attribute#sourceRange()
     @see Node#sourceRange()
     @see Element#endSourceRange()
     @since 1.17.1
     */
    public Range.AttributeRange sourceRange(String key) {
        if (!hasKey(key)) return UntrackedAttr;
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) return UntrackedAttr;
        Range.AttributeRange range = ranges.get(key);
        return range != null ? range : UntrackedAttr;
    }

    /** Get the Ranges, if tracking is enabled; null otherwise. */
    @Nullable Map<String, Range.AttributeRange> getRanges() {
        //noinspection unchecked
        return (Map<String, Range.AttributeRange>) userData(AttrRangeKey);
    }

    /**
     Set the source ranges (start to end position) from which this attribute's <b>name</b> and <b>value</b> were parsed.
     @param key the attribute name
     @param range the range for the attribute's name and value
     @return these attributes, for chaining
     @since 1.18.2
     */
    public Attributes sourceRange(String key, Range.AttributeRange range) {
        Validate.notNull(key);
        Validate.notNull(range);
        Map<String, Range.AttributeRange> ranges = getRanges();
        if (ranges == null) {
            ranges = new HashMap<>();
            userData(AttrRangeKey, ranges);
        }
        ranges.put(key, range);
        return this;
    }

    /**
     Get an iterator over the (non-internal) attributes. Each returned Attribute is connected to these Attributes.
     <p>The iterator fails with a {@link ConcurrentModificationException} if attributes are added or removed by any
     means other than {@link Iterator#remove()} while iterating.</p>
     @return an iterator of Attribute objects
     */
    @Override
    public Iterator<Attribute> iterator() {
        //noinspection ReturnOfInnerClass
        return new Iterator<Attribute>() {
            int expectedSize = size;
            int i = 0;                  // next slot to examine
            int lastReturned = NotFound; // slot of the Attribute most recently returned by next(); NotFound if none, or already removed

            @Override
            public boolean hasNext() {
                checkModified();
                while (i < size) {
                    String key = keys[i];
                    assert key != null;
                    if (isInternalKey(key)) // skip over internal keys
                        i++;
                    else
                        break;
                }

                return i < size;
            }

            @Override
            public Attribute next() {
                // hasNext() checks for modification and steps over internal keys, so that calling next() without a
                // prior hasNext() can never land on an internal (non-String valued) slot
                if (!hasNext()) throw new NoSuchElementException();
                String key = keys[i];
                assert key != null;
                final Attribute attr = new Attribute(key, (String) vals[i], Attributes.this);
                lastReturned = i;
                i++;
                return attr;
            }

            private void checkModified() {
                if (size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }

            @Override
            public void remove() {
                if (lastReturned == NotFound)
                    throw new IllegalStateException("next() must be called before remove(), and remove() only once per next()");
                // remove by the remembered slot, not by (i - 1): an intervening hasNext() may have advanced i past
                // internal keys, and those must not be removed
                Attributes.this.remove(lastReturned);
                i--; // everything after the removed slot shifted up by one
                expectedSize--;
                lastReturned = NotFound;
            }
        };
    }

    /**
     Get the attributes as a List, for iteration. The list is a snapshot built at the time of the call; the Attribute
     objects in it are connected to these Attributes.
     @return an unmodifiable List of the (non-internal) attributes.
     */
    public List<Attribute> asList() {
        ArrayList<Attribute> list = new ArrayList<>(size);
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            assert key != null;
            if (isInternalKey(key))
                continue; // skip internal keys
            Attribute attr = new Attribute(key, (String) vals[i], Attributes.this);
            list.add(attr);
        }
        return Collections.unmodifiableList(list);
    }

    /**
     * Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
     * starting with {@code data-}.
     * @return map of custom data attributes.
     */
    public Map<String, String> dataset() {
        return new Dataset(this);
    }

    /**
     Get the HTML representation of these attributes.
     @return HTML
     */
    public String html() {
        StringBuilder sb = StringUtil.borrowBuilder();
        html(QuietAppendable.wrap(sb), new Document.OutputSettings()); // output settings a bit funky, but this html() seldom used
        return StringUtil.releaseBuilder(sb);
    }

    // Appends each non-internal attribute, preceded by a space, to accum. Keys for which Attribute.getValidKey()
    // returns null for the output syntax are omitted.
    final void html(final QuietAppendable accum, final Document.OutputSettings out) {
        final int sz = size;
        for (int i = 0; i < sz; i++) {
            String key = keys[i];
            assert key != null;
            if (isInternalKey(key))
                continue;
            final String validated = Attribute.getValidKey(key, out.syntax());
            if (validated != null)
                Attribute.htmlNoValidate(validated, (String) vals[i], accum.append(' '), out);
        }
    }

    @Override
    public String toString() {
        return html();
    }

    /**
     * Checks if these attributes are equal to another set of attributes, by comparing the two sets. Note that the order
     * of the attributes does not impact this equality (as per the Map interface equals()).
     * @param o attributes to compare with
     * @return if both sets of attributes have the same content
     */
    @Override
    public boolean equals(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Attributes that = (Attributes) o;
        if (size != that.size) return false;
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            assert key != null;
            int thatI = that.indexOfKey(key);
            if (thatI == NotFound || !Objects.equals(vals[i], that.vals[thatI]))
                return false;
        }
        return true;
    }

    /**
     * Calculates the hashcode of these attributes, by iterating all attributes and summing their hashcodes.
     * <p>Only the used slots contribute, and each entry contributes {@code keyHash ^ valueHash} (as a Map entry would),
     * so that the result is independent of both attribute order and spare array capacity, in keeping with
     * {@link #equals(Object)}.</p>
     * @return calculated hashcode
     */
    @Override
    public int hashCode() {
        int result = 0;
        for (int i = 0; i < size; i++) {
            result += Objects.hashCode(keys[i]) ^ Objects.hashCode(vals[i]);
        }
        return result;
    }

    /**
     Creates a copy of these attributes. The key and value arrays are copied, and the user-data map (if present) is
     copied into a new map for the clone; the objects held within that map are shared (a shallow copy).
     @return the clone
     */
    @Override
    public Attributes clone() {
        Attributes clone;
        try {
            clone = (Attributes) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
        clone.size = size;
        clone.keys = Arrays.copyOf(keys, size);
        clone.vals = Arrays.copyOf(vals, size);

        // make a copy of the user data map. (Contents are shallow). The copy goes into the clone, so that cloning
        // does not modify these (source) attributes.
        int i = indexOfKey(SharedConstants.UserDataKey);
        if (i != NotFound) {
            //noinspection unchecked
            clone.vals[i] = new HashMap<>((Map<String, Object>) vals[i]);
        }

        return clone;
    }

    /**
     * Internal method. Lowercases all (non-internal) keys.
     */
    public void normalize() {
        for (int i = 0; i < size; i++) {
            String key = keys[i];
            assert key != null;
            if (!isInternalKey(key))
                keys[i] = lowerCase(key);
        }
    }

    /**
     * Internal method. Removes duplicate attribute by name. Settings for case sensitivity of key names. The first
     * occurrence of each key is retained.
     * @param settings case sensitivity
     * @return number of removed dupes
     */
    public int deduplicate(ParseSettings settings) {
        if (size == 0) return 0;
        boolean preserve = settings.preserveAttributeCase();
        int dupes = 0;
        for (int i = 0; i < size; i++) {
            String keyI = keys[i];
            assert keyI != null;
            for (int j = i + 1; j < size; j++) {
                if ((preserve && keyI.equals(keys[j])) || (!preserve && keyI.equalsIgnoreCase(keys[j]))) {
                    dupes++;
                    remove(j);
                    j--; // the following entries shifted up, so re-examine this slot
                }
            }
        }
        return dupes;
    }

    // A Map of the data- attributes, with the "data-" prefix stripped from the keys. Reads are made against the
    // backing Attributes at the time of access; put() and iterator removal write through to them.
    private static class Dataset extends AbstractMap<String, String> {
        private final Attributes attributes;

        private Dataset(Attributes attributes) {
            this.attributes = attributes;
        }

        @Override
        public Set<Entry<String, String>> entrySet() {
            return new EntrySet();
        }

        // Sets the attribute "data-" + key; returns the previous value, or null if that attribute did not exist
        @Override
        public String put(String key, String value) {
            String dataKey = dataKey(key);
            String oldValue = attributes.hasKey(dataKey) ? attributes.get(dataKey) : null;
            attributes.put(dataKey, value);
            return oldValue;
        }

        private class EntrySet extends AbstractSet<Map.Entry<String, String>> {

            @Override
            public Iterator<Map.Entry<String, String>> iterator() {
                return new DatasetIterator();
            }

            @Override
            public int size() {
                int count = 0;
                Iterator<Entry<String, String>> iter = new DatasetIterator();
                while (iter.hasNext()) {
                    iter.next(); // hasNext() does not advance by itself, so consume the entry
                    count++;
                }
                return count;
            }
        }

        // Walks the slots of the backing Attributes directly (rather than wrapping Attributes#iterator()), so that
        // hasNext() can look ahead to the next data attribute while remove() still targets the entry last returned.
        private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
            private int expectedSize = attributes.size;
            private int cursor = 0;               // next slot to examine
            private int lastReturned = NotFound;  // slot of the entry most recently returned by next(); NotFound if none, or already removed
            private @Nullable Attribute pending;  // the looked-ahead data attribute, held in slot (cursor - 1); null if none found yet

            @Override public boolean hasNext() {
                checkModified();
                while (pending == null && cursor < attributes.size) {
                    String key = attributes.keys[cursor];
                    assert key != null;
                    if (!isInternalKey(key)) { // internal values are not Strings, so must be skipped before the cast
                        Attribute candidate = new Attribute(key, (String) attributes.vals[cursor], attributes);
                        if (candidate.isDataAttribute())
                            pending = candidate;
                    }
                    cursor++;
                }
                return pending != null;
            }

            @Override public Entry<String, String> next() {
                if (!hasNext()) throw new NoSuchElementException();
                Attribute attr = pending;
                assert attr != null;
                pending = null;
                lastReturned = cursor - 1;
                return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
            }

            @Override public void remove() {
                if (lastReturned == NotFound)
                    throw new IllegalStateException("next() must be called before remove(), and remove() only once per next()");
                attributes.remove(lastReturned);
                cursor--; // everything after the removed slot (including any pending lookahead) shifted up by one
                expectedSize--;
                lastReturned = NotFound;
            }

            private void checkModified() {
                if (attributes.size != expectedSize) throw new ConcurrentModificationException("Use Iterator#remove() instead to remove attributes while iterating.");
            }
        }
    }

    // Prefixes the key with "data-"
    private static String dataKey(String key) {
        return dataPrefix + key;
    }

    // Prefixes the key with the internal key marker
    static String internalKey(String key) {
        return InternalPrefix + key;
    }

    // A key is internal if it starts with the internal prefix and has at least one further character
    static boolean isInternalKey(String key) {
        return key.length() > 1 && key.charAt(0) == InternalPrefix;
    }
}