001package org.jsoup.nodes; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.Normalizer; 005import org.jsoup.internal.QuietAppendable; 006import org.jsoup.helper.Regex; 007import org.jsoup.internal.StringUtil; 008import org.jsoup.parser.ParseSettings; 009import org.jsoup.parser.Parser; 010import org.jsoup.parser.Tag; 011import org.jsoup.parser.TokenQueue; 012import org.jsoup.select.Collector; 013import org.jsoup.select.Elements; 014import org.jsoup.select.Evaluator; 015import org.jsoup.select.NodeFilter; 016import org.jsoup.select.NodeVisitor; 017import org.jsoup.select.Nodes; 018import org.jsoup.select.Selector; 019import org.jspecify.annotations.Nullable; 020 021import java.lang.ref.WeakReference; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.Iterator; 027import java.util.LinkedHashSet; 028import java.util.List; 029import java.util.Map; 030import java.util.Set; 031import java.util.concurrent.atomic.AtomicBoolean; 032import java.util.function.Consumer; 033import java.util.regex.Pattern; 034import java.util.regex.PatternSyntaxException; 035import java.util.stream.Collectors; 036import java.util.stream.Stream; 037 038import static org.jsoup.internal.Normalizer.normalize; 039import static org.jsoup.nodes.Document.OutputSettings.Syntax.xml; 040import static org.jsoup.nodes.TextNode.lastCharIsWhitespace; 041import static org.jsoup.parser.Parser.NamespaceHtml; 042import static org.jsoup.parser.TokenQueue.escapeCssIdentifier; 043import static org.jsoup.select.Selector.evaluatorOf; 044 045/** 046 An HTML Element consists of a tag name, attributes, and child nodes (including text nodes and other elements). 047 <p> 048 From an Element, you can extract data, traverse the node graph, and manipulate the HTML. 
*/
public class Element extends Node implements Iterable<Element> {
    // shared empty list of child elements
    private static final List<Element> EmptyChildren = Collections.emptyList();
    // shared placeholder that every new Element initially uses as its childNodes (see the constructor below)
    private static final NodeList EmptyNodeList = new NodeList(0);
    // matches one or more whitespace characters
    private static final Pattern ClassSplit = Pattern.compile("\\s+");
    // internal attribute key derived from "baseUri"
    static final String BaseUriKey = Attributes.internalKey("baseUri");
    Tag tag;
    NodeList childNodes;
    @Nullable Attributes attributes; // field is nullable but all methods for attributes are non-null

    /**
     * Create a new, standalone element, in the specified namespace. The tag is created with
     * {@link ParseSettings#preserveCase} and no base URI is set.
     * @param tag tag name
     * @param namespace namespace for this element
     */
    public Element(String tag, String namespace) {
        this(Tag.valueOf(tag, namespace, ParseSettings.preserveCase), null);
    }

    /**
     * Create a new, standalone element, in the HTML namespace.
     * @param tag tag name
     * @see #Element(String tag, String namespace)
     */
    public Element(String tag) {
        this(tag, Parser.NamespaceHtml);
    }

    /**
     * Create a new, standalone Element. (Standalone in that it has no parent.)
     *
     * @param tag tag of this element
     * @param baseUri the base URI (optional, may be null to inherit from parent, or "" to clear parent's)
     * @param attributes initial attributes (optional, may be null)
     * @see #appendChild(Node)
     * @see #appendElement(String)
     */
    public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attributes) {
        Validate.notNull(tag);
        // start on the shared placeholder list; no per-element list is created here
        childNodes = EmptyNodeList;
        this.attributes = attributes;
        this.tag = tag;
        // the base URI is only stored when StringUtil.isBlank reports it as non-blank
        // NOTE(review): the @param text above mentions "" clearing the parent's URI - confirm against this check
        if (!StringUtil.isBlank(baseUri)) this.setBaseUri(baseUri);
    }

    /**
     * Create a new Element from a Tag and a base URI.
     *
     * @param tag element tag
     * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
099 * @see Tag#valueOf(String, ParseSettings) 100 */ 101 public Element(Tag tag, @Nullable String baseUri) { 102 this(tag, baseUri, null); 103 } 104 105 /** 106 Internal test to check if a nodelist object has been created. 107 */ 108 protected boolean hasChildNodes() { 109 return childNodes != EmptyNodeList; 110 } 111 112 @Override protected List<Node> ensureChildNodes() { 113 if (childNodes == EmptyNodeList) { 114 childNodes = new NodeList(4); 115 } 116 return childNodes; 117 } 118 119 @Override 120 protected boolean hasAttributes() { 121 return attributes != null; 122 } 123 124 @Override 125 public Attributes attributes() { 126 if (attributes == null) // not using hasAttributes, as doesn't clear warning 127 attributes = new Attributes(); 128 return attributes; 129 } 130 131 @Override 132 public String baseUri() { 133 String baseUri = searchUpForAttribute(this, BaseUriKey); 134 return baseUri != null ? baseUri : ""; 135 } 136 137 @Nullable 138 static String searchUpForAttribute(final Element start, final String key) { 139 Element el = start; 140 while (el != null) { 141 if (el.attributes != null && el.attributes.hasKey(key)) 142 return el.attributes.get(key); 143 el = el.parent(); 144 } 145 return null; 146 } 147 148 @Override 149 protected void doSetBaseUri(String baseUri) { 150 attributes().put(BaseUriKey, baseUri); 151 } 152 153 @Override 154 public int childNodeSize() { 155 return childNodes.size(); 156 } 157 158 @Override 159 public String nodeName() { 160 return tag.getName(); 161 } 162 163 /** 164 * Get the name of the tag for this element. E.g. {@code div}. If you are using {@link ParseSettings#preserveCase 165 * case preserving parsing}, this will return the source's original case. 166 * 167 * @return the tag name 168 */ 169 public String tagName() { 170 return tag.getName(); 171 } 172 173 /** 174 * Get the normalized name of this Element's tag. 
This will always be the lower-cased version of the tag, regardless
     * of the tag case preserving setting of the parser. E.g., {@code <DIV>} and {@code <div>} both have a
     * normal name of {@code div}.
     * @return normal name
     */
    @Override
    public String normalName() {
        return tag.normalName();
    }

    /**
     Test if this Element has the specified normalized name, and is in the specified namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @param namespace the namespace
     * @return true if the element's normal name matches exactly, and is in the specified namespace
     * @since 1.17.2
     */
    public boolean elementIs(String normalName, String namespace) {
        // both comparisons are exact String.equals matches
        return tag.normalName().equals(normalName) && tag.namespace().equals(namespace);
    }

    /**
     * Change (rename) the tag of this element, keeping its current namespace. For example, convert a
     * {@code <span>} to a {@code <div>} with {@code el.tagName("div");}.
     *
     * @param tagName new tag name for this element
     * @return this element, for chaining
     * @see Elements#tagName(String)
     */
    public Element tagName(String tagName) {
        return tagName(tagName, tag.namespace());
    }

    /**
     * Change (rename) the tag of this element, and set its namespace. For example, convert a {@code <span>} to a
     * {@code <div>} in the HTML namespace with {@code el.tagName("div", Parser.NamespaceHtml);}.
     *
     * @param tagName new tag name for this element; validated as not empty
     * @param namespace the new namespace for this element; validated as not empty
     * @return this element, for chaining
     * @see Elements#tagName(String)
     */
    public Element tagName(String tagName, String namespace) {
        Validate.notEmptyParam(tagName, "tagName");
        Validate.notEmptyParam(namespace, "namespace");
        // the new tag is resolved through the parser's tag set, using that parser's settings
        Parser parser = NodeUtils.parser(this);
        tag = parser.tagSet().valueOf(tagName, namespace, parser.settings()); // maintains the case option of the original parse
        return this;
    }

    /**
     * Get the Tag for this element.
*
     * @return the tag object
     */
    public Tag tag() {
        return tag;
    }

    /**
     Change the Tag of this element.
     @param tag the new tag; must not be null
     @return this element, for chaining
     @since 1.20.1
     */
    public Element tag(Tag tag) {
        Validate.notNull(tag);
        this.tag = tag;
        return this;
    }

    /**
     * Test if this element is a block-level element. (E.g. {@code <div> == true} or an inline element
     * {@code <span> == false}).
     *
     * @return true if block, false if not (and thus inline)
     */
    public boolean isBlock() {
        return tag.isBlock();
    }

    /**
     * Get the {@code id} attribute of this element.
     *
     * @return The id attribute, if present, or an empty string if not.
     */
    public String id() {
        // reads the field directly so that no Attributes object is created just to answer this
        return attributes != null ? attributes.getIgnoreCase("id") :"";
    }

    /**
     Set the {@code id} attribute of this element.
     @param id the ID value to use; must not be null
     @return this Element, for chaining
     */
    public Element id(String id) {
        Validate.notNull(id);
        attr("id", id);
        return this;
    }

    /**
     * Set an attribute value on this element. If this element already has an attribute with the
     * key, its value is updated; otherwise, a new attribute is added.
     *
     * @param attributeKey the attribute key
     * @param attributeValue the attribute value
     * @return this element
     */
    @Override public Element attr(String attributeKey, String attributeValue) {
        // delegates to the superclass; overridden so the call chains as an Element
        super.attr(attributeKey, attributeValue);
        return this;
    }

    /**
     * Set a boolean attribute value on this element. Setting to <code>true</code> sets the attribute value to "" and
     * marks the attribute as boolean so no value is written out. Setting to <code>false</code> removes the attribute
     * with the same key if it exists.
*
     * @param attributeKey the attribute key
     * @param attributeValue the attribute value
     *
     * @return this element
     */
    public Element attr(String attributeKey, boolean attributeValue) {
        attributes().put(attributeKey, attributeValue);
        return this;
    }

    /**
     Get an Attribute by key. Changes made via {@link Attribute#setKey(String)}, {@link Attribute#setValue(String)} etc
     will cascade back to this Element.
     @param key the (case-sensitive) attribute key
     @return the Attribute for this key, or null if not present.
     @since 1.17.2
     */
    @Nullable public Attribute attribute(String key) {
        // the attribute map is only consulted when it already exists
        return hasAttributes() ? attributes().attribute(key) : null;
    }

    /**
     * Get this element's HTML5 custom data attributes. Each attribute in the element that has a key
     * starting with "data-" is included in the dataset.
     * <p>
     * E.g., the element {@code <div data-package="jsoup" data-language="Java" class="group">...} has the dataset
     * {@code package=jsoup, language=Java}.
     * <p>
     * This map is a filtered view of the element's attribute map. Changes to one map (add, remove, update) are reflected
     * in the other map.
     * <p>
     * You can find elements that have data attributes using the {@code [^data-]} attribute key prefix selector.
     * @return a map of {@code key=value} custom data attributes.
     */
    public Map<String, String> dataset() {
        return attributes().dataset();
    }

    /**
     * Get this element's parent, as an Element.
     * @return the parent node cast to Element, or {@code null} if there is none
     */
    @Override @Nullable
    public final Element parent() {
        // NOTE(review): the unchecked cast assumes an Element's parent node is always an Element - confirm in Node
        return (Element) parentNode;
    }

    /**
     * Get this element's parent and ancestors, up to the document root.
     * @return this element's stack of parents, starting with the closest first.
337 */ 338 public Elements parents() { 339 Elements parents = new Elements(); 340 Element parent = this.parent(); 341 while (parent != null && !parent.nameIs("#root")) { 342 parents.add(parent); 343 parent = parent.parent(); 344 } 345 return parents; 346 } 347 348 /** 349 * Get a child element of this element, by its 0-based index number. 350 * <p> 351 * Note that an element can have both mixed Nodes and Elements as children. This method inspects 352 * a filtered list of children that are elements, and the index is based on that filtered list. 353 * </p> 354 * 355 * @param index the index number of the element to retrieve 356 * @return the child element, if it exists, otherwise throws an {@code IndexOutOfBoundsException} 357 * @see #childNode(int) 358 */ 359 public Element child(int index) { 360 Validate.isTrue(index >= 0, "Index must be >= 0"); 361 List<Element> cached = cachedChildren(); 362 if (cached != null) return cached.get(index); 363 // otherwise, iter on elementChild; saves creating list 364 int size = childNodes.size(); 365 for (int i = 0, e = 0; i < size; i++) { // direct iter is faster than chasing firstElSib, nextElSibd 366 Node node = childNodes.get(i); 367 if (node instanceof Element) { 368 if (e++ == index) return (Element) node; 369 } 370 } 371 throw new IndexOutOfBoundsException("No child at index: " + index); 372 } 373 374 /** 375 * Get the number of child nodes of this element that are elements. 376 * <p> 377 * This method works on the same filtered list like {@link #child(int)}. Use {@link #childNodes()} and {@link 378 * #childNodeSize()} to get the unfiltered Nodes (e.g. includes TextNodes etc.) 
379 * </p> 380 * 381 * @return the number of child nodes that are elements 382 * @see #children() 383 * @see #child(int) 384 */ 385 public int childrenSize() { 386 if (childNodeSize() == 0) return 0; 387 return childElementsList().size(); // gets children into cache; faster subsequent child(i) if unmodified 388 } 389 390 /** 391 * Get this element's child elements. 392 * <p> 393 * This is effectively a filter on {@link #childNodes()} to get Element nodes. 394 * </p> 395 * @return child elements. If this element has no children, returns an empty list. 396 * @see #childNodes() 397 */ 398 public Elements children() { 399 return new Elements(childElementsList()); 400 } 401 402 /** 403 * Maintains a shadow copy of this element's child elements. If the nodelist is changed, this cache is invalidated. 404 * @return a list of child elements 405 */ 406 List<Element> childElementsList() { 407 if (childNodeSize() == 0) return EmptyChildren; // short circuit creating empty 408 // set atomically, so works in multi-thread. 
Calling methods look like reads, so should be thread-safe 409 synchronized (childNodes) { // sync vs re-entrant lock, to save another field 410 List<Element> children = cachedChildren(); 411 if (children == null) { 412 children = filterNodes(Element.class); 413 stashChildren(children); 414 } 415 return children; 416 } 417 } 418 419 private static final String childElsKey = "jsoup.childEls"; 420 private static final String childElsMod = "jsoup.childElsMod"; 421 422 /** returns the cached child els, if they exist, and the modcount of our childnodes matches the stashed modcount */ 423 @Nullable List<Element> cachedChildren() { 424 if (attributes == null || !attributes.hasUserData()) return null; // don't create empty userdata 425 Map<String, Object> userData = attributes.userData(); 426 //noinspection unchecked 427 WeakReference<List<Element>> ref = (WeakReference<List<Element>>) userData.get(childElsKey); 428 if (ref != null) { 429 List<Element> els = ref.get(); 430 if (els != null) { 431 Integer modCount = (Integer) userData.get(childElsMod); 432 if (modCount != null && modCount == childNodes.modCount()) 433 return els; 434 } 435 } 436 return null; 437 } 438 439 /** caches the child els into the Attribute user data. */ 440 private void stashChildren(List<Element> els) { 441 Map<String, Object> userData = attributes().userData(); 442 WeakReference<List<Element>> ref = new WeakReference<>(els); 443 userData.put(childElsKey, ref); 444 userData.put(childElsMod, childNodes.modCount()); 445 } 446 447 /** 448 Returns a Stream of this Element and all of its descendant Elements. The stream has document order. 449 @return a stream of this element and its descendants. 
450 @see #nodeStream() 451 @since 1.17.1 452 */ 453 public Stream<Element> stream() { 454 return NodeUtils.stream(this, Element.class); 455 } 456 457 private <T> List<T> filterNodes(Class<T> clazz) { 458 return childNodes.stream() 459 .filter(clazz::isInstance) 460 .map(clazz::cast) 461 .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); 462 } 463 464 /** 465 * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated. 466 * <p> 467 * This is effectively a filter on {@link #childNodes()} to get Text nodes. 468 * @return child text nodes. If this element has no text nodes, returns an 469 * empty list. 470 * </p> 471 * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected: 472 * <ul> 473 * <li>{@code p.text()} = {@code "One Two Three Four"}</li> 474 * <li>{@code p.ownText()} = {@code "One Three Four"}</li> 475 * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li> 476 * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li> 477 * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li> 478 * </ul> 479 */ 480 public List<TextNode> textNodes() { 481 return filterNodes(TextNode.class); 482 } 483 484 /** 485 * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated. 486 * <p> 487 * This is effectively a filter on {@link #childNodes()} to get Data nodes. 488 * </p> 489 * @return child data nodes. If this element has no data nodes, returns an 490 * empty list. 491 * @see #data() 492 */ 493 public List<DataNode> dataNodes() { 494 return filterNodes(DataNode.class); 495 } 496 497 /** 498 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements 499 * may include this element, or any of its descendents. 
* <p>If the query starts with a combinator (e.g. {@code *} or {@code >}), that will combine to this element.</p>
     * <p>This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
     * multiple filters can be combined, e.g.:</p>
     * <ul>
     * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
     * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
     * <li>{@code el.select("* div")} - finds all divs that descend from this element (and excludes this element)</li>
     * <li>{@code el.select("> div")} - finds all divs that are direct children of this element (and excludes this element)</li>
     * </ul>
     * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
     * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
     *
     * @param cssQuery a {@link Selector} CSS-like query
     * @return an {@link Elements} list containing elements that match the query (empty if none match)
     * @see Selector selector query syntax
     * @see #select(Evaluator)
     * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
     */
    public Elements select(String cssQuery) {
        // delegates to Selector, passing this element as the root of the search
        return Selector.select(cssQuery, this);
    }

    /**
     * Find elements that match the supplied Evaluator. This has the same functionality as {@link #select(String)}, but
     * may be useful if you are running the same query many times (on many documents) and want to save the overhead of
     * repeatedly parsing the CSS query.
* @param evaluator an element evaluator
     * @return an {@link Elements} list containing elements that match the query (empty if none match)
     * @see Selector#evaluatorOf(String css)
     */
    public Elements select(Evaluator evaluator) {
        // delegates to Selector, passing this element as the root of the search
        return Selector.select(evaluator, this);
    }

    /**
     Selects elements from the given root that match the specified {@link Selector} CSS query, with this element as the
     starting context, and returns them as a lazy Stream. Matched elements may include this element, or any of its
     children.
     <p>
     Unlike {@link #select(String query)}, which returns a complete list of all matching elements, this method returns a
     {@link Stream} that processes elements lazily as they are needed. The stream operates in a "pull" model — elements
     are fetched from the root as the stream is traversed. You can use standard {@code Stream} operations such as
     {@code filter}, {@code map}, or {@code findFirst} to process elements on demand.
     </p>

     @param cssQuery a {@link Selector} CSS-like query
     @return a {@link Stream} containing elements that match the query (empty if none match)
     @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
     @see Selector selector query syntax
     @see #selectStream(Evaluator eval)
     @since 1.19.1
     */
    public Stream<Element> selectStream(String cssQuery) {
        return Selector.selectStream(cssQuery, this);
    }

    /**
     Find a Stream of elements that match the supplied Evaluator, with this element as the starting context.

     @param evaluator an element Evaluator
     @return a {@link Stream} containing elements that match the query (empty if none match)
     @see Selector#evaluatorOf(String css)
     @since 1.19.1
     */
    public Stream<Element> selectStream(Evaluator evaluator) {
        return Selector.selectStream(evaluator, this);
    }

    /**
     * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
* <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
     * execution stops on the first hit.</p>
     * <p>Also known as {@code querySelector()} in the Web DOM.</p>
     * @param cssQuery a {@link Selector} CSS-like query
     * @return the first matching element, or <b>{@code null}</b> if there is no match.
     * @see #expectFirst(String)
     */
    public @Nullable Element selectFirst(String cssQuery) {
        return Selector.selectFirst(cssQuery, this);
    }

    /**
     * Finds the first Element that matches the supplied Evaluator, with this element as the starting context, or
     * {@code null} if none match.
     *
     * @param evaluator an element evaluator
     * @return the first matching element (walking down the tree, starting from this element), or {@code null} if none
     * match.
     */
    public @Nullable Element selectFirst(Evaluator evaluator) {
        return Collector.findFirst(evaluator, this);
    }

    /**
     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. This
     is useful if you want to simply abort processing on a failed match.
     @param cssQuery a {@link Selector} CSS-like query
     @return the first matching element
     @throws IllegalArgumentException if no match is found
     @since 1.15.2
     */
    public Element expectFirst(String cssQuery) {
        // the message template is chosen by whether this element has a parent; the query and this element's tag
        // name are both supplied as message arguments in either case
        return Validate.expectNotNull(
            Selector.selectFirst(cssQuery, this),
            parent() != null ?
                "No elements matched the query '%s' on element '%s'." :
                "No elements matched the query '%s' in the document."
            , cssQuery, this.tagName()
        );
    }

    /**
     Find nodes that match the supplied {@link Evaluator}, with this element as the starting context. Matched
     nodes may include this element, or any of its descendants.
@param evaluator an evaluator
     @return a list of nodes that match the query (empty if none match)
     @since 1.21.1
     */
    public Nodes<Node> selectNodes(Evaluator evaluator) {
        // collects every node type by passing Node.class as the type filter
        return selectNodes(evaluator, Node.class);
    }

    /**
     Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched
     nodes may include this element, or any of its descendants.
     <p>To select leaf nodes, the query should specify the node type, e.g. {@code ::text},
     {@code ::comment}, {@code ::data}, {@code ::leafnode}.</p>

     @param cssQuery a {@link Selector} CSS query
     @return a list of nodes that match the query (empty if none match)
     @since 1.21.1
     */
    public Nodes<Node> selectNodes(String cssQuery) {
        // collects every node type by passing Node.class as the type filter
        return selectNodes(cssQuery, Node.class);
    }

    /**
     Find nodes that match the supplied Evaluator, with this element as the starting context. Matched
     nodes may include this element, or any of its descendants.

     @param evaluator an evaluator; must not be null
     @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
     @param <T> the type of node to collect
     @return a list of nodes that match the query (empty if none match)
     @since 1.21.1
     */
    public <T extends Node> Nodes<T> selectNodes(Evaluator evaluator, Class<T> type) {
        Validate.notNull(evaluator);
        return Collector.collectNodes(evaluator, this, type);
    }

    /**
     Find nodes that match the supplied {@link Selector} CSS query, with this element as the starting context. Matched
     nodes may include this element, or any of its descendants.
     <p>To select specific node types, use {@code ::text}, {@code ::comment}, {@code ::leafnode}, etc.
For example, to
     select all text nodes under {@code p} elements: </p>
     <pre>    Nodes&lt;TextNode&gt; textNodes = doc.selectNodes("p ::text", TextNode.class);</pre>

     @param cssQuery a {@link Selector} CSS query; must not be empty
     @param type the type of node to collect (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
     @param <T> the type of node to collect
     @return a list of nodes that match the query (empty if none match)
     @since 1.21.1
     */
    public <T extends Node> Nodes<T> selectNodes(String cssQuery, Class<T> type) {
        Validate.notEmpty(cssQuery);
        // parse the query into an Evaluator, then reuse the Evaluator-based overload
        return selectNodes(evaluatorOf(cssQuery), type);
    }

    /**
     Find the first Node that matches the {@link Selector} CSS query, with this element as the starting context.
     <p>This is effectively the same as calling {@code element.selectNodes(query).first()}, but is more efficient as
     query execution stops on the first hit.</p>
     <p>Also known as {@code querySelector()} in the Web DOM.</p>

     @param cssQuery a {@link Selector} CSS-like query
     @param type the type of node to find (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
     @param <T> the type of node to find
     @return the first matching node, or <b>{@code null}</b> if there is no match.
     @since 1.21.1
     @see #expectFirstNode(String, Class)
     */
    public @Nullable <T extends Node> T selectFirstNode(String cssQuery, Class<T> type) {
        // parse the query into an Evaluator, then reuse the Evaluator-based overload
        return selectFirstNode(evaluatorOf(cssQuery), type);
    }

    /**
     Finds the first Node that matches the supplied Evaluator, with this element as the starting context, or
     {@code null} if none match.

     @param evaluator an element evaluator
     @param type the type of node to find (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
     @param <T> the type of node to find
     @return the first matching node (walking down the tree, starting from this element), or {@code null} if none
     match.
     @since 1.21.1
     */
    public @Nullable <T extends Node> T selectFirstNode(Evaluator evaluator, Class<T> type) {
        return Collector.findFirstNode(evaluator, this, type);
    }

    /**
     Just like {@link #selectFirstNode(String, Class)}, but if there is no match, throws an
     {@link IllegalArgumentException}.
This is useful if you want to simply abort processing on a failed match.

     @param cssQuery a {@link Selector} CSS-like query
     @param type the type of node to find (e.g. {@link Element}, {@link LeafNode}, {@link TextNode} etc)
     @param <T> the type of node to find
     @return the first matching node
     @throws IllegalArgumentException if no match is found
     @since 1.21.1
     */
    public <T extends Node> T expectFirstNode(String cssQuery, Class<T> type) {
        // the message template is chosen by whether this element has a parent; the query and this element's tag
        // name are both supplied as message arguments in either case
        return Validate.expectNotNull(
            selectFirstNode(cssQuery, type),
            parent() != null ?
                "No nodes matched the query '%s' on element '%s'.":
                "No nodes matched the query '%s' in the document."
            , cssQuery, this.tagName()
        );
    }

    /**
     * Checks if this element matches the given {@link Selector} CSS query. Also known as {@code matches()} in the Web
     * DOM.
     *
     * @param cssQuery a {@link Selector} CSS query
     * @return if this element matches the query
     */
    public boolean is(String cssQuery) {
        return is(evaluatorOf(cssQuery));
    }

    /**
     * Check if this element matches the given evaluator.
     * @param evaluator an element evaluator
     * @return if this element matches
     */
    public boolean is(Evaluator evaluator) {
        // the evaluator is given this element's root as the context for the match
        return evaluator.matches(this.root(), this);
    }

    /**
     * Find the closest element up the tree of parents that matches the specified CSS query. Will return itself, an
     * ancestor, or {@code null} if there is no such matching element.
     * @param cssQuery a {@link Selector} CSS query
     * @return the closest ancestor element (possibly itself) that matches the provided query. {@code null} if not
     * found.
     */
    public @Nullable Element closest(String cssQuery) {
        return closest(evaluatorOf(cssQuery));
    }

    /**
     * Find the closest element up the tree of parents that matches the specified evaluator. Will return itself, an
     * ancestor, or {@code null} if there is no such matching element.
752 * @param evaluator a query evaluator 753 * @return the closest ancestor element (possibly itself) that matches the provided evaluator. {@code null} if not 754 * found. 755 */ 756 public @Nullable Element closest(Evaluator evaluator) { 757 Validate.notNull(evaluator); 758 Element el = this; 759 final Element root = root(); 760 do { 761 if (evaluator.matches(root, el)) 762 return el; 763 el = el.parent(); 764 } while (el != null); 765 return null; 766 } 767 768 /** 769 Find Elements that match the supplied {@index XPath} expression. 770 <p>Note that for convenience of writing the Xpath expression, namespaces are disabled, and queries can be 771 expressed using the element's local name only.</p> 772 <p>By default, XPath 1.0 expressions are supported. If you would to use XPath 2.0 or higher, you can provide an 773 alternate XPathFactory implementation:</p> 774 <ol> 775 <li>Add the implementation to your classpath. E.g. to use <a href="https://www.saxonica.com/products/products.xml">Saxon-HE</a>, add <a href="https://mvnrepository.com/artifact/net.sf.saxon/Saxon-HE">net.sf.saxon:Saxon-HE</a> to your build.</li> 776 <li>Set the system property <code>javax.xml.xpath.XPathFactory:jsoup</code> to the implementing classname. E.g.:<br> 777 <code>System.setProperty(W3CDom.XPathFactoryProperty, "net.sf.saxon.xpath.XPathFactoryImpl");</code> 778 </li> 779 </ol> 780 781 @param xpath XPath expression 782 @return matching elements, or an empty list if none match. 783 @see #selectXpath(String, Class) 784 @since 1.14.3 785 */ 786 public Elements selectXpath(String xpath) { 787 return new Elements(NodeUtils.selectXpath(xpath, this, Element.class)); 788 } 789 790 /** 791 Find Nodes that match the supplied XPath expression. 792 <p>For example, to select TextNodes under {@code p} elements: </p> 793 <pre>List<TextNode> textNodes = doc.selectXpath("//body//p//text()", TextNode.class);</pre> 794 <p>Note that in the jsoup DOM, Attribute objects are not Nodes. 
To directly select attribute values, do something
     like:</p>
     <pre>List&lt;String&gt; hrefs = doc.selectXpath("//a").eachAttr("href");</pre>
     @param xpath XPath expression
     @param nodeType the jsoup node type to return
     @param <T> the jsoup node type to return
     @see #selectXpath(String)
     @return a list of matching nodes
     @since 1.14.3
     */
    public <T extends Node> List<T> selectXpath(String xpath, Class<T> nodeType) {
        return NodeUtils.selectXpath(xpath, this, nodeType);
    }

    /**
     * Insert a node to the end of this Element's children. The incoming node will be re-parented.
     *
     * @param child node to add; must not be null
     * @return this Element, for chaining
     * @see #prependChild(Node)
     * @see #insertChildren(int, Collection)
     */
    public Element appendChild(Node child) {
        Validate.notNull(child);

        // was - Node#addChildren(child). short-circuits an array create and a loop.
        reparentChild(child);
        ensureChildNodes();
        childNodes.add(child);
        // the child was added last, so its sibling index is the final position in the list
        child.setSiblingIndex(childNodes.size() - 1);
        return this;
    }

    /**
     Insert the given nodes to the end of this Element's children.

     @param children nodes to add
     @return this Element, for chaining
     @see #insertChildren(int, Collection)
     */
    public Element appendChildren(Collection<? extends Node> children) {
        // index -1 means "at the end" in insertChildren
        insertChildren(-1, children);
        return this;
    }

    /**
     * Add this element to the supplied parent element, as its next child.
     *
     * @param parent element to which this element will be appended; must not be null
     * @return this element, so that you can continue modifying the element
     */
    public Element appendTo(Element parent) {
        Validate.notNull(parent);
        parent.appendChild(this);
        return this;
    }

    /**
     * Add a node to the start of this element's children.
     *
     * @param child node to add.
     * @return this element, so that you can add more child nodes or elements.
855 */ 856 public Element prependChild(Node child) { 857 Validate.notNull(child); 858 859 addChildren(0, child); 860 return this; 861 } 862 863 /** 864 Insert the given nodes to the start of this Element's children. 865 866 @param children nodes to add 867 @return this Element, for chaining 868 @see #insertChildren(int, Collection) 869 */ 870 public Element prependChildren(Collection<? extends Node> children) { 871 insertChildren(0, children); 872 return this; 873 } 874 875 876 /** 877 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 878 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 879 * 880 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 881 * end 882 * @param children child nodes to insert 883 * @return this element, for chaining. 884 */ 885 public Element insertChildren(int index, Collection<? extends Node> children) { 886 Validate.notNull(children, "Children collection to be inserted must not be null."); 887 int currentSize = childNodeSize(); 888 if (index < 0) index += currentSize +1; // roll around 889 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 890 addChildren(index, children.toArray(new Node[0])); 891 return this; 892 } 893 894 /** 895 * Inserts the given child nodes into this element at the specified index. Current nodes will be shifted to the 896 * right. The inserted nodes will be moved from their current parent. To prevent moving, copy the nodes first. 897 * 898 * @param index 0-based index to insert children at. Specify {@code 0} to insert at the start, {@code -1} at the 899 * end 900 * @param children child nodes to insert 901 * @return this element, for chaining. 902 */ 903 public Element insertChildren(int index, Node... 
children) { 904 Validate.notNull(children, "Children collection to be inserted must not be null."); 905 int currentSize = childNodeSize(); 906 if (index < 0) index += currentSize +1; // roll around 907 Validate.isTrue(index >= 0 && index <= currentSize, "Insert position out of bounds."); 908 909 addChildren(index, children); 910 return this; 911 } 912 913 /** 914 * Create a new element by tag name, and add it as this Element's last child. 915 * 916 * @param tagName the name of the tag (e.g. {@code div}). 917 * @return the new element, to allow you to add content to it, e.g.: 918 * {@code parent.appendElement("h1").attr("id", "header").text("Welcome");} 919 */ 920 public Element appendElement(String tagName) { 921 return appendElement(tagName, tag.namespace()); 922 } 923 924 /** 925 * Create a new element by tag name and namespace, add it as this Element's last child. 926 * 927 * @param tagName the name of the tag (e.g. {@code div}). 928 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 929 * @return the new element, in the specified namespace 930 */ 931 public Element appendElement(String tagName, String namespace) { 932 Parser parser = NodeUtils.parser(this); 933 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 934 appendChild(child); 935 return child; 936 } 937 938 /** 939 * Create a new element by tag name, and add it as this Element's first child. 940 * 941 * @param tagName the name of the tag (e.g. {@code div}). 942 * @return the new element, to allow you to add content to it, e.g.: 943 * {@code parent.prependElement("h1").attr("id", "header").text("Welcome");} 944 */ 945 public Element prependElement(String tagName) { 946 return prependElement(tagName, tag.namespace()); 947 } 948 949 /** 950 * Create a new element by tag name and namespace, and add it as this Element's first child. 951 * 952 * @param tagName the name of the tag (e.g. {@code div}). 
953 * @param namespace the namespace of the tag (e.g. {@link Parser#NamespaceHtml}) 954 * @return the new element, in the specified namespace 955 */ 956 public Element prependElement(String tagName, String namespace) { 957 Parser parser = NodeUtils.parser(this); 958 Element child = new Element(parser.tagSet().valueOf(tagName, namespace, parser.settings()), baseUri()); 959 prependChild(child); 960 return child; 961 } 962 963 /** 964 * Create and append a new TextNode to this element. 965 * 966 * @param text the (un-encoded) text to add 967 * @return this element 968 */ 969 public Element appendText(String text) { 970 Validate.notNull(text); 971 TextNode node = new TextNode(text); 972 appendChild(node); 973 return this; 974 } 975 976 /** 977 * Create and prepend a new TextNode to this element. 978 * 979 * @param text the decoded text to add 980 * @return this element 981 */ 982 public Element prependText(String text) { 983 Validate.notNull(text); 984 TextNode node = new TextNode(text); 985 prependChild(node); 986 return this; 987 } 988 989 /** 990 * Add inner HTML to this element. The supplied HTML will be parsed, and each node appended to the end of the children. 991 * @param html HTML to add inside this element, after the existing HTML 992 * @return this element 993 * @see #html(String) 994 */ 995 public Element append(String html) { 996 Validate.notNull(html); 997 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 998 addChildren(nodes.toArray(new Node[0])); 999 return this; 1000 } 1001 1002 /** 1003 * Add inner HTML into this element. The supplied HTML will be parsed, and each node prepended to the start of the element's children. 
1004 * @param html HTML to add inside this element, before the existing HTML 1005 * @return this element 1006 * @see #html(String) 1007 */ 1008 public Element prepend(String html) { 1009 Validate.notNull(html); 1010 List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, this, baseUri()); 1011 addChildren(0, nodes.toArray(new Node[0])); 1012 return this; 1013 } 1014 1015 /** 1016 * Insert the specified HTML into the DOM before this element (as a preceding sibling). 1017 * 1018 * @param html HTML to add before this element 1019 * @return this element, for chaining 1020 * @see #after(String) 1021 */ 1022 @Override 1023 public Element before(String html) { 1024 return (Element) super.before(html); 1025 } 1026 1027 /** 1028 * Insert the specified node into the DOM before this node (as a preceding sibling). 1029 * @param node to add before this element 1030 * @return this Element, for chaining 1031 * @see #after(Node) 1032 */ 1033 @Override 1034 public Element before(Node node) { 1035 return (Element) super.before(node); 1036 } 1037 1038 /** 1039 * Insert the specified HTML into the DOM after this element (as a following sibling). 1040 * 1041 * @param html HTML to add after this element 1042 * @return this element, for chaining 1043 * @see #before(String) 1044 */ 1045 @Override 1046 public Element after(String html) { 1047 return (Element) super.after(html); 1048 } 1049 1050 /** 1051 * Insert the specified node into the DOM after this node (as a following sibling). 1052 * @param node to add after this element 1053 * @return this element, for chaining 1054 * @see #before(Node) 1055 */ 1056 @Override 1057 public Element after(Node node) { 1058 return (Element) super.after(node); 1059 } 1060 1061 /** 1062 * Remove all the element's child nodes. Any attributes are left as-is. Each child node has its parent set to 1063 * {@code null}. 
1064 * @return this element 1065 */ 1066 @Override 1067 public Element empty() { 1068 // Detach each of the children -> parent links: 1069 int size = childNodes.size(); 1070 for (int i = 0; i < size; i++) 1071 childNodes.get(i).parentNode = null; 1072 childNodes.clear(); 1073 return this; 1074 } 1075 1076 /** 1077 * Wrap the supplied HTML around this element. 1078 * 1079 * @param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 1080 * @return this element, for chaining. 1081 */ 1082 @Override 1083 public Element wrap(String html) { 1084 return (Element) super.wrap(html); 1085 } 1086 1087 /** 1088 Gets an #id selector for this element, if it has a unique ID. Otherwise, returns an empty string. 1089 1090 @param ownerDoc the document that owns this element, if there is one 1091 */ 1092 private String uniqueIdSelector(@Nullable Document ownerDoc) { 1093 String id = id(); 1094 if (!id.isEmpty()) { // check if the ID is unique and matches this 1095 String idSel = "#" + escapeCssIdentifier(id); 1096 if (ownerDoc != null) { 1097 Elements els = ownerDoc.select(idSel); 1098 if (els.size() == 1 && els.get(0) == this) return idSel; 1099 } else { 1100 return idSel; 1101 } 1102 } 1103 return EmptyString; 1104 } 1105 1106 /** 1107 Get a CSS selector that will uniquely select this element. 1108 <p> 1109 If the element has an ID, returns #id; otherwise returns the parent (if any) CSS selector, followed by 1110 {@literal '>'}, followed by a unique selector for the element (tag.class.class:nth-child(n)). 1111 </p> 1112 1113 @return the CSS Path that can be used to retrieve the element in a selector. 
1114 */ 1115 public String cssSelector() { 1116 Document ownerDoc = ownerDocument(); 1117 String idSel = uniqueIdSelector(ownerDoc); 1118 if (!idSel.isEmpty()) return idSel; 1119 1120 // No unique ID, work up the parent stack and find either a unique ID to hang from, or just a GP > Parent > Child chain 1121 StringBuilder selector = StringUtil.borrowBuilder(); 1122 Element el = this; 1123 while (el != null && !(el instanceof Document)) { 1124 idSel = el.uniqueIdSelector(ownerDoc); 1125 if (!idSel.isEmpty()) { 1126 selector.insert(0, idSel); 1127 break; // found a unique ID to use as ancestor; stop 1128 } 1129 selector.insert(0, el.cssSelectorComponent()); 1130 el = el.parent(); 1131 } 1132 return StringUtil.releaseBuilder(selector); 1133 } 1134 1135 private String cssSelectorComponent() { 1136 // Escape tagname, and translate HTML namespace ns:tag to CSS namespace syntax ns|tag 1137 String tagName = escapeCssIdentifier(tagName()).replace("\\:", "|"); 1138 StringBuilder selector = StringUtil.borrowBuilder().append(tagName); 1139 String classes = classNames().stream().map(TokenQueue::escapeCssIdentifier) 1140 .collect(StringUtil.joining(".")); 1141 if (!classes.isEmpty()) 1142 selector.append('.').append(classes); 1143 1144 if (parent() == null || parent() instanceof Document) // don't add Document to selector, as will always have a html node 1145 return StringUtil.releaseBuilder(selector); 1146 1147 selector.insert(0, " > "); 1148 if (parent().select(selector.toString()).size() > 1) 1149 selector.append(String.format( 1150 ":nth-child(%d)", elementSiblingIndex() + 1)); 1151 1152 return StringUtil.releaseBuilder(selector); 1153 } 1154 1155 /** 1156 * Get sibling elements. If the element has no sibling elements, returns an empty list. An element is not a sibling 1157 * of itself, so will not be included in the returned list. 
1158 * @return sibling elements 1159 */ 1160 public Elements siblingElements() { 1161 if (parentNode == null) 1162 return new Elements(0); 1163 1164 List<Element> elements = parent().childElementsList(); 1165 Elements siblings = new Elements(elements.size() - 1); 1166 for (Element el: elements) 1167 if (el != this) 1168 siblings.add(el); 1169 return siblings; 1170 } 1171 1172 1173 1174 /** 1175 * Get each of the sibling elements that come after this element. 1176 * 1177 * @return each of the element siblings after this element, or an empty list if there are no next sibling elements 1178 */ 1179 public Elements nextElementSiblings() { 1180 return nextElementSiblings(true); 1181 } 1182 1183 /** 1184 * Get each of the element siblings before this element. 1185 * 1186 * @return the previous element siblings, or an empty list if there are none. 1187 */ 1188 public Elements previousElementSiblings() { 1189 return nextElementSiblings(false); 1190 } 1191 1192 private Elements nextElementSiblings(boolean next) { 1193 Elements els = new Elements(); 1194 if (parentNode == null) 1195 return els; 1196 els.add(this); 1197 return next ? els.nextAll() : els.prevAll(); 1198 } 1199 1200 /** 1201 * Gets the first Element sibling of this element. That may be this element. 1202 * @return the first sibling that is an element (aka the parent's first element child) 1203 */ 1204 public Element firstElementSibling() { 1205 if (parent() != null) { 1206 //noinspection DataFlowIssue (not nullable, would be this is no other sibs) 1207 return parent().firstElementChild(); 1208 } else 1209 return this; // orphan is its own first sibling 1210 } 1211 1212 /** 1213 * Get the list index of this element in its element sibling list. I.e. if this is the first element 1214 * sibling, returns 0. 
1215 * @return position in element sibling list 1216 */ 1217 public int elementSiblingIndex() { 1218 if (parent() == null) return 0; 1219 return indexInList(this, parent().childElementsList()); 1220 } 1221 1222 /** 1223 * Gets the last element sibling of this element. That may be this element. 1224 * @return the last sibling that is an element (aka the parent's last element child) 1225 */ 1226 public Element lastElementSibling() { 1227 if (parent() != null) { 1228 //noinspection DataFlowIssue (not nullable, would be this if no other sibs) 1229 return parent().lastElementChild(); 1230 } else 1231 return this; 1232 } 1233 1234 private static <E extends Element> int indexInList(Element search, List<E> elements) { 1235 final int size = elements.size(); 1236 for (int i = 0; i < size; i++) { 1237 if (elements.get(i) == search) 1238 return i; 1239 } 1240 return 0; 1241 } 1242 1243 /** 1244 Gets the first child of this Element that is an Element, or {@code null} if there is none. 1245 @return the first Element child node, or null. 1246 @see #firstChild() 1247 @see #lastElementChild() 1248 @since 1.15.2 1249 */ 1250 public @Nullable Element firstElementChild() { 1251 int size = childNodes.size(); 1252 for (int i = 0; i < size; i++) { 1253 Node node = childNodes.get(i); 1254 if (node instanceof Element) return (Element) node; 1255 } 1256 return null; 1257 } 1258 1259 /** 1260 Gets the last child of this Element that is an Element, or @{code null} if there is none. 1261 @return the last Element child node, or null. 1262 @see #lastChild() 1263 @see #firstElementChild() 1264 @since 1.15.2 1265 */ 1266 public @Nullable Element lastElementChild() { 1267 for (int i = childNodes.size() - 1; i >= 0; i--) { 1268 Node node = childNodes.get(i); 1269 if (node instanceof Element) return (Element) node; 1270 } 1271 return null; 1272 } 1273 1274 // DOM type methods 1275 1276 /** 1277 * Finds elements, including and recursively under this element, with the specified tag name. 
1278 * @param tagName The tag name to search for (case insensitively). 1279 * @return a matching unmodifiable list of elements. Will be empty if this element and none of its children match. 1280 */ 1281 public Elements getElementsByTag(String tagName) { 1282 Validate.notEmpty(tagName); 1283 tagName = normalize(tagName); 1284 1285 return Collector.collect(new Evaluator.Tag(tagName), this); 1286 } 1287 1288 /** 1289 * Find an element by ID, including or under this element. 1290 * <p> 1291 * Note that this finds the first matching ID, starting with this element. If you search down from a different 1292 * starting point, it is possible to find a different element by ID. For unique element by ID within a Document, 1293 * use {@link Document#getElementById(String)} 1294 * @param id The ID to search for. 1295 * @return The first matching element by ID, starting with this element, or null if none found. 1296 */ 1297 public @Nullable Element getElementById(String id) { 1298 Validate.notEmpty(id); 1299 return Collector.findFirst(new Evaluator.Id(id), this); 1300 } 1301 1302 /** 1303 * Find elements that have this class, including or under this element. Case-insensitive. 1304 * <p> 1305 * Elements can have multiple classes (e.g. {@code <div class="header round first">}). This method 1306 * checks each class, so you can find the above with {@code el.getElementsByClass("header");}. 1307 * 1308 * @param className the name of the class to search for. 1309 * @return elements with the supplied class name, empty if none 1310 * @see #hasClass(String) 1311 * @see #classNames() 1312 */ 1313 public Elements getElementsByClass(String className) { 1314 Validate.notEmpty(className); 1315 1316 return Collector.collect(new Evaluator.Class(className), this); 1317 } 1318 1319 /** 1320 * Find elements that have a named attribute set. Case-insensitive. 1321 * 1322 * @param key name of the attribute, e.g. 
{@code href} 1323 * @return elements that have this attribute, empty if none 1324 */ 1325 public Elements getElementsByAttribute(String key) { 1326 Validate.notEmpty(key); 1327 key = key.trim(); 1328 1329 return Collector.collect(new Evaluator.Attribute(key), this); 1330 } 1331 1332 /** 1333 * Find elements that have an attribute name starting with the supplied prefix. Use {@code data-} to find elements 1334 * that have HTML5 datasets. 1335 * @param keyPrefix name prefix of the attribute e.g. {@code data-} 1336 * @return elements that have attribute names that start with the prefix, empty if none. 1337 */ 1338 public Elements getElementsByAttributeStarting(String keyPrefix) { 1339 Validate.notEmpty(keyPrefix); 1340 keyPrefix = keyPrefix.trim(); 1341 1342 return Collector.collect(new Evaluator.AttributeStarting(keyPrefix), this); 1343 } 1344 1345 /** 1346 * Find elements that have an attribute with the specific value. Case-insensitive. 1347 * 1348 * @param key name of the attribute 1349 * @param value value of the attribute 1350 * @return elements that have this attribute with this value, empty if none 1351 */ 1352 public Elements getElementsByAttributeValue(String key, String value) { 1353 return Collector.collect(new Evaluator.AttributeWithValue(key, value), this); 1354 } 1355 1356 /** 1357 * Find elements that either do not have this attribute, or have it with a different value. Case-insensitive. 1358 * 1359 * @param key name of the attribute 1360 * @param value value of the attribute 1361 * @return elements that do not have a matching attribute 1362 */ 1363 public Elements getElementsByAttributeValueNot(String key, String value) { 1364 return Collector.collect(new Evaluator.AttributeWithValueNot(key, value), this); 1365 } 1366 1367 /** 1368 * Find elements that have attributes that start with the value prefix. Case-insensitive. 
1369 * 1370 * @param key name of the attribute 1371 * @param valuePrefix start of attribute value 1372 * @return elements that have attributes that start with the value prefix 1373 */ 1374 public Elements getElementsByAttributeValueStarting(String key, String valuePrefix) { 1375 return Collector.collect(new Evaluator.AttributeWithValueStarting(key, valuePrefix), this); 1376 } 1377 1378 /** 1379 * Find elements that have attributes that end with the value suffix. Case-insensitive. 1380 * 1381 * @param key name of the attribute 1382 * @param valueSuffix end of the attribute value 1383 * @return elements that have attributes that end with the value suffix 1384 */ 1385 public Elements getElementsByAttributeValueEnding(String key, String valueSuffix) { 1386 return Collector.collect(new Evaluator.AttributeWithValueEnding(key, valueSuffix), this); 1387 } 1388 1389 /** 1390 * Find elements that have attributes whose value contains the match string. Case-insensitive. 1391 * 1392 * @param key name of the attribute 1393 * @param match substring of value to search for 1394 * @return elements that have attributes containing this text 1395 */ 1396 public Elements getElementsByAttributeValueContaining(String key, String match) { 1397 return Collector.collect(new Evaluator.AttributeWithValueContaining(key, match), this); 1398 } 1399 1400 /** 1401 * Find elements that have an attribute whose value matches the supplied regular expression. 1402 * @param key name of the attribute 1403 * @param pattern compiled regular expression to match against attribute values 1404 * @return elements that have attributes matching this regular expression 1405 */ 1406 public Elements getElementsByAttributeValueMatching(String key, Pattern pattern) { 1407 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1408 } 1409 1410 /** 1411 * Find elements that have attributes whose values match the supplied regular expression. 
1412 * @param key name of the attribute 1413 * @param regex regular expression to match against attribute values. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1414 * @return elements that have attributes matching this regular expression 1415 */ 1416 public Elements getElementsByAttributeValueMatching(String key, String regex) { 1417 Regex pattern; 1418 try { 1419 pattern = Regex.compile(regex); 1420 } catch (PatternSyntaxException e) { 1421 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1422 } 1423 return Collector.collect(new Evaluator.AttributeWithValueMatching(key, pattern), this); 1424 } 1425 1426 /** 1427 * Find elements whose sibling index is less than the supplied index. 1428 * @param index 0-based index 1429 * @return elements less than index 1430 */ 1431 public Elements getElementsByIndexLessThan(int index) { 1432 return Collector.collect(new Evaluator.IndexLessThan(index), this); 1433 } 1434 1435 /** 1436 * Find elements whose sibling index is greater than the supplied index. 1437 * @param index 0-based index 1438 * @return elements greater than index 1439 */ 1440 public Elements getElementsByIndexGreaterThan(int index) { 1441 return Collector.collect(new Evaluator.IndexGreaterThan(index), this); 1442 } 1443 1444 /** 1445 * Find elements whose sibling index is equal to the supplied index. 1446 * @param index 0-based index 1447 * @return elements equal to index 1448 */ 1449 public Elements getElementsByIndexEquals(int index) { 1450 return Collector.collect(new Evaluator.IndexEquals(index), this); 1451 } 1452 1453 /** 1454 * Find elements that contain the specified string. The search is case-insensitive. The text may appear directly 1455 * in the element, or in any of its descendants. 
1456 * @param searchText to look for in the element's text 1457 * @return elements that contain the string, case-insensitive. 1458 * @see Element#text() 1459 */ 1460 public Elements getElementsContainingText(String searchText) { 1461 return Collector.collect(new Evaluator.ContainsText(searchText), this); 1462 } 1463 1464 /** 1465 * Find elements that directly contain the specified string. The search is case-insensitive. The text must appear directly 1466 * in the element, not in any of its descendants. 1467 * @param searchText to look for in the element's own text 1468 * @return elements that contain the string, case-insensitive. 1469 * @see Element#ownText() 1470 */ 1471 public Elements getElementsContainingOwnText(String searchText) { 1472 return Collector.collect(new Evaluator.ContainsOwnText(searchText), this); 1473 } 1474 1475 /** 1476 * Find elements whose text matches the supplied regular expression. 1477 * @param pattern regular expression to match text against 1478 * @return elements matching the supplied regular expression. 1479 * @see Element#text() 1480 */ 1481 public Elements getElementsMatchingText(Pattern pattern) { 1482 return Collector.collect(new Evaluator.Matches(pattern), this); 1483 } 1484 1485 /** 1486 * Find elements whose text matches the supplied regular expression. 1487 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1488 * @return elements matching the supplied regular expression. 
1489 * @see Element#text() 1490 */ 1491 public Elements getElementsMatchingText(String regex) { 1492 Regex pattern; 1493 try { 1494 pattern = Regex.compile(regex); 1495 } catch (PatternSyntaxException e) { 1496 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1497 } 1498 return Collector.collect(new Evaluator.Matches(pattern), this); 1499 } 1500 1501 /** 1502 * Find elements whose own text matches the supplied regular expression. 1503 * @param pattern regular expression to match text against 1504 * @return elements matching the supplied regular expression. 1505 * @see Element#ownText() 1506 */ 1507 public Elements getElementsMatchingOwnText(Pattern pattern) { 1508 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1509 } 1510 1511 /** 1512 * Find elements whose own text matches the supplied regular expression. 1513 * @param regex regular expression to match text against. You can use <a href="http://java.sun.com/docs/books/tutorial/essential/regex/pattern.html#embedded">embedded flags</a> (such as {@code (?i)} and {@code (?m)}) to control regex options. 1514 * @return elements matching the supplied regular expression. 1515 * @see Element#ownText() 1516 */ 1517 public Elements getElementsMatchingOwnText(String regex) { 1518 Regex pattern; 1519 try { 1520 pattern = Regex.compile(regex); 1521 } catch (PatternSyntaxException e) { 1522 throw new IllegalArgumentException("Pattern syntax error: " + regex, e); 1523 } 1524 return Collector.collect(new Evaluator.MatchesOwn(pattern), this); 1525 } 1526 1527 /** 1528 * Find all elements under this element (including self, and children of children). 1529 * 1530 * @return all elements 1531 */ 1532 public Elements getAllElements() { 1533 return Collector.collect(new Evaluator.AllElements(), this); 1534 } 1535 1536 /** 1537 Gets the <b>normalized, combined text</b> of this element and all its children. Whitespace is normalized and 1538 trimmed. 
1539 <p>For example, given HTML {@code <p>Hello <b>there</b> now! </p>}, {@code p.text()} returns {@code "Hello there 1540 now!"} 1541 <p>If you do not want normalized text, use {@link #wholeText()}. If you want just the text of this node (and not 1542 children), use {@link #ownText()} 1543 <p>Note that this method returns the textual content that would be presented to a reader. The contents of data 1544 nodes (such as {@code <script>} tags) are not considered text. Use {@link #data()} or {@link #html()} to retrieve 1545 that content. 1546 1547 @return decoded, normalized text, or empty string if none. 1548 @see #wholeText() 1549 @see #ownText() 1550 @see #textNodes() 1551 */ 1552 public String text() { 1553 final StringBuilder accum = StringUtil.borrowBuilder(); 1554 new TextAccumulator(accum).traverse(this); 1555 return StringUtil.releaseBuilder(accum).trim(); 1556 } 1557 1558 private static class TextAccumulator implements NodeVisitor { 1559 private final StringBuilder accum; 1560 1561 public TextAccumulator(StringBuilder accum) { 1562 this.accum = accum; 1563 } 1564 1565 @Override public void head(Node node, int depth) { 1566 if (node instanceof TextNode) { 1567 TextNode textNode = (TextNode) node; 1568 appendNormalisedText(accum, textNode); 1569 } else if (node instanceof Element) { 1570 Element element = (Element) node; 1571 if (accum.length() > 0 && 1572 (element.isBlock() || element.nameIs("br")) && 1573 !lastCharIsWhitespace(accum)) 1574 accum.append(' '); 1575 } 1576 } 1577 1578 @Override public void tail(Node node, int depth) { 1579 // make sure there is a space between block tags and immediately following text nodes or inline elements <div>One</div>Two should be "One Two". 
1580 if (node instanceof Element) { 1581 Element element = (Element) node; 1582 Node next = node.nextSibling(); 1583 if (!element.tag.isInline() && (next instanceof TextNode || next instanceof Element && ((Element) next).tag.isInline()) && !lastCharIsWhitespace(accum)) 1584 accum.append(' '); 1585 } 1586 1587 } 1588 } 1589 1590 /** 1591 Get the non-normalized, decoded text of this element and its children, including only any newlines and spaces 1592 present in the original source. 1593 @return decoded, non-normalized text 1594 @see #text() 1595 @see #wholeOwnText() 1596 */ 1597 public String wholeText() { 1598 return wholeTextOf(nodeStream()); 1599 } 1600 1601 /** 1602 An Element's nodeValue is its whole own text. 1603 */ 1604 @Override 1605 public String nodeValue() { 1606 return wholeOwnText(); 1607 } 1608 1609 private static String wholeTextOf(Stream<Node> stream) { 1610 return stream.map(node -> { 1611 if (node instanceof TextNode) return ((TextNode) node).getWholeText(); 1612 if (node.nameIs("br")) return "\n"; 1613 return ""; 1614 }).collect(StringUtil.joining("")); 1615 } 1616 1617 /** 1618 Get the non-normalized, decoded text of this element, <b>not including</b> any child elements, including any 1619 newlines and spaces present in the original source. 1620 @return decoded, non-normalized text that is a direct child of this Element 1621 @see #text() 1622 @see #wholeText() 1623 @see #ownText() 1624 @since 1.15.1 1625 */ 1626 public String wholeOwnText() { 1627 return wholeTextOf(childNodes.stream()); 1628 } 1629 1630 /** 1631 * Gets the (normalized) text owned by this element only; does not get the combined text of all children. 1632 * <p> 1633 * For example, given HTML {@code <p>Hello <b>there</b> now!</p>}, {@code p.ownText()} returns {@code "Hello now!"}, 1634 * whereas {@code p.text()} returns {@code "Hello there now!"}. 1635 * Note that the text within the {@code b} element is not returned, as it is not a direct child of the {@code p} element. 
1636 * 1637 * @return decoded text, or empty string if none. 1638 * @see #text() 1639 * @see #textNodes() 1640 */ 1641 public String ownText() { 1642 StringBuilder sb = StringUtil.borrowBuilder(); 1643 ownText(sb); 1644 return StringUtil.releaseBuilder(sb).trim(); 1645 } 1646 1647 private void ownText(StringBuilder accum) { 1648 for (int i = 0; i < childNodeSize(); i++) { 1649 Node child = childNodes.get(i); 1650 if (child instanceof TextNode) { 1651 TextNode textNode = (TextNode) child; 1652 appendNormalisedText(accum, textNode); 1653 } else if (child.nameIs("br") && !lastCharIsWhitespace(accum)) { 1654 accum.append(" "); 1655 } 1656 } 1657 } 1658 1659 private static void appendNormalisedText(StringBuilder accum, TextNode textNode) { 1660 String text = textNode.getWholeText(); 1661 if (preserveWhitespace(textNode.parentNode) || textNode instanceof CDataNode) 1662 accum.append(text); 1663 else 1664 StringUtil.appendNormalisedWhitespace(accum, text, lastCharIsWhitespace(accum)); 1665 } 1666 1667 static boolean preserveWhitespace(@Nullable Node node) { 1668 // looks only at this element and five levels up, to prevent recursion & needless stack searches 1669 if (node instanceof Element) { 1670 Element el = (Element) node; 1671 int i = 0; 1672 do { 1673 if (el.tag.preserveWhitespace()) 1674 return true; 1675 el = el.parent(); 1676 i++; 1677 } while (i < 6 && el != null); 1678 } 1679 return false; 1680 } 1681 1682 /** 1683 * Set the text of this element. Any existing contents (text or elements) will be cleared. 
1684 * <p>As a special case, for {@code <script>} and {@code <style>} tags, the input text will be treated as data, 1685 * not visible text.</p> 1686 * @param text decoded text 1687 * @return this element 1688 */ 1689 public Element text(String text) { 1690 Validate.notNull(text); 1691 empty(); 1692 // special case for script/style in HTML (or customs): should be data node 1693 if (tag().is(Tag.Data)) 1694 appendChild(new DataNode(text)); 1695 else 1696 appendChild(new TextNode(text)); 1697 1698 return this; 1699 } 1700 1701 /** 1702 Checks if the current element or any of its child elements contain non-whitespace text. 1703 @return {@code true} if the element has non-blank text content, {@code false} otherwise. 1704 */ 1705 public boolean hasText() { 1706 AtomicBoolean hasText = new AtomicBoolean(false); 1707 filter((node, depth) -> { 1708 if (node instanceof TextNode) { 1709 TextNode textNode = (TextNode) node; 1710 if (!textNode.isBlank()) { 1711 hasText.set(true); 1712 return NodeFilter.FilterResult.STOP; 1713 } 1714 } 1715 return NodeFilter.FilterResult.CONTINUE; 1716 }); 1717 return hasText.get(); 1718 } 1719 1720 /** 1721 * Get the combined data of this element. Data is e.g. the inside of a {@code <script>} tag. Note that data is NOT the 1722 * text of the element. Use {@link #text()} to get the text that would be visible to a user, and {@code data()} 1723 * for the contents of scripts, comments, CSS styles, etc. 
1724 * 1725 * @return the data, or empty string if none 1726 * 1727 * @see #dataNodes() 1728 */ 1729 public String data() { 1730 StringBuilder sb = StringUtil.borrowBuilder(); 1731 traverse((childNode, depth) -> { 1732 if (childNode instanceof DataNode) { 1733 DataNode data = (DataNode) childNode; 1734 sb.append(data.getWholeData()); 1735 } else if (childNode instanceof Comment) { 1736 Comment comment = (Comment) childNode; 1737 sb.append(comment.getData()); 1738 } else if (childNode instanceof CDataNode) { 1739 // this shouldn't really happen because the html parser won't see the cdata as anything special when parsing script. 1740 // but in case another type gets through. 1741 CDataNode cDataNode = (CDataNode) childNode; 1742 sb.append(cDataNode.getWholeText()); 1743 } 1744 }); 1745 return StringUtil.releaseBuilder(sb); 1746 } 1747 1748 /** 1749 * Gets the literal value of this element's "class" attribute, which may include multiple class names, space 1750 * separated. (E.g. on <code><div class="header gray"></code> returns, "<code>header gray</code>") 1751 * @return The literal class attribute, or <b>empty string</b> if no class attribute set. 1752 */ 1753 public String className() { 1754 return attr("class").trim(); 1755 } 1756 1757 /** 1758 * Get each of the element's class names. E.g. on element {@code <div class="header gray">}, 1759 * returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to 1760 * the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them. 
1761 * @return set of classnames, empty if no class attribute 1762 */ 1763 public Set<String> classNames() { 1764 String[] names = ClassSplit.split(className()); 1765 Set<String> classNames = new LinkedHashSet<>(Arrays.asList(names)); 1766 classNames.remove(""); // if classNames() was empty, would include an empty class 1767 1768 return classNames; 1769 } 1770 1771 /** 1772 Set the element's {@code class} attribute to the supplied class names. 1773 @param classNames set of classes 1774 @return this element, for chaining 1775 */ 1776 public Element classNames(Set<String> classNames) { 1777 Validate.notNull(classNames); 1778 if (classNames.isEmpty()) { 1779 attributes().remove("class"); 1780 } else { 1781 attributes().put("class", StringUtil.join(classNames, " ")); 1782 } 1783 return this; 1784 } 1785 1786 /** 1787 * Tests if this element has a class. Case-insensitive. 1788 * @param className name of class to check for 1789 * @return true if it does, false if not 1790 */ 1791 // performance sensitive 1792 public boolean hasClass(String className) { 1793 if (attributes == null) 1794 return false; 1795 1796 final String classAttr = attributes.getIgnoreCase("class"); 1797 final int len = classAttr.length(); 1798 final int wantLen = className.length(); 1799 1800 if (len == 0 || len < wantLen) { 1801 return false; 1802 } 1803 1804 // if both lengths are equal, only need compare the className with the attribute 1805 if (len == wantLen) { 1806 return className.equalsIgnoreCase(classAttr); 1807 } 1808 1809 // otherwise, scan for whitespace and compare regions (with no string or arraylist allocations) 1810 boolean inClass = false; 1811 int start = 0; 1812 for (int i = 0; i < len; i++) { 1813 if (Character.isWhitespace(classAttr.charAt(i))) { 1814 if (inClass) { 1815 // white space ends a class name, compare it with the requested one, ignore case 1816 if (i - start == wantLen && classAttr.regionMatches(true, start, className, 0, wantLen)) { 1817 return true; 1818 } 1819 
inClass = false; 1820 } 1821 } else { 1822 if (!inClass) { 1823 // we're in a class name : keep the start of the substring 1824 inClass = true; 1825 start = i; 1826 } 1827 } 1828 } 1829 1830 // check the last entry 1831 if (inClass && len - start == wantLen) { 1832 return classAttr.regionMatches(true, start, className, 0, wantLen); 1833 } 1834 1835 return false; 1836 } 1837 1838 /** 1839 Add a class name to this element's {@code class} attribute. 1840 @param className class name to add 1841 @return this element 1842 */ 1843 public Element addClass(String className) { 1844 Validate.notNull(className); 1845 1846 Set<String> classes = classNames(); 1847 classes.add(className); 1848 classNames(classes); 1849 1850 return this; 1851 } 1852 1853 /** 1854 Remove a class name from this element's {@code class} attribute. 1855 @param className class name to remove 1856 @return this element 1857 */ 1858 public Element removeClass(String className) { 1859 Validate.notNull(className); 1860 1861 Set<String> classes = classNames(); 1862 classes.remove(className); 1863 classNames(classes); 1864 1865 return this; 1866 } 1867 1868 /** 1869 Toggle a class name on this element's {@code class} attribute: if present, remove it; otherwise add it. 1870 @param className class name to toggle 1871 @return this element 1872 */ 1873 public Element toggleClass(String className) { 1874 Validate.notNull(className); 1875 1876 Set<String> classes = classNames(); 1877 if (classes.contains(className)) 1878 classes.remove(className); 1879 else 1880 classes.add(className); 1881 classNames(classes); 1882 1883 return this; 1884 } 1885 1886 /** 1887 * Get the value of a form element (input, textarea, etc). 1888 * @return the value of the form element, or empty string if not set. 1889 */ 1890 public String val() { 1891 if (elementIs("textarea", NamespaceHtml)) 1892 return text(); 1893 else 1894 return attr("value"); 1895 } 1896 1897 /** 1898 * Set the value of a form element (input, textarea, etc). 
1899 * @param value value to set 1900 * @return this element (for chaining) 1901 */ 1902 public Element val(String value) { 1903 if (elementIs("textarea", NamespaceHtml)) 1904 text(value); 1905 else 1906 attr("value", value); 1907 return this; 1908 } 1909 1910 /** 1911 Get the source range (start and end positions) of the end (closing) tag for this Element. Position tracking must be 1912 enabled prior to parsing the content. 1913 @return the range of the closing tag for this element, or {@code untracked} if its range was not tracked. 1914 @see org.jsoup.parser.Parser#setTrackPosition(boolean) 1915 @see Node#sourceRange() 1916 @see Range#isImplicit() 1917 @since 1.15.2 1918 */ 1919 public Range endSourceRange() { 1920 return Range.of(this, false); 1921 } 1922 1923 @Override 1924 void outerHtmlHead(final QuietAppendable accum, Document.OutputSettings out) { 1925 String tagName = safeTagName(out.syntax()); 1926 accum.append('<').append(tagName); 1927 if (attributes != null) attributes.html(accum, out); 1928 1929 if (childNodes.isEmpty()) { 1930 boolean xmlMode = out.syntax() == xml || !tag.namespace().equals(NamespaceHtml); 1931 if (xmlMode && (tag.is(Tag.SeenSelfClose) || (tag.isKnownTag() && (tag.isEmpty() || tag.isSelfClosing())))) { 1932 accum.append(" />"); 1933 } else if (!xmlMode && tag.isEmpty()) { // html void element 1934 accum.append('>'); 1935 } else { 1936 accum.append("></").append(tagName).append('>'); 1937 } 1938 } else { 1939 accum.append('>'); 1940 } 1941 } 1942 1943 @Override 1944 void outerHtmlTail(QuietAppendable accum, Document.OutputSettings out) { 1945 if (!childNodes.isEmpty()) 1946 accum.append("</").append(safeTagName(out.syntax())).append('>'); 1947 // if empty, we have already closed in htmlHead 1948 } 1949 1950 /* If XML syntax, normalizes < to _ in tag name. */ 1951 @Nullable private String safeTagName(Document.OutputSettings.Syntax syntax) { 1952 return syntax == xml ? 
Normalizer.xmlSafeTagName(tagName()) : tagName(); 1953 } 1954 1955 /** 1956 * Retrieves the element's inner HTML. E.g. on a {@code <div>} with one empty {@code <p>}, would return 1957 * {@code <p></p>}. (Whereas {@link #outerHtml()} would return {@code <div><p></p></div>}.) 1958 * 1959 * @return String of HTML. 1960 * @see #outerHtml() 1961 */ 1962 public String html() { 1963 StringBuilder sb = StringUtil.borrowBuilder(); 1964 html(sb); 1965 String html = StringUtil.releaseBuilder(sb); 1966 return NodeUtils.outputSettings(this).prettyPrint() ? html.trim() : html; 1967 } 1968 1969 @Override 1970 public <T extends Appendable> T html(T accum) { 1971 Node child = firstChild(); 1972 if (child != null) { 1973 Printer printer = Printer.printerFor(child, QuietAppendable.wrap(accum)); 1974 while (child != null) { 1975 printer.traverse(child); 1976 child = child.nextSibling(); 1977 } 1978 } 1979 return accum; 1980 } 1981 1982 /** 1983 * Set this element's inner HTML. Clears the existing HTML first. 1984 * @param html HTML to parse and set into this element 1985 * @return this element 1986 * @see #append(String) 1987 */ 1988 public Element html(String html) { 1989 empty(); 1990 append(html); 1991 return this; 1992 } 1993 1994 @Override 1995 public Element clone() { 1996 return (Element) super.clone(); 1997 } 1998 1999 @Override 2000 public Element shallowClone() { 2001 // simpler than implementing a clone version with no child copy 2002 String baseUri = baseUri(); 2003 if (baseUri.isEmpty()) baseUri = null; // saves setting a blank internal attribute 2004 return new Element(tag, baseUri, attributes == null ? 
null : attributes.clone()); 2005 } 2006 2007 @Override 2008 protected Element doClone(@Nullable Node parent) { 2009 Element clone = (Element) super.doClone(parent); 2010 clone.childNodes = new NodeList(childNodes.size()); 2011 clone.childNodes.addAll(childNodes); // the children then get iterated and cloned in Node.clone 2012 if (attributes != null) { 2013 clone.attributes = attributes.clone(); 2014 // clear any cached children 2015 clone.attributes.userData(childElsKey, null); 2016 } 2017 2018 return clone; 2019 } 2020 2021 // overrides of Node for call chaining 2022 @Override 2023 public Element clearAttributes() { 2024 if (attributes != null) { 2025 super.clearAttributes(); // keeps internal attributes via iterator 2026 if (attributes.size == 0) 2027 attributes = null; // only remove entirely if no internal attributes 2028 } 2029 2030 return this; 2031 } 2032 2033 @Override 2034 public Element removeAttr(String attributeKey) { 2035 return (Element) super.removeAttr(attributeKey); 2036 } 2037 2038 @Override 2039 public Element root() { 2040 return (Element) super.root(); // probably a document, but always at least an element 2041 } 2042 2043 @Override 2044 public Element traverse(NodeVisitor nodeVisitor) { 2045 return (Element) super.traverse(nodeVisitor); 2046 } 2047 2048 @Override 2049 public Element forEachNode(Consumer<? super Node> action) { 2050 return (Element) super.forEachNode(action); 2051 } 2052 2053 /** 2054 Perform the supplied action on this Element and each of its descendant Elements, during a depth-first traversal. 2055 Elements may be inspected, changed, added, replaced, or removed. 2056 @param action the function to perform on the element 2057 @see Node#forEachNode(Consumer) 2058 */ 2059 @Override 2060 public void forEach(Consumer<? super Element> action) { 2061 stream().forEach(action); 2062 } 2063 2064 /** 2065 Returns an Iterator that iterates this Element and each of its descendant Elements, in document order. 
@return an Iterator
     */
    @Override
    public Iterator<Element> iterator() {
        return new NodeIterator<>(this, Element.class);
    }

    @Override
    public Element filter(NodeFilter nodeFilter) {
        return (Element) super.filter(nodeFilter);
    }

    /** The list type that holds an Element's child nodes; adds a sibling-index validity flag and modCount access. */
    static final class NodeList extends ArrayList<Node> {
        /** Tracks if the children have valid sibling indices. We only need to reindex on siblingIndex() demand. */
        boolean validChildren = true;

        /** Creates an empty list with the given initial capacity. */
        public NodeList(int size) {
            super(size);
        }

        /** The modCount is used to invalidate the cached element children. */
        int modCount() {
            return this.modCount;
        }

        /** Bumps the modCount by one. */
        void incrementMod() {
            this.modCount++;
        }
    }

    /** Sets each child node's sibling index to its position in the child list, then marks the children as valid. */
    void reindexChildren() {
        final NodeList children = childNodes;
        final int count = children.size();
        int idx = 0;
        while (idx < count) {
            children.get(idx).setSiblingIndex(idx);
            idx++;
        }
        children.validChildren = true;
    }

    /** Marks the child list's sibling indices as no longer valid. */
    void invalidateChildren() {
        childNodes.validChildren = false;
    }

    /** Reports whether the child list's sibling indices are currently marked valid. */
    boolean hasValidChildren() {
        return childNodes.validChildren;
    }
}