package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.internal.QuietAppendable;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeVisitor;
import org.jspecify.annotations.Nullable;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Consumer;
import java.util.stream.Stream;

/**
 The base, abstract Node model. {@link Element}, {@link Document}, {@link Comment}, {@link TextNode}, et al.,
 are instances of Node.

 @author Jonathan Hedley, jonathan@hedley.net */
public abstract class Node implements Cloneable {
    static final List<Node> EmptyNodes = Collections.emptyList();
    static final String EmptyString = "";
    @Nullable Element parentNode; // Nodes don't always have parents
    int siblingIndex;

    /**
     * Default constructor. Doesn't set up base uri, children, or attributes; use with caution.
     */
    protected Node() {
    }

    /**
     Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
     @return node name
     */
    public abstract String nodeName();

    /**
     Get the normalized name of this node. For node types other than Element, this is the same as {@link #nodeName()}.
     For an Element, will be the lower-cased tag name.
     @return normalized node name
     @since 1.15.4.
     */
    public String normalName() {
        return nodeName();
    }

    /**
     Get the node's value. For a TextNode, the whole text; for a Comment, the comment data; for an Element,
     wholeOwnText. Returns "" if there is no value.
     @return the node's value
     */
    public String nodeValue() {
        return "";
    }

    /**
     Test if this node has the specified normalized name, in any namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @return true if the element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean nameIs(String normalName) {
        return normalName().equals(normalName);
    }

    /**
     Test if this node's parent has the specified normalized name.
     * @param normalName a normalized name (e.g. {@code div}).
     * @return true if the parent element's normal name matches exactly
     * @since 1.17.2
     */
    public boolean parentNameIs(String normalName) {
        return parentNode != null && parentNode.normalName().equals(normalName);
    }

    /**
     Test if this node's parent is an Element with the specified normalized name and namespace.
     * @param normalName a normalized element name (e.g. {@code div}).
     * @param namespace the namespace
     * @return true if the parent element's normal name matches exactly, and that element is in the specified namespace
     * @since 1.17.2
     */
    public boolean parentElementIs(String normalName, String namespace) {
        // parentNode is declared as an Element, so no instanceof test or cast is required
        return parentNode != null && parentNode.elementIs(normalName, namespace);
    }

    /**
     * Check if this Node has an actual Attributes object.
     * @return true if this node holds an Attributes object
     */
    protected abstract boolean hasAttributes();

    /**
     Checks if this node has a parent. Nodes won't have parents if (e.g.) they are newly created and not added as a child
     to an existing node, or if they are a {@link #shallowClone()}. In such cases, {@link #parent()} will return {@code null}.
     @return if this node has a parent.
     */
    public boolean hasParent() {
        return parentNode != null;
    }

    /**
     * Get an attribute's value by its key. <b>Case insensitive</b>
     * <p>
     * To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs:</b></code>,
     * which is a shortcut to the {@link #absUrl} method.
     * </p>
     * E.g.:
     * <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
     *
     * @param attributeKey The attribute key.
     * @return The attribute, or empty string if not present (to avoid nulls).
     * @see #attributes()
     * @see #hasAttr(String)
     * @see #absUrl(String)
     */
    public String attr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return EmptyString;

        // a literal attribute (even one literally named "abs:...") wins over the abs: shortcut
        String val = attributes().getIgnoreCase(attributeKey);
        if (!val.isEmpty())
            return val;
        if (attributeKey.startsWith("abs:"))
            return absUrl(attributeKey.substring("abs:".length()));
        return EmptyString;
    }

    /**
     * Get each of the Element's attributes.
     * @return attributes (which implements Iterable, with the same order as presented in the original HTML).
     */
    public abstract Attributes attributes();

    /**
     Get the number of attributes that this Node has.
     @return the number of attributes
     @since 1.14.2
     */
    public int attributesSize() {
        // added so that we can test how many attributes exist without implicitly creating the Attributes object
        return hasAttributes() ? attributes().size() : 0;
    }

    /**
     * Set an attribute (key=value). If the attribute already exists, it is replaced. The attribute key comparison is
     * <b>case insensitive</b>. The key will be set with case sensitivity as set in the parser settings.
     * @param attributeKey The attribute key.
     * @param attributeValue The attribute value.
     * @return this (for chaining)
     */
    public Node attr(String attributeKey, String attributeValue) {
        Document doc = ownerDocument();
        // without an owning document there are no parser settings to consult, so fall back to the HTML defaults
        ParseSettings settings = doc != null ? doc.parser().settings() : ParseSettings.htmlDefault;
        attributeKey = settings.normalizeAttribute(attributeKey);
        attributes().putIgnoreCase(attributeKey, attributeValue);
        return this;
    }

    /**
     * Test if this Node has an attribute. <b>Case insensitive</b>.
     * @param attributeKey The attribute key to check.
     * @return true if the attribute exists, false if not.
     */
    public boolean hasAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (!hasAttributes())
            return false;

        if (attributeKey.startsWith("abs:")) {
            String key = attributeKey.substring("abs:".length());
            if (attributes().hasKeyIgnoreCase(key) && !absUrl(key).isEmpty())
                return true;
        }
        // falls through so that an attribute literally keyed with the abs: prefix is still found
        return attributes().hasKeyIgnoreCase(attributeKey);
    }

    /**
     * Remove an attribute from this node.
     * @param attributeKey The attribute to remove.
     * @return this (for chaining)
     */
    public Node removeAttr(String attributeKey) {
        Validate.notNull(attributeKey);
        if (hasAttributes())
            attributes().removeIgnoreCase(attributeKey);
        return this;
    }

    /**
     * Clear (remove) each of the attributes in this node.
     * @return this, for chaining
     */
    public Node clearAttributes() {
        if (hasAttributes()) {
            // remove via the iterator so the attributes are not modified behind the iteration
            Iterator<Attribute> it = attributes().iterator();
            while (it.hasNext()) {
                it.next();
                it.remove();
            }
        }
        return this;
    }

    /**
     Get the base URI that applies to this node. Will return an empty string if not defined. Used to make relative links
     absolute.

     @return base URI
     @see #absUrl
     */
    public abstract String baseUri();

    /**
     * Set the baseUri for just this node (not its descendants), if this Node tracks base URIs.
     * @param baseUri new URI
     */
    protected abstract void doSetBaseUri(String baseUri);

    /**
     Update the base URI of this node and all of its descendants.
     @param baseUri base URI to set
     */
    public void setBaseUri(final String baseUri) {
        Validate.notNull(baseUri);
        doSetBaseUri(baseUri);
    }

    /**
     * Get an absolute URL from a URL attribute that may be relative (such as an {@code <a href>} or
     * {@code <img src>}).
     * <p>
     * E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
     * </p>
     * <p>
     * If the attribute value is already absolute (i.e. it starts with a protocol, like
     * <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
     * returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
     * absolute using that.
     * </p>
     * <p>
     * As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
     * <code>String absUrl = linkEl.attr("abs:href");</code>
     * </p>
     *
     * @param attributeKey The attribute key
     * @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
     * could not be made successfully into a URL.
     * @see #attr
     * @see java.net.URL#URL(java.net.URL, String)
     */
    public String absUrl(String attributeKey) {
        Validate.notEmpty(attributeKey);
        // not using hasAttr, so that we don't recurse down hasAttr->absUrl
        if (!(hasAttributes() && attributes().hasKeyIgnoreCase(attributeKey)))
            return EmptyString;

        return StringUtil.resolve(baseUri(), attributes().getIgnoreCase(attributeKey));
    }

    /**
     * Get the list that holds this node's children.
     * @return the child node list
     */
    protected abstract List<Node> ensureChildNodes();

    /**
     Get a child node by its 0-based index.
     @param index index of child node
     @return the child node at this index.
     @throws IndexOutOfBoundsException if the index is out of bounds.
     */
    public Node childNode(int index) {
        return ensureChildNodes().get(index);
    }

    /**
     Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
     themselves can be manipulated.
     @return list of children. If no children, returns an empty list.
     */
    public List<Node> childNodes() {
        if (childNodeSize() == 0)
            return EmptyNodes;

        // snapshot copy, so that looping and moving will not throw a CME as the source changes
        List<Node> snapshot = new ArrayList<>(ensureChildNodes());
        return Collections.unmodifiableList(snapshot);
    }

    /**
     * Returns a deep copy of this node's children. Changes made to these nodes will not be reflected in the original
     * nodes
     * @return a deep copy of this node's children
     */
    public List<Node> childNodesCopy() {
        final List<Node> nodes = ensureChildNodes();
        final ArrayList<Node> children = new ArrayList<>(nodes.size());
        for (Node node : nodes) {
            children.add(node.clone());
        }
        return children;
    }

    /**
     * Get the number of child nodes that this node holds.
     * @return the number of child nodes that this node holds.
     */
    public abstract int childNodeSize();

    /**
     * Copy this node's children into a new array.
     * @return a new array holding the current child nodes
     */
    protected Node[] childNodesAsArray() {
        return ensureChildNodes().toArray(new Node[0]);
    }

    /**
     * Delete all this node's children.
     * @return this node, for chaining
     */
    public abstract Node empty();

    /**
     Gets this node's parent node. This is always an Element.
     @return parent node; or null if no parent.
     @see #hasParent()
     @see #parentElement()
     */
    public @Nullable Node parent() {
        return parentNode;
    }

    /**
     Gets this node's parent Element.
     @return parent element; or null if this node has no parent.
     @see #hasParent()
     @since 1.21.1
     */
    public @Nullable Element parentElement() {
        return parentNode;
    }

    /**
     Gets this node's parent node. Not overridable by extending classes, so useful if you really just need the Node type.
     @return parent node; or null if no parent.
     */
    public @Nullable final Node parentNode() {
        return parentNode;
    }

    /**
     * Get this node's root node; that is, its topmost ancestor. If this node is the top ancestor, returns {@code this}.
     * @return topmost ancestor.
     */
    public Node root() {
        Node node = this;
        while (node.parentNode != null)
            node = node.parentNode;
        return node;
    }

    /**
     * Gets the Document associated with this Node.
     * @return the Document associated with this Node, or null if there is no such Document.
     */
    public @Nullable Document ownerDocument() {
        // walk up from this node (inclusive) until a Document is found or the ancestors run out
        Node node = this;
        while (node != null) {
            if (node instanceof Document) return (Document) node;
            node = node.parentNode;
        }
        return null;
    }

    /**
     * Remove (delete) this node from the DOM tree. If this node has children, they are also removed. If this node is
     * an orphan, nothing happens.
     */
    public void remove() {
        if (parentNode != null)
            parentNode.removeChild(this);
    }

    /**
     * Insert the specified HTML into the DOM before this node (as a preceding sibling).
     * @param html HTML to add before this node
     * @return this node, for chaining
     * @see #after(String)
     */
    public Node before(String html) {
        addSiblingHtml(siblingIndex(), html);
        return this;
    }

    /**
     * Insert the specified node into the DOM before this node (as a preceding sibling).
     * @param node to add before this node
     * @return this node, for chaining
     * @see #after(Node)
     */
    public Node before(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex(), node);
        return this;
    }

    /**
     * Insert the specified HTML into the DOM after this node (as a following sibling).
     * @param html HTML to add after this node
     * @return this node, for chaining
     * @see #before(String)
     */
    public Node after(String html) {
        addSiblingHtml(siblingIndex() + 1, html);
        return this;
    }

    /**
     * Insert the specified node into the DOM after this node (as a following sibling).
     * @param node to add after this node
     * @return this node, for chaining
     * @see #before(Node)
     */
    public Node after(Node node) {
        Validate.notNull(node);
        Validate.notNull(parentNode);

        // if the incoming node is a sibling of this, remove it first so siblingIndex is correct on add
        if (node.parentNode == parentNode) node.remove();

        parentNode.addChildren(siblingIndex() + 1, node);
        return this;
    }

    /**
     * Parse the HTML as a fragment in the context of this node's parent, and insert the resulting nodes into the
     * parent's children at the given index.
     * @param index child index in the parent at which to insert
     * @param html HTML to parse and insert
     */
    private void addSiblingHtml(int index, String html) {
        Validate.notNull(html);
        Validate.notNull(parentNode);

        Element context = parentNode; // already an Element, and validated as not null above
        List<Node> nodes = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        parentNode.addChildren(index, nodes.toArray(new Node[0]));
    }

    /**
     Wrap the supplied HTML around this node.

     @param html HTML to wrap around this node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. If
     the input HTML does not parse to a result starting with an Element, this will be a no-op.
     @return this node, for chaining.
     */
    public Node wrap(String html) {
        Validate.notEmpty(html);

        // Parse context - parent (because wrapping), this, or null
        Element context =
            parentNode != null ? parentNode :
                this instanceof Element ? (Element) this :
                    null;
        List<Node> wrapChildren = NodeUtils.parser(this).parseFragmentInput(html, context, baseUri());
        if (wrapChildren.isEmpty()) // defensive: nothing was parsed, so there is nothing to wrap with; noop
            return this;
        Node wrapNode = wrapChildren.get(0);
        if (!(wrapNode instanceof Element)) // nothing to wrap with; noop
            return this;

        Element wrap = (Element) wrapNode;
        Element deepest = getDeepChild(wrap);
        if (parentNode != null)
            parentNode.replaceChild(this, wrap);
        deepest.addChildren(this); // side effect of tricking wrapChildren to lose first

        // remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
        // the list size is re-read on each pass, as the moves above and below may alter the list
        //noinspection ForLoopReplaceableByForEach (because it allocates an Iterator which is wasteful here)
        for (int i = 0; i < wrapChildren.size(); i++) {
            Node remainder = wrapChildren.get(i);
            // if no parent, this could be the wrap node, so skip
            if (wrap == remainder)
                continue;

            if (remainder.parentNode != null)
                remainder.parentNode.removeChild(remainder);
            wrap.after(remainder);
        }
        return this;
    }

    /**
     * Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
     * the node but keeping its children.
     * <p>
     * For example, with the input html:
     * </p>
     * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
     * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
     * <p>{@code <div>One Two <b>Three</b></div>}</p>
     * and the {@code "Two "} {@link TextNode} being returned.
     *
     * @return the first child of this node, after the node has been unwrapped. {@code null} if the node had no children.
     * @see #remove()
     * @see #wrap(String)
     */
    public @Nullable Node unwrap() {
        Validate.notNull(parentNode);
        Node firstChild = firstChild(); // captured before the children are moved out
        parentNode.addChildren(siblingIndex(), this.childNodesAsArray());
        this.remove();

        return firstChild;
    }

    /**
     * Descend through first element children, returning the deepest one found.
     * @param el the element to start from
     * @return the deepest first-element-child descendant, or {@code el} itself if it has no element children
     */
    private static Element getDeepChild(Element el) {
        Element child = el.firstElementChild();
        while (child != null) {
            el = child;
            child = child.firstElementChild();
        }
        return el;
    }

    /**
     * Replace this node in the DOM with the supplied node.
     * @param in the node that will replace the existing node.
     */
    public void replaceWith(Node in) {
        Validate.notNull(in);
        if (parentNode == null) parentNode = in.parentNode; // allows old to have been temp removed before replacing
        Validate.notNull(parentNode);
        parentNode.replaceChild(this, in);
    }

    /**
     * Set this node's parent, first detaching this node from its current parent (if any).
     * @param parentNode the new parent; must not be null, and is expected to be an Element
     */
    protected void setParentNode(Node parentNode) {
        Validate.notNull(parentNode);
        if (this.parentNode != null)
            this.parentNode.removeChild(this);
        assert parentNode instanceof Element;
        this.parentNode = (Element) parentNode;
    }

    /**
     * Replace one of this node's children with another node, in the same position.
     * @param out the existing child of this node, to be replaced
     * @param in the node to put in its place; detached from its current parent first, if it has one
     */
    protected void replaceChild(Node out, Node in) {
        Validate.isTrue(out.parentNode == this);
        Validate.notNull(in);
        if (out == in) return; // no-op self replacement

        if (in.parentNode != null)
            in.parentNode.removeChild(in);

        // read the index only after the removal above, which invalidates the child indices
        final int index = out.siblingIndex();
        ensureChildNodes().set(index, in);
        in.parentNode = (Element) this;
        in.setSiblingIndex(index);
        out.parentNode = null;

        // as mod count not changed in set(), requires explicit update, to invalidate the child element cache
        ((Element) this).childNodes.incrementMod();
    }

    /**
     * Remove the given child from this node's children, and clear its parent.
     * @param out the child to remove; must currently be a child of this node
     */
    protected void removeChild(Node out) {
        Validate.isTrue(out.parentNode == this);
        Element el = (Element) this;
        if (el.hasValidChildren()) // can remove by index
            ensureChildNodes().remove(out.siblingIndex);
        else
            ensureChildNodes().remove(out); // iterates, but potentially not every one

        el.invalidateChildren();
        out.parentNode = null;
    }

    /**
     * Append the given nodes to the end of this node's children, reparenting each.
     * @param children nodes to append
     */
    protected void addChildren(Node... children) {
        //most used. short circuit addChildren(int), which hits reindex children and array copy
        final List<Node> nodes = ensureChildNodes();

        for (Node child: children) {
            reparentChild(child);
            nodes.add(child);
            child.setSiblingIndex(nodes.size()-1);
        }
    }

    /**
     * Insert the given nodes into this node's children at the specified index, reparenting each.
     * @param index position in the child list at which to insert
     * @param children nodes to insert
     */
    protected void addChildren(int index, Node... children) {
        // todo clean up all these and use the list, not the var array. just need to be careful when iterating the incoming (as we are removing as we go)
        Validate.notNull(children);
        if (children.length == 0) return;
        final List<Node> nodes = ensureChildNodes();

        // fast path - if used as a wrap (index=0, children = child[0].parent.children - do inplace
        final Node firstParent = children[0].parent();
        if (firstParent != null && firstParent.childNodeSize() == children.length) {
            boolean sameList = true;
            final List<Node> firstParentNodes = firstParent.ensureChildNodes();
            // identity check contents to see if same
            int i = children.length;
            while (i-- > 0) {
                if (children[i] != firstParentNodes.get(i)) {
                    sameList = false;
                    break;
                }
            }
            if (sameList) { // moving, so OK to empty firstParent and short-circuit
                firstParent.empty();
                nodes.addAll(index, Arrays.asList(children));
                i = children.length;
                assert this instanceof Element;
                while (i-- > 0) {
                    children[i].parentNode = (Element) this;
                }
                ((Element) this).invalidateChildren();
                return;
            }
        }

        // general path: detach each child from its old parent before inserting them all here
        Validate.noNullElements(children);
        for (Node child : children) {
            reparentChild(child);
        }
        nodes.addAll(index, Arrays.asList(children));
        ((Element) this).invalidateChildren();
    }

    /**
     * Make this node the parent of the given child (see {@link #setParentNode(Node)}).
     * @param child the node to reparent
     */
    protected void reparentChild(Node child) {
        child.setParentNode(this);
    }

    /**
     Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
     include this node (a node is not a sibling of itself).
     @return node siblings. If the node has no parent, returns an empty list.
     */
    public List<Node> siblingNodes() {
        if (parentNode == null)
            return Collections.emptyList();

        List<Node> nodes = parentNode.ensureChildNodes();
        List<Node> siblings = new ArrayList<>(nodes.size() - 1);
        for (Node node: nodes)
            if (node != this)
                siblings.add(node);
        return siblings;
    }

    /**
     Get this node's next sibling.
     @return next sibling, or {@code null} if this is the last sibling
     */
    public @Nullable Node nextSibling() {
        if (parentNode == null)
            return null; // root

        final List<Node> siblings = parentNode.ensureChildNodes();
        final int index = siblingIndex() + 1;
        if (siblings.size() > index) {
            Node node = siblings.get(index);
            assert (node.siblingIndex == index); // sanity test that invalidations haven't missed
            return node;
        } else
            return null;
    }

    /**
     Get this node's previous sibling.
     @return the previous sibling, or {@code null} if this is the first sibling
     */
    public @Nullable Node previousSibling() {
        if (parentNode == null)
            return null; // root

        final int index = siblingIndex(); // reindexes the parent's children first, if they are stale
        if (index > 0)
            return parentNode.ensureChildNodes().get(index - 1);
        else
            return null;
    }

    /**
     * Get the list index of this node in its node sibling list. E.g. if this is the first node
     * sibling, returns 0.
     * @return position in node sibling list
     * @see org.jsoup.nodes.Element#elementSiblingIndex()
     */
    public int siblingIndex() {
        // indices are refreshed lazily: reindex only when the parent's children have been marked invalid
        if (parentNode != null && !parentNode.childNodes.validChildren)
            parentNode.reindexChildren();

        return siblingIndex;
    }

    /**
     * Set the stored sibling index of this node.
     * @param siblingIndex the new index value
     */
    protected void setSiblingIndex(int siblingIndex) {
        this.siblingIndex = siblingIndex;
    }

    /**
     Gets the first child node of this node, or {@code null} if there is none. This could be any Node type, such as an
     Element, TextNode, Comment, etc. Use {@link Element#firstElementChild()} to get the first Element child.
     @return the first child node, or null if there are no children.
     @see Element#firstElementChild()
     @see #lastChild()
     @since 1.15.2
     */
    public @Nullable Node firstChild() {
        if (childNodeSize() == 0) return null;
        return ensureChildNodes().get(0);
    }

    /**
     Gets the last child node of this node, or {@code null} if there is none.
     @return the last child node, or null if there are no children.
     @see Element#lastElementChild()
     @see #firstChild()
     @since 1.15.2
     */
    public @Nullable Node lastChild() {
        final int size = childNodeSize();
        if (size == 0) return null;
        List<Node> children = ensureChildNodes();
        return children.get(size - 1);
    }

    /**
     Gets the first sibling of this node. That may be this node.

     @return the first sibling node
     @since 1.21.1
     */
    public Node firstSibling() {
        if (parentNode != null) {
            //noinspection DataFlowIssue
            return parentNode.firstChild();
        } else
            return this; // orphan is its own first sibling
    }

    /**
     Gets the last sibling of this node. That may be this node.

     @return the last sibling (aka the parent's last child)
     @since 1.21.1
     */
    public Node lastSibling() {
        if (parentNode != null) {
            //noinspection DataFlowIssue (not nullable, would be this if no other sibs)
            return parentNode.lastChild();
        } else
            return this;
    }

    /**
     Gets the next sibling Element of this node. E.g., if a {@code div} contains two {@code p}s, the
     {@code nextElementSibling} of the first {@code p} is the second {@code p}.
     <p>This is similar to {@link #nextSibling()}, but specifically finds only Elements.</p>

     @return the next element, or null if there is no next element
     @see #previousElementSibling()
     */
    public @Nullable Element nextElementSibling() {
        Node next = this;
        while ((next = next.nextSibling()) != null) {
            if (next instanceof Element) return (Element) next;
        }
        return null;
    }

    /**
     Gets the previous Element sibling of this node.

     @return the previous element, or null if there is no previous element
     @see #nextElementSibling()
     */
    public @Nullable Element previousElementSibling() {
        Node prev = this;
        while ((prev = prev.previousSibling()) != null) {
            if (prev instanceof Element) return (Element) prev;
        }
        return null;
    }

    /**
     * Perform a depth-first traversal through this node and its descendants.
     * @param nodeVisitor the visitor callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node traverse(NodeVisitor nodeVisitor) {
        Validate.notNull(nodeVisitor);
        nodeVisitor.traverse(this);
        return this;
    }

    /**
     Perform the supplied action on this Node and each of its descendants, during a depth-first traversal. Nodes may be
     inspected, changed, added, replaced, or removed.
     @param action the function to perform on the node
     @return this Node, for chaining
     @see Element#forEach(Consumer)
     */
    public Node forEachNode(Consumer<? super Node> action) {
        Validate.notNull(action);
        nodeStream().forEach(action);
        return this;
    }

    /**
     * Perform a depth-first controllable traversal through this node and its descendants.
     * @param nodeFilter the filter callbacks to perform on each node
     * @return this node, for chaining
     */
    public Node filter(NodeFilter nodeFilter) {
        Validate.notNull(nodeFilter);
        nodeFilter.traverse(this);
        return this;
    }

    /**
     Returns a Stream of this Node and all of its descendant Nodes. The stream has document order.
     @return a stream of all nodes.
     @see Element#stream()
     @since 1.17.1
     */
    public Stream<Node> nodeStream() {
        return NodeUtils.stream(this, Node.class);
    }

    /**
     Returns a Stream of this and descendant nodes, containing only nodes of the specified type. The stream has document
     order.
     @param type the type of node to include in the stream
     @param <T> the node type
     @return a stream of nodes filtered by type.
     @see Element#stream()
     @since 1.17.1
     */
    public <T extends Node> Stream<T> nodeStream(Class<T> type) {
        return NodeUtils.stream(this, type);
    }

    /**
     Get the outer HTML of this node. For example, on a {@code p} element, may return {@code <p>Para</p>}.
     @return outer HTML
     @see Element#html()
     @see Element#text()
     */
    public String outerHtml() {
        StringBuilder sb = StringUtil.borrowBuilder();
        outerHtml(QuietAppendable.wrap(sb));
        return StringUtil.releaseBuilder(sb);
    }

    /**
     * Write this node's outer HTML to the supplied Appendable, by wrapping it in a {@link QuietAppendable}.
     * @param accum the Appendable to write to
     */
    protected void outerHtml(Appendable accum) {
        outerHtml(QuietAppendable.wrap(accum));
    }

    /**
     * Write this node's outer HTML to the supplied accumulator, using the {@link Printer} selected for this node.
     * @param accum the accumulator to write to
     */
    protected void outerHtml(QuietAppendable accum) {
        Printer printer = Printer.printerFor(this, accum);
        printer.traverse(this);
    }

    /**
     Write the head (opening portion) of this node's outer HTML.

     @param accum accumulator to place HTML into
     @param out the output settings to apply
     */
    abstract void outerHtmlHead(final QuietAppendable accum, final Document.OutputSettings out);

    /**
     Write the tail (closing portion) of this node's outer HTML.

     @param accum accumulator to place HTML into
     @param out the output settings to apply
     */
    abstract void outerHtmlTail(final QuietAppendable accum, final Document.OutputSettings out);

    /**
     Write this node and its children to the given {@link Appendable}.

     @param appendable the {@link Appendable} to write to.
     @param <T> the type of the Appendable
     @return the supplied {@link Appendable}, for chaining.
     @throws org.jsoup.SerializationException if the appendable throws an IOException.
     */
    public <T extends Appendable> T html(T appendable) {
        outerHtml(appendable);
        return appendable;
    }

    /**
     Get the source range (start and end positions) in the original input source from which this node was parsed.
     Position tracking must be enabled prior to parsing the content. For an Element, this will be the positions of the
     start tag.
     @return the range for the start of the node, or {@code untracked} if its range was not tracked.
     @see org.jsoup.parser.Parser#setTrackPosition(boolean)
     @see Range#isImplicit()
     @see Element#endSourceRange()
     @see Attributes#sourceRange(String name)
     @since 1.15.2
     */
    public Range sourceRange() {
        return Range.of(this, true);
    }

    /**
     * Gets this node's outer HTML.
     * @return outer HTML.
     * @see #outerHtml()
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /** @deprecated internal method moved into Printer; will be removed in jsoup 1.24.1. */
    @Deprecated
    protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
        accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
    }

    /**
     * Check if this node is the same instance of another (object identity test).
     * <p>For a node value equality check, see {@link #hasSameValue(Object)}</p>
     * @param o other object to compare to
     * @return true if the other object is this very same node instance
     * @see Node#hasSameValue(Object)
     */
    @Override
    public boolean equals(@Nullable Object o) {
        // implemented just so that javadoc is clear this is an identity test
        return this == o;
    }

    /**
     Provides a hashCode for this Node, based on its object identity. Changes to the Node's content will not impact the
     result.
     @return an object identity based hashcode for this Node
     */
    @Override
    public int hashCode() {
        // implemented so that javadoc and scanners are clear this is an identity test
        return super.hashCode();
    }

    /**
     * Check if this node has the same content as another node. A node is considered the same if its name, attributes and content match the
     * other node; particularly its position in the tree does not influence its similarity.
     * @param o other object to compare to
     * @return true if the content of this node is the same as the other
     */
    public boolean hasSameValue(@Nullable Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        // value equality is defined by the serialized form of the two nodes
        return this.outerHtml().equals(((Node) o).outerHtml());
    }

    /**
     Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings.
     <ul>
     <li>If this node is a {@link LeafNode}, the clone will have no parent.</li>
     <li>If this node is an {@link Element}, the clone will have a simple owning {@link Document} to retain the
     configured output settings and parser.</li>
     </ul>
     <p>The cloned node may be adopted into another Document or node structure using
     {@link Element#appendChild(Node)}.</p>

     @return a stand-alone cloned node, including clones of any children
     @see #shallowClone()
     */
    @SuppressWarnings("MethodDoesntCallSuperMethod")
    // because it does call super.clone in doClone - analysis just isn't following
    @Override
    public Node clone() {
        Node thisClone = doClone(null); // splits for orphan

        // Queue up nodes that need their children cloned (BFS).
        final LinkedList<Node> nodesToProcess = new LinkedList<>();
        nodesToProcess.add(thisClone);

        while (!nodesToProcess.isEmpty()) {
            Node currParent = nodesToProcess.remove();

            final int size = currParent.childNodeSize();
            for (int i = 0; i < size; i++) {
                // the child list is only fetched inside the loop, so it is never requested for a childless node
                final List<Node> childNodes = currParent.ensureChildNodes();
                // swap each original child held by the cloned parent for its own clone, then queue that clone
                Node childClone = childNodes.get(i).doClone(currParent);
                childNodes.set(i, childClone);
                nodesToProcess.add(childClone);
            }
        }

        return thisClone;
    }

    /**
     * Create a stand-alone, shallow copy of this node. None of its children (if any) will be cloned, and it will have
     * no parent or sibling nodes.
     * @return a single independent copy of this node
     * @see #clone()
     */
    public Node shallowClone() {
        return doClone(null);
    }

    /*
     * Return a clone of the node using the given parent (which can be null).
     * Not a deep copy of children.
     */
    protected Node doClone(@Nullable Node parent) {
        assert parent == null || parent instanceof Element;
        Node clone;

        try {
            clone = (Node) super.clone();
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }

        clone.parentNode = (Element) parent; // can be null, to create an orphan split
        clone.siblingIndex = parent == null ? 0 : siblingIndex();
        // if not keeping the parent, shallowClone the ownerDocument to preserve its settings
        if (parent == null && !(this instanceof Document)) {
            Document doc = ownerDocument();
            if (doc != null) {
                Document docClone = doc.shallowClone();
                clone.parentNode = docClone;
                docClone.ensureChildNodes().add(clone);
            }
        }

        return clone;
    }
}