001package org.jsoup.select; 002 003import org.jsoup.nodes.Element; 004import org.jsoup.nodes.Node; 005 006/** 007 Node visitor interface, used to walk the DOM and visit each node. Execute via {@link #traverse(Node)} or 008 {@link Node#traverse(NodeVisitor)}. The traversal is depth-first. 009 <p> 010 This interface provides two methods, {@link #head} and {@link #tail}. The head method is called when a node is first 011 seen, and the tail method when all that node's children have been visited. As an example, {@code head} can be used to 012 emit a start tag for a node, and {@code tail} to emit the end tag. The {@code tail} method defaults to a no-op, so 013 this interface can be used as a {@link FunctionalInterface}, with {@code head} as its single abstract method. 014 </p> 015 <p><b>Example:</b></p> 016 <pre><code> 017 doc.body().traverse((node, depth) -> { 018 switch (node) { 019 case Element el -> print(el.tag() + ": " + el.ownText()); 020 case DataNode data -> print("Data: " + data.getWholeData()); 021 default -> print(node.nodeName() + " at depth " + depth); 022 } 023 }); 024 </code></pre> 025 */ 026@FunctionalInterface 027public interface NodeVisitor { 028 /** 029 Callback for when a node is first visited. 030 <p>The node may be modified (for example via {@link Node#attr(String)}), removed with 031 {@link Node#remove()}, or replaced with {@link Node#replaceWith(Node)}. If the node is an 032 {@link Element}, you may cast it and access those methods.</p> 033 <p>Traversal uses a forward cursor. After {@code head()} completes:</p> 034 <ul> 035 <li>If the current node is still attached, traversal continues into its current children and then its following 036 siblings. Nodes inserted before the current node are not visited.</li> 037 <li>If the current node was detached and another node now occupies its former sibling position, the node now at 038 that position is not passed to {@code head()} again. Traversal continues from there: its children are visited, 039 then the node is passed to {@link #tail(Node, int)}, then later siblings are visited.</li> 040 <li>If the current node was detached and no node occupies its former sibling position, the current node is not 041 passed to {@code tail()}, and traversal resumes at the node that originally followed it.</li> 042 </ul> 043 <p>Traversal never advances outside the original root subtree. If the traversal root is detached during 044 {@code head()}, traversal stops at the original root boundary.</p> 045 046 @param node the node being visited. 047 @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node 048 of that will have depth 1. 049 */ 050 void head(Node node, int depth); 051 052 /** 053 Callback for when a node is last visited, after all of its descendants have been visited. 054 <p>This method defaults to a no-op.</p> 055 <p>The node passed to {@code tail()} is the node at the current traversal position when the subtree completes. 056 If {@code head()} replaced the original node, this may be the replacement node instead.</p> 057 <p>Structural changes to the current node are not supported during {@code tail()}.</p> 058 059 @param node the node being visited. 060 @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node 061 of that will have depth 1. 062 */ 063 default void tail(Node node, int depth) { 064 // no-op by default, to allow just specifying the head() method 065 } 066 067 /** 068 Run a depth-first traverse of the root and all of its descendants. 069 @param root the initial node point to traverse. 070 @since 1.21.1 071 */ 072 default void traverse(Node root) { 073 NodeTraversor.traverse(this, root); 074 } 075}