001package org.jsoup.select; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.nodes.Element; 005import org.jsoup.nodes.Node; 006import org.jsoup.select.NodeFilter.FilterResult; 007 008/** 009 A depth-first node traversor. Use to walk through all nodes under and including the specified root node, in document 010 order. The {@link NodeVisitor#head(Node, int)} and {@link NodeVisitor#tail(Node, int)} methods will be called for 011 each node. 012 <p>During the <code>head()</code> visit, DOM structural changes around the node currently being visited are 013 supported, including {@link Node#replaceWith(Node)} and {@link Node#remove()}. See 014 {@link NodeVisitor#head(Node, int) head()} for the traversal contract after mutation. Other non-structural node 015 changes are also supported.</p> 016 <p>DOM structural changes to the current node are not supported during the <code>tail()</code> visit.</p> 017 */ 018public class NodeTraversor { 019 // cursor state 020 private static final byte VisitHead = 0; 021 private static final byte AfterHead = 1; 022 private static final byte VisitTail = 2; 023 024 /** 025 Run a depth-first traverse of the root and all of its descendants. 026 @param visitor Node visitor. 027 @param root the initial node point to traverse. 028 @see NodeVisitor#traverse(Node root) 029 */ 030 public static void traverse(NodeVisitor visitor, Node root) { 031 Validate.notNull(visitor); 032 Validate.notNull(root); 033 Node node = root; 034 final Node rootNext = root.nextSibling(); // don't traverse siblings beyond the original root 035 int depth = 0; 036 byte state = VisitHead; 037 038 while (true) { 039 if (state == VisitHead) { 040 // snapshot the current cursor position so we can recover if head() structurally changes it: 041 Node parent = node.parentNode(); 042 Node next = node.nextSibling(); 043 int sibIndex = parent != null ? node.siblingIndex() : 0; 044 045 visitor.head(node, depth); 046 047 // any structural changes? 048 if (parent != null && node.parentNode() != parent) { // removed / replaced / moved 049 Node occupant = sibIndex < parent.childNodeSize() ? parent.childNode(sibIndex) : null; 050 // ^^ the node now at this node's former position 051 Node boundary = depth == 0 ? rootNext : next; // don't advance beyond this node when resuming 052 if (occupant != null && occupant != boundary) { 053 node = occupant; 054 state = AfterHead; // continue from that slot without re-heading it 055 } else if (depth == 0) { // root detached or replaced 056 break; 057 } else if (next != null && next.parentNode() == parent) { 058 node = next; // old slot is empty or shifted to the original next, visit 059 } else { // removed last child; tail the parent next 060 node = parent; 061 depth--; 062 state = VisitTail; 063 } 064 } else { 065 state = AfterHead; 066 } 067 continue; // next loop handles the updated node/state 068 } 069 070 if (state == AfterHead && node.childNodeSize() > 0) { // descend into current children 071 node = node.childNode(0); 072 depth++; 073 state = VisitHead; 074 continue; 075 } 076 077 visitor.tail(node, depth); 078 079 Node next = node.nextSibling(); 080 if (depth == 0) { 081 if (next == null || next == rootNext) break; // done with the original root range 082 node = next; 083 state = VisitHead; 084 } else if (next != null) { // traverse siblings 085 node = next; 086 state = VisitHead; 087 } else { // no siblings left, ascend 088 node = node.parentNode(); 089 depth--; 090 state = VisitTail; 091 } 092 } 093 } 094 095 /** 096 Run a depth-first traversal of each Element. 097 @param visitor Node visitor. 098 @param elements Elements to traverse. 099 */ 100 public static void traverse(NodeVisitor visitor, Elements elements) { 101 Validate.notNull(visitor); 102 Validate.notNull(elements); 103 for (Element el : elements) 104 traverse(visitor, el); 105 } 106 107 /** 108 Run a depth-first controllable traversal of the root and all of its descendants. 109 @param filter NodeFilter visitor. 110 @param root the root node point to traverse. 111 @return The filter result of the root node, or {@link FilterResult#STOP}. 112 113 @see NodeFilter 114 */ 115 public static FilterResult filter(NodeFilter filter, Node root) { 116 Node node = root; 117 int depth = 0; 118 119 while (node != null) { 120 FilterResult result = filter.head(node, depth); 121 if (result == FilterResult.STOP) 122 return result; 123 // Descend into child nodes: 124 if (result == FilterResult.CONTINUE && node.childNodeSize() > 0) { 125 node = node.childNode(0); 126 ++depth; 127 continue; 128 } 129 // No siblings, move upwards: 130 while (true) { 131 assert node != null; // depth > 0, so has parent 132 if (!(node.nextSibling() == null && depth > 0)) break; 133 // 'tail' current node: 134 if (result == FilterResult.CONTINUE || result == FilterResult.SKIP_CHILDREN) { 135 result = filter.tail(node, depth); 136 if (result == FilterResult.STOP) 137 return result; 138 } 139 Node prev = node; // In case we need to remove it below. 140 node = node.parentNode(); 141 depth--; 142 if (result == FilterResult.REMOVE) 143 prev.remove(); // Remove AFTER finding parent. 144 result = FilterResult.CONTINUE; // Parent was not pruned. 145 } 146 // 'tail' current node, then proceed with siblings: 147 if (result == FilterResult.CONTINUE || result == FilterResult.SKIP_CHILDREN) { 148 result = filter.tail(node, depth); 149 if (result == FilterResult.STOP) 150 return result; 151 } 152 if (node == root) 153 return result; 154 Node prev = node; // In case we need to remove it below. 155 node = node.nextSibling(); 156 if (result == FilterResult.REMOVE) 157 prev.remove(); // Remove AFTER finding sibling. 158 } 159 // root == null? 160 return FilterResult.CONTINUE; 161 } 162 163 /** 164 Run a depth-first controllable traversal of each Element. 165 @param filter NodeFilter visitor. 166 @see NodeFilter 167 */ 168 public static void filter(NodeFilter filter, Elements elements) { 169 Validate.notNull(filter); 170 Validate.notNull(elements); 171 for (Element el : elements) 172 if (filter(filter, el) == FilterResult.STOP) 173 break; 174 } 175}