package org.jsoup.select;

import org.jsoup.helper.Validate;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jspecify.annotations.Nullable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;

/**
 A list of {@link Node} objects, with methods that act on every node in the list.
 <p>Methods that {@link #set(int, T) set}, {@link #remove(int) remove}, or
 {@link #replaceAll(UnaryOperator) replace} nodes in the list will also act on the underlying
 {@link org.jsoup.nodes.Document DOM}.</p>

 <p>If there are other bulk methods (perhaps from Elements) that would be useful here, please <a
 href="https://jsoup.org/discussion">provide feedback</a>.</p>

 @see Element#selectNodes(String)
 @see Element#selectNodes(String, Class)
 @since 1.21.1 */
public class Nodes<T extends Node> extends ArrayList<T> {
    /**
     Create a new, empty list of nodes.
     */
    public Nodes() {
    }

    /**
     Create a new, empty list of nodes with the specified initial capacity.

     @param initialCapacity the initial capacity of the backing list
     */
    public Nodes(int initialCapacity) {
        super(initialCapacity);
    }

    /**
     Create a new list holding the supplied nodes, in the order returned by the collection's iterator. The nodes
     themselves are not copied.

     @param nodes the nodes to place into this list
     */
    public Nodes(Collection<T> nodes) {
        super(nodes);
    }

    /**
     Create a new list holding the supplied nodes, in list order. The nodes themselves are not copied.

     @param nodes the nodes to place into this list
     */
    public Nodes(List<T> nodes) {
        super(nodes);
    }

    /**
     Create a new list holding the supplied nodes, in argument order. The nodes themselves are not copied.

     @param nodes the nodes to place into this list
     */
    @SafeVarargs
    public Nodes(T... nodes) {
        super(Arrays.asList(nodes));
    }

    /**
     Creates a deep copy of these nodes: a new list holding the {@link Node#clone() clone} of each node in this list,
     in the same order.

     @return a deep copy
     */
    @Override
    public Nodes<T> clone() {
        Nodes<T> clone = new Nodes<>(size());
        for (T node : this) {
            // The cast to T cannot be checked at runtime. It is safe provided clone() returns an instance of the
            // node's own runtime class, as the clone() convention expects.
            @SuppressWarnings("unchecked")
            T copy = (T) node.clone();
            clone.add(copy);
        }
        return clone;
    }

    /**
     Convenience method to get the Nodes as a plain ArrayList. This allows modification to the list of nodes
     without modifying the source Document. I.e. whereas calling {@code nodes.remove(0)} will remove the node from
     both the Nodes and the DOM, {@code nodes.asList().remove(0)} will remove the node from the list only.
     <p>Each Node is still the same DOM connected Node.</p>

     @return a new ArrayList containing the nodes in this list
     @see #Nodes(List)
     */
    public ArrayList<T> asList() {
        return new ArrayList<>(this);
    }

    /**
     Remove each matched node from the DOM.
     <p>The nodes will still be retained in this list, in case further processing of them is desired.</p>
     <p>
     E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img></div>}<br>
     <code>doc.select("p").remove();</code><br>
     HTML = {@code <div> <img></div>}
     <p>
     Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner}
     to clean HTML.

     @return this, for chaining
     @see Element#empty()
     @see Elements#empty()
     @see #clear()
     */
    public Nodes<T> remove() {
        for (T node : this) {
            node.remove();
        }
        return this;
    }

    /**
     Get the combined outer HTML of all matched nodes, with the outer HTML of each node separated by a newline.

     @return string of all the nodes' outer HTML.
     @see Elements#text()
     @see Elements#html()
     */
    public String outerHtml() {
        return stream()
            .map(Node::outerHtml)
            .collect(StringUtil.joining("\n"));
    }

    /**
     Get the combined outer HTML of all matched nodes. Alias of {@link #outerHtml()}.

     @return string of all the nodes' outer HTML.
     @see Elements#text()
     @see #outerHtml()
     */
    @Override
    public String toString() {
        return outerHtml();
    }

    /**
     Insert the supplied HTML before each matched node's outer HTML.

     @param html HTML to insert before each node
     @return this, for chaining
     @see Element#before(String)
     */
    public Nodes<T> before(String html) {
        for (T node : this) {
            node.before(html);
        }
        return this;
    }

    /**
     Insert the supplied HTML after each matched node's outer HTML.

     @param html HTML to insert after each node
     @return this, for chaining
     @see Element#after(String)
     */
    public Nodes<T> after(String html) {
        for (T node : this) {
            node.after(html);
        }
        return this;
    }

    /**
     Wrap the supplied HTML around each matched node. For example, with HTML
     {@code <p><b>This</b> is <b>jsoup</b></p>},
     <code>doc.select("b").wrap("&lt;i&gt;&lt;/i&gt;");</code>
     becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>}

     @param html HTML to wrap around each node, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
     Must not be empty.
     @return this (for chaining)
     @see Element#wrap
     */
    public Nodes<T> wrap(String html) {
        Validate.notEmpty(html);
        for (T node : this) {
            node.wrap(html);
        }
        return this;
    }

    // list-like methods

    /**
     Get the first matched node.

     @return The first matched node, or <code>null</code> if this list is empty.
     */
    public @Nullable T first() {
        return isEmpty() ? null : get(0);
    }

    /**
     Get the last matched node.

     @return The last matched node, or <code>null</code> if this list is empty.
     */
    public @Nullable T last() {
        return isEmpty() ? null : get(size() - 1);
    }

    // ArrayList<T> methods that update the DOM:

    /**
     Replace the node at the specified index in this list, and in the DOM.

     @param index index of the node to replace
     @param node node to be stored at the specified position; must not be null
     @return the old Node at this index
     */
    @Override
    public T set(int index, T node) {
        Validate.notNull(node);
        // Update the list first, so that an out-of-range index fails before the DOM is touched.
        T old = super.set(index, node);
        old.replaceWith(node);
        return old;
    }

    /**
     Remove the node at the specified index in this list, and from the DOM.

     @param index the index of the node to be removed
     @return the old node at this index
     @see #deselect(int)
     */
    @Override
    public T remove(int index) {
        T old = super.remove(index);
        old.remove();
        return old;
    }

    /**
     Remove the specified node from this list, and from the DOM. Only the first occurrence in this list is removed.

     @param o node to be removed from this list, if present
     @return if this list contained the Node
     @see #deselect(Object)
     */
    @Override
    public boolean remove(Object o) {
        int index = super.indexOf(o);
        if (index == -1) {
            return false;
        } else {
            remove(index);
            return true;
        }
    }

    /**
     Remove the node at the specified index in this list, but not from the DOM.

     @param index the index of the node to be removed
     @return the old node at this index
     @see #remove(int)
     */
    public T deselect(int index) {
        return super.remove(index);
    }

    /**
     Remove the specified node from this list, but not from the DOM.

     @param o node to be removed from this list, if present
     @return if this list contained the Node
     @see #remove(Object)
     */
    public boolean deselect(Object o) {
        return super.remove(o);
    }

    /**
     Removes all the nodes from this list, and each of them from the DOM.

     @see #deselectAll()
     */
    @Override
    public void clear() {
        remove();
        super.clear();
    }

    /**
     Like {@link #clear()}, removes all the nodes from this list, but not from the DOM.

     @see #clear()
     */
    public void deselectAll() {
        super.clear();
    }

    /**
     Removes from this list, and from the DOM, each of the nodes that are contained in the specified collection and are
     in this list. Every occurrence of such a node in this list is removed. It is safe to pass this list itself as the
     argument, which removes every node.

     @param c collection containing nodes to be removed from this list
     @return {@code true} if nodes were removed from this list
     */
    @Override
    public boolean removeAll(Collection<?> c) {
        boolean anyRemoved = false;
        // Walk this list by index rather than iterating over c: removing from this list while iterating c breaks
        // when c is this list (or is otherwise backed by it), and would only drop the first occurrence of a node.
        int i = 0;
        while (i < size()) {
            if (c.contains(get(i))) {
                remove(i); // removes from the list and the DOM; the next candidate shifts into index i
                anyRemoved = true;
            } else {
                i++;
            }
        }
        return anyRemoved;
    }

    /**
     Retain in this list, and in the DOM, only the nodes that are in the specified collection and are in this list. In
     other words, remove from this list and from the DOM any node that is in this list but not in the specified
     collection.

     @param toRemove collection containing the nodes to be retained in this list (despite the parameter's name, nodes
     found in this collection are kept; all others are removed)
     @return {@code true} if nodes were removed from this list
     */
    @Override
    public boolean retainAll(Collection<?> toRemove) {
        boolean anyRemoved = false;
        // it.remove() is relied on to reach the overridden remove(int), so that the node also leaves the DOM
        // (OpenJDK's ArrayList iterator delegates that way) -- NOTE(review): confirm on other runtimes.
        for (Iterator<T> it = this.iterator(); it.hasNext(); ) {
            T el = it.next();
            if (!toRemove.contains(el)) {
                it.remove();
                anyRemoved = true;
            }
        }
        return anyRemoved;
    }

    /**
     Remove from the list, and from the DOM, all nodes in this list that match the given predicate.

     @param filter a predicate which returns {@code true} for nodes to be removed
     @return {@code true} if nodes were removed from this list
     */
    @Override
    public boolean removeIf(Predicate<? super T> filter) {
        boolean anyRemoved = false;
        // it.remove() is relied on to reach the overridden remove(int), so that the node also leaves the DOM
        // (OpenJDK's ArrayList iterator delegates that way) -- NOTE(review): confirm on other runtimes.
        for (Iterator<T> it = this.iterator(); it.hasNext(); ) {
            T node = it.next();
            if (filter.test(node)) {
                it.remove();
                anyRemoved = true;
            }
        }
        return anyRemoved;
    }

    /**
     Replace each node in this list with the result of the operator, and update the DOM.

     @param operator the operator to apply to each node; must not return null
     */
    @Override
    public void replaceAll(UnaryOperator<T> operator) {
        for (int i = 0; i < this.size(); i++) {
            this.set(i, operator.apply(this.get(i)));
        }
    }
}