001package org.jsoup.select; 002 003import org.jsoup.helper.Validate; 004import org.jsoup.internal.StringUtil; 005import org.jsoup.nodes.Comment; 006import org.jsoup.nodes.DataNode; 007import org.jsoup.nodes.Element; 008import org.jsoup.nodes.FormElement; 009import org.jsoup.nodes.Node; 010import org.jsoup.nodes.TextNode; 011import org.jspecify.annotations.Nullable; 012 013import java.util.ArrayList; 014import java.util.Arrays; 015import java.util.Collection; 016import java.util.HashSet; 017import java.util.Iterator; 018import java.util.LinkedHashSet; 019import java.util.List; 020import java.util.function.Predicate; 021import java.util.function.UnaryOperator; 022 023/** 024 A list of {@link Element}s, with methods that act on every element in the list. 025 <p>To get an {@code Elements} object, use the {@link Element#select(String)} method.</p> 026 <p>Methods that {@link #set(int, Element) set}, {@link #remove(int) remove}, or {@link #replaceAll(UnaryOperator) 027 replace} Elements in the list will also act on the underlying {@link org.jsoup.nodes.Document DOM}.</p> 028 029 @author Jonathan Hedley, jonathan@hedley.net */ 030public class Elements extends Nodes<Element> { 031 public Elements() { 032 } 033 034 public Elements(int initialCapacity) { 035 super(initialCapacity); 036 } 037 038 public Elements(Collection<Element> elements) { 039 super(elements); 040 } 041 042 public Elements(List<Element> elements) { 043 super(elements); 044 } 045 046 public Elements(Element... elements) { 047 super(Arrays.asList(elements)); 048 } 049 050 /** 051 * Creates a deep copy of these elements. 052 * @return a deep copy 053 */ 054 @Override 055 public Elements clone() { 056 Elements clone = new Elements(size()); 057 for (Element e : this) 058 clone.add(e.clone()); 059 return clone; 060 } 061 062 /** 063 Convenience method to get the Elements as a plain ArrayList. This allows modification to the list of elements 064 without modifying the source Document. I.e. whereas calling {@code elements.remove(0)} will remove the element from 065 both the Elements and the DOM, {@code elements.asList().remove(0)} will remove the element from the list only. 066 <p>Each Element is still the same DOM connected Element.</p> 067 068 @return a new ArrayList containing the elements in this list 069 @since 1.19.2 070 @see #Elements(List) 071 */ 072 @Override 073 public ArrayList<Element> asList() { 074 return new ArrayList<>(this); 075 } 076 077 // attribute methods 078 /** 079 Get an attribute value from the first matched element that has the attribute. 080 @param attributeKey The attribute key. 081 @return The attribute value from the first matched element that has the attribute. If no elements were matched (isEmpty() == true), 082 or if the no elements have the attribute, returns empty string. 083 @see #hasAttr(String) 084 */ 085 public String attr(String attributeKey) { 086 for (Element element : this) { 087 if (element.hasAttr(attributeKey)) 088 return element.attr(attributeKey); 089 } 090 return ""; 091 } 092 093 /** 094 Checks if any of the matched elements have this attribute defined. 095 @param attributeKey attribute key 096 @return true if any of the elements have the attribute; false if none do. 097 */ 098 public boolean hasAttr(String attributeKey) { 099 for (Element element : this) { 100 if (element.hasAttr(attributeKey)) 101 return true; 102 } 103 return false; 104 } 105 106 /** 107 * Get the attribute value for each of the matched elements. If an element does not have this attribute, no value is 108 * included in the result set for that element. 109 * @param attributeKey the attribute name to return values for. You can add the {@code abs:} prefix to the key to 110 * get absolute URLs from relative URLs, e.g.: {@code doc.select("a").eachAttr("abs:href")} . 111 * @return a list of each element's attribute value for the attribute 112 */ 113 public List<String> eachAttr(String attributeKey) { 114 List<String> attrs = new ArrayList<>(size()); 115 for (Element element : this) { 116 if (element.hasAttr(attributeKey)) 117 attrs.add(element.attr(attributeKey)); 118 } 119 return attrs; 120 } 121 122 /** 123 * Set an attribute on all matched elements. 124 * @param attributeKey attribute key 125 * @param attributeValue attribute value 126 * @return this 127 */ 128 public Elements attr(String attributeKey, String attributeValue) { 129 for (Element element : this) { 130 element.attr(attributeKey, attributeValue); 131 } 132 return this; 133 } 134 135 /** 136 * Remove an attribute from every matched element. 137 * @param attributeKey The attribute to remove. 138 * @return this (for chaining) 139 */ 140 public Elements removeAttr(String attributeKey) { 141 for (Element element : this) { 142 element.removeAttr(attributeKey); 143 } 144 return this; 145 } 146 147 /** 148 Add the class name to every matched element's {@code class} attribute. 149 @param className class name to add 150 @return this 151 */ 152 public Elements addClass(String className) { 153 for (Element element : this) { 154 element.addClass(className); 155 } 156 return this; 157 } 158 159 /** 160 Remove the class name from every matched element's {@code class} attribute, if present. 161 @param className class name to remove 162 @return this 163 */ 164 public Elements removeClass(String className) { 165 for (Element element : this) { 166 element.removeClass(className); 167 } 168 return this; 169 } 170 171 /** 172 Toggle the class name on every matched element's {@code class} attribute. 173 @param className class name to add if missing, or remove if present, from every element. 174 @return this 175 */ 176 public Elements toggleClass(String className) { 177 for (Element element : this) { 178 element.toggleClass(className); 179 } 180 return this; 181 } 182 183 /** 184 Determine if any of the matched elements have this class name set in their {@code class} attribute. 185 @param className class name to check for 186 @return true if any do, false if none do 187 */ 188 public boolean hasClass(String className) { 189 for (Element element : this) { 190 if (element.hasClass(className)) 191 return true; 192 } 193 return false; 194 } 195 196 /** 197 * Get the form element's value of the first matched element. 198 * @return The form element's value, or empty if not set. 199 * @see Element#val() 200 */ 201 public String val() { 202 if (size() > 0) 203 //noinspection ConstantConditions 204 return first().val(); // first() != null as size() > 0 205 else 206 return ""; 207 } 208 209 /** 210 * Set the form element's value in each of the matched elements. 211 * @param value The value to set into each matched element 212 * @return this (for chaining) 213 */ 214 public Elements val(String value) { 215 for (Element element : this) 216 element.val(value); 217 return this; 218 } 219 220 /** 221 * Get the combined text of all the matched elements. 222 * <p> 223 * Note that it is possible to get repeats if the matched elements contain both parent elements and their own 224 * children, as the Element.text() method returns the combined text of a parent and all its children. 225 * @return string of all text: unescaped and no HTML. 226 * @see Element#text() 227 * @see #eachText() 228 */ 229 public String text() { 230 return stream() 231 .map(Element::text) 232 .collect(StringUtil.joining(" ")); 233 } 234 235 /** 236 Test if any matched Element has any text content, that is not just whitespace. 237 @return true if any element has non-blank text content. 238 @see Element#hasText() 239 */ 240 public boolean hasText() { 241 for (Element element: this) { 242 if (element.hasText()) 243 return true; 244 } 245 return false; 246 } 247 248 /** 249 * Get the text content of each of the matched elements. If an element has no text, then it is not included in the 250 * result. 251 * @return A list of each matched element's text content. 252 * @see Element#text() 253 * @see Element#hasText() 254 * @see #text() 255 */ 256 public List<String> eachText() { 257 ArrayList<String> texts = new ArrayList<>(size()); 258 for (Element el: this) { 259 if (el.hasText()) 260 texts.add(el.text()); 261 } 262 return texts; 263 } 264 265 /** 266 * Get the combined inner HTML of all matched elements. 267 * @return string of all element's inner HTML. 268 * @see #text() 269 * @see #outerHtml() 270 */ 271 public String html() { 272 return stream() 273 .map(Element::html) 274 .collect(StringUtil.joining("\n")); 275 } 276 277 /** 278 * Update (rename) the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do 279 * {@code doc.select("i").tagName("em");} 280 * 281 * @param tagName the new tag name 282 * @return this, for chaining 283 * @see Element#tagName(String) 284 */ 285 public Elements tagName(String tagName) { 286 for (Element element : this) { 287 element.tagName(tagName); 288 } 289 return this; 290 } 291 292 /** 293 * Set the inner HTML of each matched element. 294 * @param html HTML to parse and set into each matched element. 295 * @return this, for chaining 296 * @see Element#html(String) 297 */ 298 public Elements html(String html) { 299 for (Element element : this) { 300 element.html(html); 301 } 302 return this; 303 } 304 305 /** 306 * Add the supplied HTML to the start of each matched element's inner HTML. 307 * @param html HTML to add inside each element, before the existing HTML 308 * @return this, for chaining 309 * @see Element#prepend(String) 310 */ 311 public Elements prepend(String html) { 312 for (Element element : this) { 313 element.prepend(html); 314 } 315 return this; 316 } 317 318 /** 319 * Add the supplied HTML to the end of each matched element's inner HTML. 320 * @param html HTML to add inside each element, after the existing HTML 321 * @return this, for chaining 322 * @see Element#append(String) 323 */ 324 public Elements append(String html) { 325 for (Element element : this) { 326 element.append(html); 327 } 328 return this; 329 } 330 331 /** 332 Insert the supplied HTML before each matched element's outer HTML. 333 334 @param html HTML to insert before each element 335 @return this, for chaining 336 @see Element#before(String) 337 */ 338 @Override 339 public Elements before(String html) { 340 super.before(html); 341 return this; 342 } 343 344 /** 345 Insert the supplied HTML after each matched element's outer HTML. 346 347 @param html HTML to insert after each element 348 @return this, for chaining 349 @see Element#after(String) 350 */ 351 @Override 352 public Elements after(String html) { 353 super.after(html); 354 return this; 355 } 356 357 /** 358 Wrap the supplied HTML around each matched elements. For example, with HTML 359 {@code <p><b>This</b> is <b>Jsoup</b></p>}, 360 <code>doc.select("b").wrap("<i></i>");</code> 361 becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>} 362 363 @param html HTML to wrap around each element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep. 364 @return this (for chaining) 365 @see Element#wrap 366 */ 367 @Override 368 public Elements wrap(String html) { 369 super.wrap(html); 370 return this; 371 } 372 373 /** 374 * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of 375 * dropping the elements but keeping their children. 376 * <p> 377 * This is useful for e.g removing unwanted formatting elements but keeping their contents. 378 * </p> 379 * 380 * E.g. with HTML: <p>{@code <div><font>One</font> <font><a href="/">Two</a></font></div>}</p> 381 * <p>{@code doc.select("font").unwrap();}</p> 382 * <p>HTML = {@code <div>One <a href="/">Two</a></div>}</p> 383 * 384 * @return this (for chaining) 385 * @see Node#unwrap 386 */ 387 public Elements unwrap() { 388 for (Element element : this) { 389 element.unwrap(); 390 } 391 return this; 392 } 393 394 /** 395 * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each 396 * element to nothing. 397 * <p> 398 * E.g. HTML: {@code <div><p>Hello <b>there</b></p> <p>now</p></div>}<br> 399 * <code>doc.select("p").empty();</code><br> 400 * HTML = {@code <div><p></p> <p></p></div>} 401 * @return this, for chaining 402 * @see Element#empty() 403 * @see #remove() 404 */ 405 public Elements empty() { 406 for (Element element : this) { 407 element.empty(); 408 } 409 return this; 410 } 411 412 /** 413 * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing. 414 * <p>The elements will still be retained in this list, in case further processing of them is desired.</p> 415 * <p> 416 * E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br> 417 * <code>doc.select("p").remove();</code><br> 418 * HTML = {@code <div> <img /></div>} 419 * <p> 420 * Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner} to clean HTML. 421 * @return this, for chaining 422 * @see Element#empty() 423 * @see #empty() 424 * @see #clear() 425 */ 426 @Override 427 public Elements remove() { 428 super.remove(); 429 return this; 430 } 431 432 // filters 433 434 /** 435 * Find matching elements within this element list. 436 * @param query A {@link Selector} query 437 * @return the filtered list of elements, or an empty list if none match. 438 */ 439 public Elements select(String query) { 440 return Selector.select(query, this); 441 } 442 443 /** 444 Find the first Element that matches the {@link Selector} CSS query within this element list. 445 <p>This is effectively the same as calling {@code elements.select(query).first()}, but is more efficient as query 446 execution stops on the first hit.</p> 447 448 @param cssQuery a {@link Selector} query 449 @return the first matching element, or <b>{@code null}</b> if there is no match. 450 @see #expectFirst(String) 451 @since 1.19.1 452 */ 453 public @Nullable Element selectFirst(String cssQuery) { 454 return Selector.selectFirst(cssQuery, this); 455 } 456 457 /** 458 Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}. 459 460 @param cssQuery a {@link Selector} query 461 @return the first matching element 462 @throws IllegalArgumentException if no match is found 463 @since 1.19.1 464 */ 465 public Element expectFirst(String cssQuery) { 466 return Validate.expectNotNull( 467 Selector.selectFirst(cssQuery, this), 468 "No elements matched the query '%s' in the elements.", cssQuery 469 ); 470 } 471 472 /** 473 * Remove elements from this list that match the {@link Selector} query. 474 * <p> 475 * E.g. HTML: {@code <div class=logo>One</div> <div>Two</div>}<br> 476 * <code>Elements divs = doc.select("div").not(".logo");</code><br> 477 * Result: {@code divs: [<div>Two</div>]} 478 * <p> 479 * @param query the selector query whose results should be removed from these elements 480 * @return a new elements list that contains only the filtered results 481 */ 482 public Elements not(String query) { 483 Elements out = Selector.select(query, this); 484 return Selector.filterOut(this, out); 485 } 486 487 /** 488 * Get the <i>nth</i> matched element as an Elements object. 489 * <p> 490 * See also {@link #get(int)} to retrieve an Element. 491 * @param index the (zero-based) index of the element in the list to retain 492 * @return Elements containing only the specified element, or, if that element did not exist, an empty list. 493 */ 494 public Elements eq(int index) { 495 return size() > index ? new Elements(get(index)) : new Elements(); 496 } 497 498 /** 499 * Test if any of the matched elements match the supplied query. 500 * @param query A selector 501 * @return true if at least one element in the list matches the query. 502 */ 503 public boolean is(String query) { 504 Evaluator eval = Selector.evaluatorOf(query); 505 for (Element e : this) { 506 if (e.is(eval)) 507 return true; 508 } 509 return false; 510 } 511 512 /** 513 * Get the immediate next element sibling of each element in this list. 514 * @return next element siblings. 515 */ 516 public Elements next() { 517 return siblings(null, true, false); 518 } 519 520 /** 521 * Get the immediate next element sibling of each element in this list, filtered by the query. 522 * @param query CSS query to match siblings against 523 * @return next element siblings. 524 */ 525 public Elements next(String query) { 526 return siblings(query, true, false); 527 } 528 529 /** 530 * Get each of the following element siblings of each element in this list. 531 * @return all following element siblings. 532 */ 533 public Elements nextAll() { 534 return siblings(null, true, true); 535 } 536 537 /** 538 * Get each of the following element siblings of each element in this list, that match the query. 539 * @param query CSS query to match siblings against 540 * @return all following element siblings. 541 */ 542 public Elements nextAll(String query) { 543 return siblings(query, true, true); 544 } 545 546 /** 547 * Get the immediate previous element sibling of each element in this list. 548 * @return previous element siblings. 549 */ 550 public Elements prev() { 551 return siblings(null, false, false); 552 } 553 554 /** 555 * Get the immediate previous element sibling of each element in this list, filtered by the query. 556 * @param query CSS query to match siblings against 557 * @return previous element siblings. 558 */ 559 public Elements prev(String query) { 560 return siblings(query, false, false); 561 } 562 563 /** 564 * Get each of the previous element siblings of each element in this list. 565 * @return all previous element siblings. 566 */ 567 public Elements prevAll() { 568 return siblings(null, false, true); 569 } 570 571 /** 572 * Get each of the previous element siblings of each element in this list, that match the query. 573 * @param query CSS query to match siblings against 574 * @return all previous element siblings. 575 */ 576 public Elements prevAll(String query) { 577 return siblings(query, false, true); 578 } 579 580 private Elements siblings(@Nullable String query, boolean next, boolean all) { 581 Elements els = new Elements(); 582 Evaluator eval = query != null? Selector.evaluatorOf(query) : null; 583 for (Element e : this) { 584 do { 585 Element sib = next ? e.nextElementSibling() : e.previousElementSibling(); 586 if (sib == null) break; 587 if (eval == null || sib.is(eval)) els.add(sib); 588 e = sib; 589 } while (all); 590 } 591 return els; 592 } 593 594 /** 595 * Get all of the parents and ancestor elements of the matched elements. 596 * @return all of the parents and ancestor elements of the matched elements 597 */ 598 public Elements parents() { 599 HashSet<Element> combo = new LinkedHashSet<>(); 600 for (Element e: this) { 601 combo.addAll(e.parents()); 602 } 603 return new Elements(combo); 604 } 605 606 // list-like methods 607 /** 608 Get the first matched element. 609 @return The first matched element, or <code>null</code> if contents is empty. 610 */ 611 @Override 612 public @Nullable Element first() { 613 return super.first(); 614 } 615 616 /** 617 Get the last matched element. 618 @return The last matched element, or <code>null</code> if contents is empty. 619 */ 620 @Override 621 public @Nullable Element last() { 622 return super.last(); 623 } 624 625 /** 626 * Perform a depth-first traversal on each of the selected elements. 627 * @param nodeVisitor the visitor callbacks to perform on each node 628 * @return this, for chaining 629 */ 630 public Elements traverse(NodeVisitor nodeVisitor) { 631 NodeTraversor.traverse(nodeVisitor, this); 632 return this; 633 } 634 635 /** 636 * Perform a depth-first filtering on each of the selected elements. 637 * @param nodeFilter the filter callbacks to perform on each node 638 * @return this, for chaining 639 */ 640 public Elements filter(NodeFilter nodeFilter) { 641 NodeTraversor.filter(nodeFilter, this); 642 return this; 643 } 644 645 /** 646 * Get the {@link FormElement} forms from the selected elements, if any. 647 * @return a list of {@link FormElement}s pulled from the matched elements. The list will be empty if the elements contain 648 * no forms. 649 */ 650 public List<FormElement> forms() { 651 ArrayList<FormElement> forms = new ArrayList<>(); 652 for (Element el: this) 653 if (el instanceof FormElement) 654 forms.add((FormElement) el); 655 return forms; 656 } 657 658 /** 659 * Get {@link Comment} nodes that are direct child nodes of the selected elements. 660 * @return Comment nodes, or an empty list if none. 661 */ 662 public List<Comment> comments() { 663 return childNodesOfType(Comment.class); 664 } 665 666 /** 667 * Get {@link TextNode} nodes that are direct child nodes of the selected elements. 668 * @return TextNode nodes, or an empty list if none. 669 */ 670 public List<TextNode> textNodes() { 671 return childNodesOfType(TextNode.class); 672 } 673 674 /** 675 * Get {@link DataNode} nodes that are direct child nodes of the selected elements. DataNode nodes contain the 676 * content of tags such as {@code script}, {@code style} etc and are distinct from {@link TextNode}s. 677 * @return Comment nodes, or an empty list if none. 678 */ 679 public List<DataNode> dataNodes() { 680 return childNodesOfType(DataNode.class); 681 } 682 683 private <T extends Node> List<T> childNodesOfType(Class<T> tClass) { 684 ArrayList<T> nodes = new ArrayList<>(); 685 for (Element el: this) { 686 for (int i = 0; i < el.childNodeSize(); i++) { 687 Node node = el.childNode(i); 688 if (tClass.isInstance(node)) 689 nodes.add(tClass.cast(node)); 690 } 691 } 692 return nodes; 693 } 694 695 // list methods that update the DOM: 696 697 /** 698 Replace the Element at the specified index in this list, and in the DOM. 699 700 @param index index of the element to replace 701 @param element element to be stored at the specified position 702 @return the old Element at this index 703 @since 1.17.1 704 */ 705 @Override 706 public Element set(int index, Element element) { 707 return super.set(index, element); 708 } 709 710 /** 711 Remove the Element at the specified index in this ist, and from the DOM. 712 713 @param index the index of the element to be removed 714 @return the old element at this index 715 @see #deselect(int) 716 @since 1.17.1 717 */ 718 @Override 719 public Element remove(int index) { 720 return super.remove(index); 721 } 722 723 724 /** 725 Remove the Element at the specified index in this list, but not from the DOM. 726 727 @param index the index of the element to be removed 728 @return the old element at this index 729 @see #remove(int) 730 @since 1.19.2 731 */ 732 @Override 733 public Element deselect(int index) { 734 return super.deselect(index); 735 } 736}