001package org.jsoup.select;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.StringUtil;
005import org.jsoup.nodes.Comment;
006import org.jsoup.nodes.DataNode;
007import org.jsoup.nodes.Element;
008import org.jsoup.nodes.FormElement;
009import org.jsoup.nodes.Node;
010import org.jsoup.nodes.TextNode;
011import org.jspecify.annotations.Nullable;
012
013import java.util.ArrayList;
014import java.util.Arrays;
015import java.util.Collection;
016import java.util.HashSet;
017import java.util.Iterator;
018import java.util.LinkedHashSet;
019import java.util.List;
020import java.util.function.Predicate;
021import java.util.function.UnaryOperator;
022
023/**
024 A list of {@link Element}s, with methods that act on every element in the list.
025 <p>To get an {@code Elements} object, use the {@link Element#select(String)} method.</p>
026 <p>Methods that {@link #set(int, Element) set}, {@link #remove(int) remove}, or {@link #replaceAll(UnaryOperator)
027 replace} Elements in the list will also act on the underlying {@link org.jsoup.nodes.Document DOM}.</p>
028
029 @author Jonathan Hedley, jonathan@hedley.net */
030public class Elements extends Nodes<Element> {
    /**
     Create a new Elements list with no arguments passed to the superclass.
     */
    public Elements() {
    }
033
    /**
     Create a new Elements list, forwarding the requested initial capacity to the superclass constructor.
     @param initialCapacity the initial capacity to request for the list
     */
    public Elements(int initialCapacity) {
        super(initialCapacity);
    }
037
    /**
     Create a new Elements list from the supplied collection, which is handed to the superclass constructor.
     @param elements the elements to initialise this list with
     */
    public Elements(Collection<Element> elements) {
        super(elements);
    }
041
    /**
     Create a new Elements list from the supplied list, which is handed to the superclass constructor.
     @param elements the elements to initialise this list with
     */
    public Elements(List<Element> elements) {
        super(elements);
    }
045
    /**
     Create a new Elements list from the supplied elements. The varargs array is viewed as a list (via
     {@link Arrays#asList}) and handed to the superclass constructor.
     @param elements the elements to initialise this list with
     */
    public Elements(Element... elements) {
        super(Arrays.asList(elements));
    }
049
050    /**
051     * Creates a deep copy of these elements.
052     * @return a deep copy
053     */
054    @Override
055    public Elements clone() {
056        Elements clone = new Elements(size());
057        for (Element e : this)
058            clone.add(e.clone());
059        return clone;
060    }
061
    /**
     Convenience method to get the Elements as a plain ArrayList. This allows modification to the list of elements
     without modifying the source Document. I.e. whereas calling {@code elements.remove(0)} will remove the element from
     both the Elements and the DOM, {@code elements.asList().remove(0)} will remove the element from the list only.
     <p>Each Element is still the same DOM connected Element: the returned list is a new list object, but the elements
     themselves are not copied.</p>

     @return a new ArrayList containing the elements in this list
     @since 1.19.2
     @see #Elements(List)
     */
    @Override
    public ArrayList<Element> asList() {
        return new ArrayList<>(this);
    }
076
077    // attribute methods
078    /**
079     Get an attribute value from the first matched element that has the attribute.
080     @param attributeKey The attribute key.
081     @return The attribute value from the first matched element that has the attribute. If no elements were matched (isEmpty() == true),
082     or if the no elements have the attribute, returns empty string.
083     @see #hasAttr(String)
084     */
085    public String attr(String attributeKey) {
086        for (Element element : this) {
087            if (element.hasAttr(attributeKey))
088                return element.attr(attributeKey);
089        }
090        return "";
091    }
092
093    /**
094     Checks if any of the matched elements have this attribute defined.
095     @param attributeKey attribute key
096     @return true if any of the elements have the attribute; false if none do.
097     */
098    public boolean hasAttr(String attributeKey) {
099        for (Element element : this) {
100            if (element.hasAttr(attributeKey))
101                return true;
102        }
103        return false;
104    }
105
106    /**
107     * Get the attribute value for each of the matched elements. If an element does not have this attribute, no value is
108     * included in the result set for that element.
109     * @param attributeKey the attribute name to return values for. You can add the {@code abs:} prefix to the key to
110     * get absolute URLs from relative URLs, e.g.: {@code doc.select("a").eachAttr("abs:href")} .
111     * @return a list of each element's attribute value for the attribute
112     */
113    public List<String> eachAttr(String attributeKey) {
114        List<String> attrs = new ArrayList<>(size());
115        for (Element element : this) {
116            if (element.hasAttr(attributeKey))
117                attrs.add(element.attr(attributeKey));
118        }
119        return attrs;
120    }
121
122    /**
123     * Set an attribute on all matched elements.
124     * @param attributeKey attribute key
125     * @param attributeValue attribute value
126     * @return this
127     */
128    public Elements attr(String attributeKey, String attributeValue) {
129        for (Element element : this) {
130            element.attr(attributeKey, attributeValue);
131        }
132        return this;
133    }
134
135    /**
136     * Remove an attribute from every matched element.
137     * @param attributeKey The attribute to remove.
138     * @return this (for chaining)
139     */
140    public Elements removeAttr(String attributeKey) {
141        for (Element element : this) {
142            element.removeAttr(attributeKey);
143        }
144        return this;
145    }
146
147    /**
148     Add the class name to every matched element's {@code class} attribute.
149     @param className class name to add
150     @return this
151     */
152    public Elements addClass(String className) {
153        for (Element element : this) {
154            element.addClass(className);
155        }
156        return this;
157    }
158
159    /**
160     Remove the class name from every matched element's {@code class} attribute, if present.
161     @param className class name to remove
162     @return this
163     */
164    public Elements removeClass(String className) {
165        for (Element element : this) {
166            element.removeClass(className);
167        }
168        return this;
169    }
170
171    /**
172     Toggle the class name on every matched element's {@code class} attribute.
173     @param className class name to add if missing, or remove if present, from every element.
174     @return this
175     */
176    public Elements toggleClass(String className) {
177        for (Element element : this) {
178            element.toggleClass(className);
179        }
180        return this;
181    }
182
183    /**
184     Determine if any of the matched elements have this class name set in their {@code class} attribute.
185     @param className class name to check for
186     @return true if any do, false if none do
187     */
188    public boolean hasClass(String className) {
189        for (Element element : this) {
190            if (element.hasClass(className))
191                return true;
192        }
193        return false;
194    }
195    
196    /**
197     * Get the form element's value of the first matched element.
198     * @return The form element's value, or empty if not set.
199     * @see Element#val()
200     */
201    public String val() {
202        if (size() > 0)
203            //noinspection ConstantConditions
204            return first().val(); // first() != null as size() > 0
205        else
206            return "";
207    }
208    
209    /**
210     * Set the form element's value in each of the matched elements.
211     * @param value The value to set into each matched element
212     * @return this (for chaining)
213     */
214    public Elements val(String value) {
215        for (Element element : this)
216            element.val(value);
217        return this;
218    }
219    
    /**
     * Get the combined text of all the matched elements.
     * <p>
     * Each element's {@link Element#text()} is taken in list order and the results are collected with
     * {@code StringUtil.joining(" ")}, i.e. with a single space passed as the separator.
     * <p>
     * Note that it is possible to get repeats if the matched elements contain both parent elements and their own
     * children, as the Element.text() method returns the combined text of a parent and all its children.
     * @return string of all text: unescaped and no HTML.
     * @see Element#text()
     * @see #eachText()
     */
    public String text() {
        return stream()
            .map(Element::text)
            .collect(StringUtil.joining(" "));
    }
234
235    /**
236     Test if any matched Element has any text content, that is not just whitespace.
237     @return true if any element has non-blank text content.
238     @see Element#hasText()
239     */
240    public boolean hasText() {
241        for (Element element: this) {
242            if (element.hasText())
243                return true;
244        }
245        return false;
246    }
247
248    /**
249     * Get the text content of each of the matched elements. If an element has no text, then it is not included in the
250     * result.
251     * @return A list of each matched element's text content.
252     * @see Element#text()
253     * @see Element#hasText()
254     * @see #text()
255     */
256    public List<String> eachText() {
257        ArrayList<String> texts = new ArrayList<>(size());
258        for (Element el: this) {
259            if (el.hasText())
260                texts.add(el.text());
261        }
262        return texts;
263    }
264    
    /**
     * Get the combined inner HTML of all matched elements.
     * <p>
     * Each element's {@link Element#html()} is taken in list order and the results are collected with
     * {@code StringUtil.joining("\n")}, i.e. with a newline passed as the separator.
     * @return string of all element's inner HTML.
     * @see #text()
     * @see #outerHtml()
     */
    public String html() {
        return stream()
            .map(Element::html)
            .collect(StringUtil.joining("\n"));
    }
276
277    /**
278     * Update (rename) the tag name of each matched element. For example, to change each {@code <i>} to a {@code <em>}, do
279     * {@code doc.select("i").tagName("em");}
280     *
281     * @param tagName the new tag name
282     * @return this, for chaining
283     * @see Element#tagName(String)
284     */
285    public Elements tagName(String tagName) {
286        for (Element element : this) {
287            element.tagName(tagName);
288        }
289        return this;
290    }
291    
292    /**
293     * Set the inner HTML of each matched element.
294     * @param html HTML to parse and set into each matched element.
295     * @return this, for chaining
296     * @see Element#html(String)
297     */
298    public Elements html(String html) {
299        for (Element element : this) {
300            element.html(html);
301        }
302        return this;
303    }
304    
305    /**
306     * Add the supplied HTML to the start of each matched element's inner HTML.
307     * @param html HTML to add inside each element, before the existing HTML
308     * @return this, for chaining
309     * @see Element#prepend(String)
310     */
311    public Elements prepend(String html) {
312        for (Element element : this) {
313            element.prepend(html);
314        }
315        return this;
316    }
317    
318    /**
319     * Add the supplied HTML to the end of each matched element's inner HTML.
320     * @param html HTML to add inside each element, after the existing HTML
321     * @return this, for chaining
322     * @see Element#append(String)
323     */
324    public Elements append(String html) {
325        for (Element element : this) {
326            element.append(html);
327        }
328        return this;
329    }
330
    /**
     Insert the supplied HTML before each matched element's outer HTML.
     <p>The work is delegated to the superclass implementation; this override returns this list typed as
     {@code Elements} so that calls can be chained.</p>

     @param html HTML to insert before each element
     @return this, for chaining
     @see Element#before(String)
     */
    @Override
    public Elements before(String html) {
        super.before(html);
        return this;
    }
343
    /**
     Insert the supplied HTML after each matched element's outer HTML.
     <p>The work is delegated to the superclass implementation; this override returns this list typed as
     {@code Elements} so that calls can be chained.</p>

     @param html HTML to insert after each element
     @return this, for chaining
     @see Element#after(String)
     */
    @Override
    public Elements after(String html) {
        super.after(html);
        return this;
    }
356
    /**
     Wrap the supplied HTML around each matched element. For example, with HTML
     {@code <p><b>This</b> is <b>jsoup</b></p>},
     <code>doc.select("b").wrap("&lt;i&gt;&lt;/i&gt;");</code>
     becomes {@code <p><i><b>This</b></i> is <i><b>jsoup</b></i></p>}
     <p>The work is delegated to the superclass implementation; this override returns this list typed as
     {@code Elements} so that calls can be chained.</p>

     @param html HTML to wrap around each element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
     @return this (for chaining)
     @see Element#wrap
     */
    @Override
    public Elements wrap(String html) {
        super.wrap(html);
        return this;
    }
372
373    /**
374     * Removes the matched elements from the DOM, and moves their children up into their parents. This has the effect of
375     * dropping the elements but keeping their children.
376     * <p>
377     * This is useful for e.g removing unwanted formatting elements but keeping their contents.
378     * </p>
379     * 
380     * E.g. with HTML: <p>{@code <div><font>One</font> <font><a href="/">Two</a></font></div>}</p>
381     * <p>{@code doc.select("font").unwrap();}</p>
382     * <p>HTML = {@code <div>One <a href="/">Two</a></div>}</p>
383     *
384     * @return this (for chaining)
385     * @see Node#unwrap
386     */
387    public Elements unwrap() {
388        for (Element element : this) {
389            element.unwrap();
390        }
391        return this;
392    }
393
394    /**
395     * Empty (remove all child nodes from) each matched element. This is similar to setting the inner HTML of each
396     * element to nothing.
397     * <p>
398     * E.g. HTML: {@code <div><p>Hello <b>there</b></p> <p>now</p></div>}<br>
399     * <code>doc.select("p").empty();</code><br>
400     * HTML = {@code <div><p></p> <p></p></div>}
401     * @return this, for chaining
402     * @see Element#empty()
403     * @see #remove()
404     */
405    public Elements empty() {
406        for (Element element : this) {
407            element.empty();
408        }
409        return this;
410    }
411
    /**
     * Remove each matched element from the DOM. This is similar to setting the outer HTML of each element to nothing.
     * <p>The elements will still be retained in this list, in case further processing of them is desired.</p>
     * <p>
     * E.g. HTML: {@code <div><p>Hello</p> <p>there</p> <img /></div>}<br>
     * <code>doc.select("p").remove();</code><br>
     * HTML = {@code <div> <img /></div>}
     * <p>
     * Note that this method should not be used to clean user-submitted HTML; rather, use {@link org.jsoup.safety.Cleaner} to clean HTML.
     * <p>
     * The work is delegated to the superclass implementation; this override returns this list typed as
     * {@code Elements} so that calls can be chained.
     * @return this, for chaining
     * @see Element#empty()
     * @see #empty()
     * @see #clear()
     */
    @Override
    public Elements remove() {
        super.remove();
        return this;
    }
431    
432    // filters
433    
    /**
     * Find matching elements within this element list. The query is run by {@code Selector.select(query, this)}, with
     * this list supplied as the second argument.
     * @param query A {@link Selector} query
     * @return the filtered list of elements, or an empty list if none match.
     */
    public Elements select(String query) {
        return Selector.select(query, this);
    }
442
    /**
     Find the first Element that matches the {@link Selector} CSS query within this element list.
     <p>This is effectively the same as calling {@code elements.select(query).first()}, but is more efficient as query
     execution stops on the first hit.</p>
     <p>The query is run by {@code Selector.selectFirst(cssQuery, this)}.</p>

     @param cssQuery a {@link Selector} query
     @return the first matching element, or <b>{@code null}</b> if there is no match.
     @see #expectFirst(String)
     @since 1.19.1
     */
    public @Nullable Element selectFirst(String cssQuery) {
        return Selector.selectFirst(cssQuery, this);
    }
456
457    /**
458     Just like {@link #selectFirst(String)}, but if there is no match, throws an {@link IllegalArgumentException}.
459
460     @param cssQuery a {@link Selector} query
461     @return the first matching element
462     @throws IllegalArgumentException if no match is found
463     @since 1.19.1
464     */
465    public Element expectFirst(String cssQuery) {
466        return Validate.expectNotNull(
467            Selector.selectFirst(cssQuery, this),
468            "No elements matched the query '%s' in the elements.", cssQuery
469        );
470    }
471
472    /**
473     * Remove elements from this list that match the {@link Selector} query.
474     * <p>
475     * E.g. HTML: {@code <div class=logo>One</div> <div>Two</div>}<br>
476     * <code>Elements divs = doc.select("div").not(".logo");</code><br>
477     * Result: {@code divs: [<div>Two</div>]}
478     * <p>
479     * @param query the selector query whose results should be removed from these elements
480     * @return a new elements list that contains only the filtered results
481     */
482    public Elements not(String query) {
483        Elements out = Selector.select(query, this);
484        return Selector.filterOut(this, out);
485    }
486    
487    /**
488     * Get the <i>nth</i> matched element as an Elements object.
489     * <p>
490     * See also {@link #get(int)} to retrieve an Element.
491     * @param index the (zero-based) index of the element in the list to retain
492     * @return Elements containing only the specified element, or, if that element did not exist, an empty list.
493     */
494    public Elements eq(int index) {
495        return size() > index ? new Elements(get(index)) : new Elements();
496    }
497    
498    /**
499     * Test if any of the matched elements match the supplied query.
500     * @param query A selector
501     * @return true if at least one element in the list matches the query.
502     */
503    public boolean is(String query) {
504        Evaluator eval = Selector.evaluatorOf(query);
505        for (Element e : this) {
506            if (e.is(eval))
507                return true;
508        }
509        return false;
510    }
511
    /**
     * Get the immediate next element sibling of each element in this list. Elements without a next element sibling
     * contribute nothing to the result.
     * @return next element siblings.
     */
    public Elements next() {
        return siblings(null, true, false); // no query filter; walk forwards; immediate sibling only
    }
519
    /**
     * Get the immediate next element sibling of each element in this list, filtered by the query. A next sibling that
     * does not match the query is left out of the result.
     * @param query CSS query to match siblings against
     * @return next element siblings.
     */
    public Elements next(String query) {
        return siblings(query, true, false); // filter by query; walk forwards; immediate sibling only
    }
528
    /**
     * Get each of the following element siblings of each element in this list.
     * @return all following element siblings.
     */
    public Elements nextAll() {
        return siblings(null, true, true); // no query filter; walk forwards; every following sibling
    }
536
    /**
     * Get each of the following element siblings of each element in this list, that match the query.
     * @param query CSS query to match siblings against
     * @return all following element siblings.
     */
    public Elements nextAll(String query) {
        return siblings(query, true, true); // filter by query; walk forwards; every following sibling
    }
545
    /**
     * Get the immediate previous element sibling of each element in this list. Elements without a previous element
     * sibling contribute nothing to the result.
     * @return previous element siblings.
     */
    public Elements prev() {
        return siblings(null, false, false); // no query filter; walk backwards; immediate sibling only
    }
553
    /**
     * Get the immediate previous element sibling of each element in this list, filtered by the query. A previous
     * sibling that does not match the query is left out of the result.
     * @param query CSS query to match siblings against
     * @return previous element siblings.
     */
    public Elements prev(String query) {
        return siblings(query, false, false); // filter by query; walk backwards; immediate sibling only
    }
562
    /**
     * Get each of the previous element siblings of each element in this list. For each element, the siblings are
     * gathered walking backwards, so the nearest previous sibling comes first.
     * @return all previous element siblings.
     */
    public Elements prevAll() {
        return siblings(null, false, true); // no query filter; walk backwards; every preceding sibling
    }
570
    /**
     * Get each of the previous element siblings of each element in this list, that match the query. For each element,
     * the siblings are gathered walking backwards, so the nearest matching previous sibling comes first.
     * @param query CSS query to match siblings against
     * @return all previous element siblings.
     */
    public Elements prevAll(String query) {
        return siblings(query, false, true); // filter by query; walk backwards; every preceding sibling
    }
579
580    private Elements siblings(@Nullable String query, boolean next, boolean all) {
581        Elements els = new Elements();
582        Evaluator eval = query != null? Selector.evaluatorOf(query) : null;
583        for (Element e : this) {
584            do {
585                Element sib = next ? e.nextElementSibling() : e.previousElementSibling();
586                if (sib == null) break;
587                if (eval == null || sib.is(eval)) els.add(sib);
588                e = sib;
589            } while (all);
590        }
591        return els;
592    }
593
594    /**
595     * Get all of the parents and ancestor elements of the matched elements.
596     * @return all of the parents and ancestor elements of the matched elements
597     */
598    public Elements parents() {
599        HashSet<Element> combo = new LinkedHashSet<>();
600        for (Element e: this) {
601            combo.addAll(e.parents());
602        }
603        return new Elements(combo);
604    }
605
606    // list-like methods
    /**
     Get the first matched element. Delegates to the superclass implementation.
     @return The first matched element, or <code>null</code> if contents is empty.
     @see #last()
     */
    @Override
    public @Nullable Element first() {
        return super.first();
    }
615
    /**
     Get the last matched element. Delegates to the superclass implementation.
     @return The last matched element, or <code>null</code> if contents is empty.
     @see #first()
     */
    @Override
    public @Nullable Element last() {
        return super.last();
    }
624
    /**
     * Perform a depth-first traversal on each of the selected elements. The traversal is carried out by
     * {@code NodeTraversor.traverse}, which is handed the visitor and this list.
     * @param nodeVisitor the visitor callbacks to perform on each node
     * @return this, for chaining
     */
    public Elements traverse(NodeVisitor nodeVisitor) {
        NodeTraversor.traverse(nodeVisitor, this);
        return this;
    }
634
    /**
     * Perform a depth-first filtering on each of the selected elements. The filtering is carried out by
     * {@code NodeTraversor.filter}, which is handed the filter and this list.
     * @param nodeFilter the filter callbacks to perform on each node
     * @return this, for chaining
     */
    public Elements filter(NodeFilter nodeFilter) {
        NodeTraversor.filter(nodeFilter, this);
        return this;
    }
644
645    /**
646     * Get the {@link FormElement} forms from the selected elements, if any.
647     * @return a list of {@link FormElement}s pulled from the matched elements. The list will be empty if the elements contain
648     * no forms.
649     */
650    public List<FormElement> forms() {
651        ArrayList<FormElement> forms = new ArrayList<>();
652        for (Element el: this)
653            if (el instanceof FormElement)
654                forms.add((FormElement) el);
655        return forms;
656    }
657
    /**
     * Get {@link Comment} nodes that are direct child nodes of the selected elements. Deeper descendants are not
     * searched.
     * @return Comment nodes, or an empty list if none.
     */
    public List<Comment> comments() {
        return childNodesOfType(Comment.class);
    }
665
    /**
     * Get {@link TextNode} nodes that are direct child nodes of the selected elements. Deeper descendants are not
     * searched.
     * @return TextNode nodes, or an empty list if none.
     */
    public List<TextNode> textNodes() {
        return childNodesOfType(TextNode.class);
    }
673
    /**
     * Get {@link DataNode} nodes that are direct child nodes of the selected elements. DataNode nodes contain the
     * content of tags such as {@code script}, {@code style} etc and are distinct from {@link TextNode}s.
     * @return DataNode nodes, or an empty list if none.
     */
    public List<DataNode> dataNodes() {
        return childNodesOfType(DataNode.class);
    }
682
683    private <T extends Node> List<T> childNodesOfType(Class<T> tClass) {
684        ArrayList<T> nodes = new ArrayList<>();
685        for (Element el: this) {
686            for (int i = 0; i < el.childNodeSize(); i++) {
687                Node node = el.childNode(i);
688                if (tClass.isInstance(node))
689                    nodes.add(tClass.cast(node));
690            }
691        }
692        return nodes;
693    }
694
695    // list methods that update the DOM:
696
    /**
     Replace the Element at the specified index in this list, and in the DOM. Delegates to the superclass
     implementation.

     @param index index of the element to replace
     @param element element to be stored at the specified position
     @return the old Element at this index
     @since 1.17.1
     */
    @Override
    public Element set(int index, Element element) {
        return super.set(index, element);
    }
709
    /**
     Remove the Element at the specified index in this list, and from the DOM. Delegates to the superclass
     implementation.

     @param index the index of the element to be removed
     @return the old element at this index
     @see #deselect(int)
     @since 1.17.1
     */
    @Override
    public Element remove(int index) {
        return super.remove(index);
    }
722
723
    /**
     Remove the Element at the specified index in this list, but not from the DOM. Delegates to the superclass
     implementation.

     @param index the index of the element to be removed
     @return the old element at this index
     @see #remove(int)
     @since 1.19.2
     */
    @Override
    public Element deselect(int index) {
        return super.deselect(index);
    }
736}