001package org.jsoup.parser;
002
003import org.jsoup.helper.Validate;
004import org.jsoup.internal.Normalizer;
005import org.jsoup.internal.StringUtil;
006import org.jsoup.nodes.Attributes;
007import org.jsoup.nodes.CDataNode;
008import org.jsoup.nodes.Comment;
009import org.jsoup.nodes.DataNode;
010import org.jsoup.nodes.Document;
011import org.jsoup.nodes.Element;
012import org.jsoup.nodes.FormElement;
013import org.jsoup.nodes.Node;
014import org.jsoup.nodes.TextNode;
015import org.jspecify.annotations.Nullable;
016
017import java.io.Reader;
018import java.util.ArrayList;
019import java.util.List;
020
021import static org.jsoup.internal.StringUtil.inSorted;
022import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
023import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
024import static org.jsoup.parser.Parser.*;
025
026/**
027 * HTML Tree Builder; creates a DOM from Tokens.
028 */
029public class HtmlTreeBuilder extends TreeBuilder {
    // tag searches. must be sorted, used in inSorted. HtmlTreeBuilderTest validates they're sorted.
    // Boundary tags handed to inSpecificScope as the base types by the inScope(...) overloads.
    static final String[] TagsSearchInScope = new String[]{ // a particular element in scope
        "applet", "caption", "html", "marquee", "object", "table", "td", "template", "th"
    };
    // math and svg namespaces for particular element in scope
    // (only consulted by inSpecificScope when the base types are TagsSearchInScope)
    static final String[]TagSearchInScopeMath = new String[] {
        "annotation-xml",  "mi", "mn", "mo", "ms", "mtext"
    };
    static final String[]TagSearchInScopeSvg = new String[] {
        "desc", "foreignobject", "title" // note normalized to lowercase to match other scope searches; will preserve input case as appropriate
    };

    // extra boundary tags supplied by inListItemScope
    static final String[] TagSearchList = new String[]{"ol", "ul"};
    // extra boundary tag supplied by inButtonScope
    static final String[] TagSearchButton = new String[]{"button"};
    // base boundary tags supplied by inTableScope
    static final String[] TagSearchTableScope = new String[]{"html", "table"};
    // the only tags inSelectScope will walk past while looking for its target
    static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
    static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
    static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
    // NOTE(review): looks like the HTML spec's "special" element category - confirm against its consumers
    static final String[] TagSearchSpecial = new String[]{
        "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", "blockquote", "body", "br",
        "button", "caption", "center", "col", "colgroup", "dd", "details", "dir", "div", "dl", "dt", "embed",
        "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6",
        "head", "header", "hgroup", "hr", "html", "iframe", "img", "input", "keygen", "li", "link", "listing", "main",
        "marquee", "menu", "meta", "nav", "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext",
        "pre", "script", "search", "section", "select", "source", "style", "summary", "table", "tbody", "td",
        "template", "textarea", "tfoot", "th", "thead", "title", "tr", "track", "ul", "wbr", "xmp"};
056    static String[] TagSearchSpecialMath = {"annotation-xml", "mi", "mn", "mo", "ms", "mtext"}; // differs to MathML text integration point; adds annotation-xml
    // MathML element names that isMathmlTextIntegration treats as text integration points (probed with inSorted)
    static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
    // SVG element names that isHtmlIntegration treats as HTML integration points; matched against tagName() with
    // StringUtil.in, so the mixed case of foreignObject is significant
    static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};
    // HTML element names that doInsertElement registers with the current form element (probed with inSorted)
    static final String[] TagFormListed = {
        "button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
    };
062
    /**
     Legacy scope search depth constant, retained only for compatibility.
     @deprecated Not used anymore; configure parser depth via {@link Parser#setMaxDepth(int)}. Will be removed in jsoup 1.24.1.
     */
    @Deprecated
    public static final int MaxScopeSearchDepth = 100;
066
    // Per-parse state. All of these are (re)assigned in initialiseParse.
    private HtmlTreeBuilderState state; // the current state
    private HtmlTreeBuilderState originalState; // original / marked state; captured by markInsertionMode()

    private boolean baseUriSetFromDoc; // true once maybeSetBaseUri has taken a base href; later ones are ignored
    private @Nullable Element headElement; // the current head element
    private @Nullable FormElement formElement; // the current form element
    private @Nullable Element contextElement; // fragment parse root; name only copy of context. could be null even if fragment parsing
    ArrayList<Element> formattingElements; // active (open) formatting elements
    private ArrayList<HtmlTreeBuilderState> tmplInsertMode; // stack of Template Insertion modes
    private List<Token.Character> pendingTableCharacters; // chars in table to be shifted out
    private Token.EndTag emptyEnd; // reused empty end tag; emitted by insertElementFor for accepted self-closing tags

    private boolean framesetOk; // if ok to go into frameset
    private boolean fosterInserts; // if next inserts should be fostered
    private boolean fragmentParsing; // if parsing a fragment of html
082
083    @Override ParseSettings defaultSettings() {
084        return ParseSettings.htmlDefault;
085    }
086
087    @Override
088    HtmlTreeBuilder newInstance() {
089        return new HtmlTreeBuilder();
090    }
091
092    @Override
093    protected void initialiseParse(Reader input, String baseUri, Parser parser) {
094        super.initialiseParse(input, baseUri, parser);
095
096        // this is a bit mucky. todo - probably just create new parser objects to ensure all reset.
097        state = HtmlTreeBuilderState.Initial;
098        originalState = null;
099        baseUriSetFromDoc = false;
100        headElement = null;
101        formElement = null;
102        contextElement = null;
103        formattingElements = new ArrayList<>();
104        tmplInsertMode = new ArrayList<>();
105        pendingTableCharacters = new ArrayList<>();
106        emptyEnd = new Token.EndTag(this);
107        framesetOk = true;
108        fosterInserts = false;
109        fragmentParsing = false;
110    }
111
112    @Override void initialiseParseFragment(@Nullable Element context) {
113        // context may be null
114        state = HtmlTreeBuilderState.Initial;
115        fragmentParsing = true;
116
117        if (context != null) {
118            final String contextName = context.normalName();
119            contextElement = new Element(tagFor(contextName, contextName, defaultNamespace(), settings), baseUri);
120            if (context.ownerDocument() != null) // quirks setup:
121                doc.quirksMode(context.ownerDocument().quirksMode());
122
123            // initialise the tokeniser state:
124            switch (contextName) {
125                case "script":
126                    tokeniser.transition(TokeniserState.ScriptData);
127                    break;
128                case "plaintext":
129                    tokeniser.transition(TokeniserState.PLAINTEXT);
130                    break;
131                case "template":
132                    tokeniser.transition(TokeniserState.Data);
133                    pushTemplateMode(HtmlTreeBuilderState.InTemplate);
134                    break;
135                default:
136                    Tag tag = contextElement.tag();
137                    TokeniserState textState = tag.textState();
138                    if (textState != null)
139                        tokeniser.transition(textState); // style, xmp, title, textarea, etc; or custom
140                    else
141                        tokeniser.transition(TokeniserState.Data);
142            }
143            doc.appendChild(contextElement);
144            push(contextElement);
145            resetInsertionMode();
146
147            // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
148            // with form correctly
149            Element formSearch = context;
150            while (formSearch != null) {
151                if (formSearch instanceof FormElement) {
152                    formElement = (FormElement) formSearch;
153                    break;
154                }
155                formSearch = formSearch.parent();
156            }
157        }
158    }
159
160    @Override List<Node> completeParseFragment() {
161        if (contextElement != null) {
162            // depending on context and the input html, content may have been added outside of the root el
163            // e.g. context=p, input=div, the div will have been pushed out.
164            List<Node> nodes = contextElement.siblingNodes();
165            if (!nodes.isEmpty())
166                contextElement.insertChildren(-1, nodes);
167            return contextElement.childNodes();
168        }
169        else
170            return doc.childNodes();
171    }
172
173    @Override
174    protected boolean process(Token token) {
175        HtmlTreeBuilderState dispatch = useCurrentOrForeignInsert(token) ? this.state : ForeignContent;
176        return dispatch.process(token, this);
177    }
178
179    boolean useCurrentOrForeignInsert(Token token) {
180        // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
181        // If the stack of open elements is empty
182        if (stack.isEmpty())
183            return true;
184        final Element el = currentElement();
185        final String ns = el.tag().namespace();
186
187        // If the adjusted current node is an element in the HTML namespace
188        if (NamespaceHtml.equals(ns))
189            return true;
190
191        // If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
192        // If the adjusted current node is a MathML text integration point and the token is a character token
193        if (isMathmlTextIntegration(el)) {
194            if (token.isStartTag()
195                    && !"mglyph".equals(token.asStartTag().normalName)
196                    && !"malignmark".equals(token.asStartTag().normalName))
197                    return true;
198            if (token.isCharacter())
199                    return true;
200        }
201        // If the adjusted current node is a MathML annotation-xml element and the token is a start tag whose tag name is "svg"
202        if (Parser.NamespaceMathml.equals(ns)
203            && el.nameIs("annotation-xml")
204            && token.isStartTag()
205            && "svg".equals(token.asStartTag().normalName))
206            return true;
207
208        // If the adjusted current node is an HTML integration point and the token is a start tag
209        // If the adjusted current node is an HTML integration point and the token is a character token
210        if (isHtmlIntegration(el)
211            && (token.isStartTag() || token.isCharacter()))
212            return true;
213
214        // If the token is an end-of-file token
215        return token.isEOF();
216    }
217
218    static boolean isMathmlTextIntegration(Element el) {
219        /*
220        A node is a MathML text integration point if it is one of the following elements:
221        A MathML mi element
222        A MathML mo element
223        A MathML mn element
224        A MathML ms element
225        A MathML mtext element
226         */
227        return (Parser.NamespaceMathml.equals(el.tag().namespace())
228            && StringUtil.inSorted(el.normalName(), TagMathMlTextIntegration));
229    }
230
231    static boolean isHtmlIntegration(Element el) {
232        /*
233        A node is an HTML integration point if it is one of the following elements:
234        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "text/html"
235        A MathML annotation-xml element whose start tag token had an attribute with the name "encoding" whose value was an ASCII case-insensitive match for the string "application/xhtml+xml"
236        An SVG foreignObject element
237        An SVG desc element
238        An SVG title element
239         */
240        if (Parser.NamespaceMathml.equals(el.tag().namespace())
241            && el.nameIs("annotation-xml")) {
242            String encoding = Normalizer.normalize(el.attr("encoding"));
243            if (encoding.equals("text/html") || encoding.equals("application/xhtml+xml"))
244                return true;
245        }
246        // note using .tagName for case-sensitive hit here of foreignObject
247        return Parser.NamespaceSvg.equals(el.tag().namespace()) && StringUtil.in(el.tagName(), TagSvgHtmlIntegration);
248    }
249
250    boolean process(Token token, HtmlTreeBuilderState state) {
251        return state.process(token, this);
252    }
253
254    void transition(HtmlTreeBuilderState state) {
255        this.state = state;
256    }
257
258    HtmlTreeBuilderState state() {
259        return state;
260    }
261
262    void markInsertionMode() {
263        originalState = state;
264    }
265
266    HtmlTreeBuilderState originalState() {
267        return originalState;
268    }
269
270    void framesetOk(boolean framesetOk) {
271        this.framesetOk = framesetOk;
272    }
273
274    boolean framesetOk() {
275        return framesetOk;
276    }
277
278    Document getDocument() {
279        return doc;
280    }
281
282    String getBaseUri() {
283        return baseUri;
284    }
285
286    void maybeSetBaseUri(Element base) {
287        if (baseUriSetFromDoc) // only listen to the first <base href> in parse
288            return;
289
290        String href = base.absUrl("href");
291        if (href.length() != 0) { // ignore <base target> etc
292            baseUri = href;
293            baseUriSetFromDoc = true;
294            doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
295        }
296    }
297
298    boolean isFragmentParsing() {
299        return fragmentParsing;
300    }
301
302    void error(HtmlTreeBuilderState state) {
303        if (parser.getErrors().canAddError())
304            parser.getErrors().add(new ParseError(reader, "Unexpected %s token [%s] when in state [%s]",
305                currentToken.tokenType(), currentToken, state));
306    }
307
308    Element createElementFor(Token.StartTag startTag, String namespace, boolean forcePreserveCase) {
309        // dedupe and normalize the attributes:
310        Attributes attributes = startTag.attributes;
311        if (attributes != null && !attributes.isEmpty()) {
312            if (!forcePreserveCase)
313                settings.normalizeAttributes(attributes);
314            int dupes = attributes.deduplicate(settings);
315            if (dupes > 0) {
316                error("Dropped duplicate attribute(s) in tag [%s]", startTag.normalName);
317            }
318        }
319
320        Tag tag = tagFor(startTag.name(), startTag.normalName, namespace,
321            forcePreserveCase ? ParseSettings.preserveCase : settings);
322
323        return (tag.normalName().equals("form")) ?
324            new FormElement(tag, null, attributes) :
325            new Element(tag, null, attributes);
326    }
327
328    /** Inserts an HTML element for the given tag */
329    Element insertElementFor(final Token.StartTag startTag) {
330        Element el = createElementFor(startTag, NamespaceHtml, false);
331        doInsertElement(el);
332
333        // handle self-closing tags. when the spec expects an empty (void) tag, will directly hit insertEmpty, so won't generate this fake end tag.
334        if (startTag.isSelfClosing()) {
335            Tag tag = el.tag();
336            tag.setSeenSelfClose(); // can infer output if in xml syntax
337            if (tag.isEmpty()) {
338                // treated as empty below; nothing further
339            } else if (tag.isKnownTag() && tag.isSelfClosing()) {
340                // ok, allow it. effectively a pop, but fiddles with the state. handles empty style, title etc which would otherwise leave us in data state
341                tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data
342                tokeniser.emit(emptyEnd.reset().name(el.tagName()));  // ensure we get out of whatever state we are in. emitted for yielded processing
343            } else {
344                // error it, and leave the inserted element on
345                tokeniser.error("Tag [%s] cannot be self-closing; not a void tag", tag.normalName());
346            }
347        }
348
349        if (el.tag().isEmpty()) {
350            pop(); // custom void tags behave like built-in voids (no children, not left on the stack); known empty go via insertEmpty
351        }
352
353        return el;
354    }
355
356    /**
357     Inserts a foreign element. Preserves the case of the tag name and of the attributes.
358     */
359    Element insertForeignElementFor(final Token.StartTag startTag, String namespace) {
360        Element el = createElementFor(startTag, namespace, true);
361        doInsertElement(el);
362
363        if (startTag.isSelfClosing()) { // foreign els are OK to self-close
364            el.tag().setSeenSelfClose(); // remember this is self-closing for output
365            pop();
366        }
367
368        return el;
369    }
370
371    Element insertEmptyElementFor(Token.StartTag startTag) {
372        Element el = createElementFor(startTag, NamespaceHtml, false);
373        doInsertElement(el);
374        pop();
375        return el;
376    }
377
378    FormElement insertFormElement(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack) {
379        FormElement el = (FormElement) createElementFor(startTag, NamespaceHtml, false);
380
381        if (checkTemplateStack) {
382            if(!onStack("template"))
383                setFormElement(el);
384        } else
385            setFormElement(el);
386
387        doInsertElement(el);
388        if (!onStack) pop();
389        return el;
390    }
391
392    /** Inserts the Element onto the stack. All element inserts must run through this method. Performs any general
393     tests on the Element before insertion.
394     * @param el the Element to insert and make the current element
395     */
396    private void doInsertElement(Element el) {
397        enforceStackDepthLimit();
398
399        if (formElement != null && el.tag().namespace.equals(NamespaceHtml) && StringUtil.inSorted(el.normalName(), TagFormListed))
400            formElement.addElement(el); // connect form controls to their form element
401
402        // in HTML, the xmlns attribute if set must match what the parser set the tag's namespace to
403        if (parser.getErrors().canAddError() && el.hasAttr("xmlns") && !el.attr("xmlns").equals(el.tag().namespace()))
404            error("Invalid xmlns attribute [%s] on tag [%s]", el.attr("xmlns"), el.tagName());
405
406        if (isFosterInserts() && StringUtil.inSorted(currentElement().normalName(), InTableFoster))
407            insertInFosterParent(el);
408        else
409            currentElement().appendChild(el);
410
411        push(el);
412    }
413
414    void insertCommentNode(Token.Comment token) {
415        Comment node = new Comment(token.getData());
416        currentElement().appendChild(node);
417        onNodeInserted(node);
418    }
419
420    /** Inserts the provided character token into the current element. Any nulls in the data will be removed. */
421    void insertCharacterNode(Token.Character characterToken) {
422        insertCharacterNode(characterToken, false);
423    }
424
425    /**
426     Inserts the provided character token into the current element. The tokenizer will have already raised precise character errors.
427
428     @param characterToken the character token to insert
429     @param replace if true, replaces any null chars in the data with the replacement char (U+FFFD). If false, removes
430     null chars.
431     */
432    void insertCharacterNode(Token.Character characterToken, boolean replace) {
433        characterToken.normalizeNulls(replace);
434        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
435        insertCharacterToElement(characterToken, el);
436    }
437
438    /** Inserts the provided character token into the provided element. */
439    void insertCharacterToElement(Token.Character characterToken, Element el) {
440        final Node node;
441        final String data = characterToken.getData();
442
443        if (characterToken.isCData())
444            node = new CDataNode(data);
445        else if (el.tag().is(Tag.Data))
446            node = new DataNode(data);
447        else
448            node = new TextNode(data);
449        el.appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
450        onNodeInserted(node);
451    }
452
453    ArrayList<Element> getStack() {
454        return stack;
455    }
456
457    boolean onStack(Element el) {
458        return onStack(stack, el);
459    }
460
461    /** Checks if there is an HTML element with the given name on the stack. */
462    boolean onStack(String elName) {
463        return getFromStack(elName) != null;
464    }
465
466    private static final int maxQueueDepth = 256; // an arbitrary tension point between real HTML and crafted pain
467    private static boolean onStack(ArrayList<Element> queue, Element element) {
468        final int bottom = queue.size() - 1;
469        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
470        for (int pos = bottom; pos >= upper; pos--) {
471            Element next = queue.get(pos);
472            if (next == element) {
473                return true;
474            }
475        }
476        return false;
477    }
478
479    /** Gets the nearest (lowest) HTML element with the given name from the stack. */
480    @Nullable
481    Element getFromStack(String elName) {
482        final int bottom = stack.size() - 1;
483        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
484        for (int pos = bottom; pos >= upper; pos--) {
485            Element next = stack.get(pos);
486            if (next.elementIs(elName, NamespaceHtml)) {
487                return next;
488            }
489        }
490        return null;
491    }
492
493    boolean removeFromStack(Element el) {
494        for (int pos = stack.size() -1; pos >= 0; pos--) {
495            Element next = stack.get(pos);
496            if (next == el) {
497                stack.remove(pos);
498                onNodeClosed(el);
499                return true;
500            }
501        }
502        return false;
503    }
504
505    @Override
506    void onStackPrunedForDepth(Element element) {
507        // handle other effects of popping to keep state correct
508        if (element == headElement) headElement = null;
509        if (element == formElement) setFormElement(null);
510        removeFromActiveFormattingElements(element);
511        if (element.nameIs("template")) {
512            clearFormattingElementsToLastMarker();
513            if (templateModeSize() > 0)
514                popTemplateMode();
515            resetInsertionMode();
516        }
517    }
518
519    /** Pops the stack until the given HTML element is removed. */
520    @Nullable
521    Element popStackToClose(String elName) {
522        for (int pos = stack.size() -1; pos >= 0; pos--) {
523            Element el = pop();
524            if (el.elementIs(elName, NamespaceHtml)) {
525                return el;
526            }
527        }
528        return null;
529    }
530
531    /** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
532    @Nullable
533    Element popStackToCloseAnyNamespace(String elName) {
534        for (int pos = stack.size() -1; pos >= 0; pos--) {
535            Element el = pop();
536            if (el.nameIs(elName)) {
537                return el;
538            }
539        }
540        return null;
541    }
542
543    /** Pops the stack until one of the given HTML elements is removed. */
544    void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
545        for (int pos = stack.size() -1; pos >= 0; pos--) {
546            Element el = pop();
547            if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
548                break;
549            }
550        }
551    }
552
553    void clearStackToTableContext() {
554        clearStackToContext("table", "template");
555    }
556
557    void clearStackToTableBodyContext() {
558        clearStackToContext("tbody", "tfoot", "thead", "template");
559    }
560
561    void clearStackToTableRowContext() {
562        clearStackToContext("tr", "template");
563    }
564
565    /** Removes elements from the stack until one of the supplied HTML elements is removed. */
566    private void clearStackToContext(String... nodeNames) {
567        for (int pos = stack.size() -1; pos >= 0; pos--) {
568            Element next = stack.get(pos);
569            if (NamespaceHtml.equals(next.tag().namespace()) &&
570                (StringUtil.in(next.normalName(), nodeNames) || next.nameIs("html")))
571                break;
572            else
573                pop();
574        }
575    }
576
577    /**
578     Gets the Element immediately above the supplied element on the stack. Which due to adoption, may not necessarily be
579     its parent.
580
581     @param el
582     @return the Element immediately above the supplied element, or null if there is no such element.
583     */
584    @Nullable Element aboveOnStack(Element el) {
585        if (!onStack(el)) return null;
586        for (int pos = stack.size() -1; pos > 0; pos--) {
587            Element next = stack.get(pos);
588            if (next == el) {
589                return stack.get(pos-1);
590            }
591        }
592        return null;
593    }
594
595    void insertOnStackAfter(Element after, Element in) {
596        int i = stack.lastIndexOf(after);
597        if (i == -1) {
598            error("Did not find element on stack to insert after");
599            stack.add(in);
600            // may happen on particularly malformed inputs during adoption
601        } else {
602            stack.add(i+1, in);
603        }
604    }
605
606    void replaceOnStack(Element out, Element in) {
607        replaceInQueue(stack, out, in);
608    }
609
610    private static void replaceInQueue(ArrayList<Element> queue, Element out, Element in) {
611        int i = queue.lastIndexOf(out);
612        Validate.isTrue(i != -1);
613        queue.set(i, in);
614    }
615
616    /**
617     * Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth
618     * is limited to {@link #maxQueueDepth}.
619     * @return true if the insertion mode was actually changed.
620     */
621    boolean resetInsertionMode() {
622        // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode
623        boolean last = false;
624        final int bottom = stack.size() - 1;
625        final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
626        final HtmlTreeBuilderState origState = this.state;
627
628        if (stack.size() == 0) { // nothing left of stack, just get to body
629            transition(HtmlTreeBuilderState.InBody);
630        }
631
632        LOOP: for (int pos = bottom; pos >= upper; pos--) {
633            Element node = stack.get(pos);
634            if (pos == upper) {
635                last = true;
636                if (fragmentParsing)
637                    node = contextElement;
638            }
639            String name = node != null ? node.normalName() : "";
640            if (!NamespaceHtml.equals(node.tag().namespace()))
641                continue; // only looking for HTML elements here
642
643            switch (name) {
644                case "select":
645                    transition(HtmlTreeBuilderState.InSelect);
646                    // todo - should loop up (with some limit) and check for table or template hits
647                    break LOOP;
648                case "td":
649                case "th":
650                    if (!last) {
651                        transition(HtmlTreeBuilderState.InCell);
652                        break LOOP;
653                    }
654                    break;
655                case "tr":
656                    transition(HtmlTreeBuilderState.InRow);
657                    break LOOP;
658                case "tbody":
659                case "thead":
660                case "tfoot":
661                    transition(HtmlTreeBuilderState.InTableBody);
662                    break LOOP;
663                case "caption":
664                    transition(HtmlTreeBuilderState.InCaption);
665                    break LOOP;
666                case "colgroup":
667                    transition(HtmlTreeBuilderState.InColumnGroup);
668                    break LOOP;
669                case "table":
670                    transition(HtmlTreeBuilderState.InTable);
671                    break LOOP;
672                case "template":
673                    HtmlTreeBuilderState tmplState = currentTemplateMode();
674                    Validate.notNull(tmplState, "Bug: no template insertion mode on stack!");
675                    transition(tmplState);
676                    break LOOP;
677                case "head":
678                    if (!last) {
679                        transition(HtmlTreeBuilderState.InHead);
680                        break LOOP;
681                    }
682                    break;
683                case "body":
684                    transition(HtmlTreeBuilderState.InBody);
685                    break LOOP;
686                case "frameset":
687                    transition(HtmlTreeBuilderState.InFrameset);
688                    break LOOP;
689                case "html":
690                    transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead);
691                    break LOOP;
692            }
693            if (last) {
694                transition(HtmlTreeBuilderState.InBody);
695                break;
696            }
697        }
698        return state != origState;
699    }
700
701    /** Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes */
702    void resetBody() {
703        if (!onStack("body")) {
704            stack.add(doc.body()); // not onNodeInserted, as already seen
705        }
706        transition(HtmlTreeBuilderState.InBody);
707    }
708
709    // todo: tidy up in specific scope methods
710    private final String[] specificScopeTarget = {null};
711
712    private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
713        specificScopeTarget[0] = targetName;
714        return inSpecificScope(specificScopeTarget, baseTypes, extraTypes);
715    }
716
717    private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes) {
718        // https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
719        final int bottom = stack.size() -1;
720        // don't walk too far up the tree
721        for (int pos = bottom; pos >= 0; pos--) {
722            Element el = stack.get(pos);
723            String elName = el.normalName();
724            // namespace checks - arguments provided are always in html ns, with this bolt-on for math and svg:
725            String ns = el.tag().namespace();
726            if (ns.equals(NamespaceHtml)) {
727                if (inSorted(elName, targetNames))
728                    return true;
729                if (inSorted(elName, baseTypes))
730                    return false;
731                if (extraTypes != null && inSorted(elName, extraTypes))
732                    return false;
733            } else if (baseTypes == TagsSearchInScope) {
734                if (ns.equals(NamespaceMathml) && inSorted(elName, TagSearchInScopeMath))
735                    return false;
736                if (ns.equals(NamespaceSvg) && inSorted(elName, TagSearchInScopeSvg))
737                    return false;
738            }
739        }
740        //Validate.fail("Should not be reachable"); // would end up false because hitting 'html' at root (basetypes)
741        return false;
742    }
743
744    boolean inScope(String[] targetNames) {
745        return inSpecificScope(targetNames, TagsSearchInScope, null);
746    }
747
748    boolean inScope(String targetName) {
749        return inScope(targetName, null);
750    }
751
752    boolean inScope(String targetName, String[] extras) {
753        return inSpecificScope(targetName, TagsSearchInScope, extras);
754    }
755
756    boolean inListItemScope(String targetName) {
757        return inScope(targetName, TagSearchList);
758    }
759
760    boolean inButtonScope(String targetName) {
761        return inScope(targetName, TagSearchButton);
762    }
763
764    boolean inTableScope(String targetName) {
765        return inSpecificScope(targetName, TagSearchTableScope, null);
766    }
767
768    boolean inSelectScope(String targetName) {
769        for (int pos = stack.size() -1; pos >= 0; pos--) {
770            Element el = stack.get(pos);
771            String elName = el.normalName();
772            if (elName.equals(targetName))
773                return true;
774            if (!inSorted(elName, TagSearchSelectScope)) // all elements except
775                return false;
776        }
777        return false; // nothing left on stack
778    }
779
780    /** Tests if there is some element on the stack that is not in the provided set. */
781    boolean onStackNot(String[] allowedTags) {
782        for (int pos = stack.size() - 1; pos >= 0; pos--) {
783            final String elName = stack.get(pos).normalName();
784            if (!inSorted(elName, allowedTags))
785                return true;
786        }
787        return false;
788    }
789
    /**
     Sets the head element tracked by this tree builder.
     @param headElement the element to record as the head
     */
    void setHeadElement(Element headElement) {
        this.headElement = headElement;
    }
793
794    Element getHeadElement() {
795        return headElement;
796    }
797
798    boolean isFosterInserts() {
799        return fosterInserts;
800    }
801
    /**
     Sets the foster inserts flag.
     @param fosterInserts the new flag value
     */
    void setFosterInserts(boolean fosterInserts) {
        this.fosterInserts = fosterInserts;
    }
805
806    @Nullable FormElement getFormElement() {
807        return formElement;
808    }
809
    /**
     Sets the form element tracked by this tree builder.
     @param formElement the element to record as the form
     */
    void setFormElement(FormElement formElement) {
        this.formElement = formElement;
    }
813
814    void resetPendingTableCharacters() {
815        pendingTableCharacters.clear();
816    }
817
818    List<Token.Character> getPendingTableCharacters() {
819        return pendingTableCharacters;
820    }
821
822    void addPendingTableCharacters(Token.Character c) {
823        // make a copy of the token to maintain its state (as Tokens are otherwise reset)
824        Token.Character copy = new Token.Character(c);
825        pendingTableCharacters.add(copy);
826    }
827
828    /**
829     13.2.6.3 Closing elements that have implied end tags
830     When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements.
831
832     If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
833
834     When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
835
836     @param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
837     process, then the UA must perform the above steps as if that element was not in the above list.
838     */
839    void generateImpliedEndTags(String excludeTag) {
840        while (inSorted(currentElement().normalName(), TagSearchEndTags)) {
841            if (excludeTag != null && currentElementIs(excludeTag))
842                break;
843            pop();
844        }
845    }
846
847    void generateImpliedEndTags() {
848        generateImpliedEndTags(false);
849    }
850
851    /**
852     Pops HTML elements off the stack according to the implied end tag rules
853     @param thorough if we are thorough (includes table elements etc) or not
854     */
855    void generateImpliedEndTags(boolean thorough) {
856        final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
857        while (NamespaceHtml.equals(currentElement().tag().namespace())
858            && inSorted(currentElement().normalName(), search)) {
859            pop();
860        }
861    }
862
863    void closeElement(String name) {
864        generateImpliedEndTags(name);
865        if (!name.equals(currentElement().normalName())) error(state());
866        popStackToClose(name);
867    }
868
869    static boolean isSpecial(Element el) {
870        String namespace = el.tag().namespace();
871        String name = el.normalName();
872        switch (namespace) {
873            case NamespaceHtml:
874                return inSorted(name, TagSearchSpecial);
875            case Parser.NamespaceMathml:
876                return inSorted(name, TagSearchSpecialMath);
877            case Parser.NamespaceSvg:
878                return inSorted(name, TagSvgHtmlIntegration);
879            default:
880                return false;
881        }
882    }
883
884    Element lastFormattingElement() {
885        return formattingElements.size() > 0 ? formattingElements.get(formattingElements.size()-1) : null;
886    }
887
888    int positionOfElement(Element el){
889        for (int i = 0; i < formattingElements.size(); i++){
890            if (el == formattingElements.get(i))
891                return i;
892        }
893        return -1;
894    }
895
896    Element removeLastFormattingElement() {
897        int size = formattingElements.size();
898        if (size > 0)
899            return formattingElements.remove(size-1);
900        else
901            return null;
902    }
903
904    // active formatting elements
905    void pushActiveFormattingElements(Element in) {
906        checkActiveFormattingElements(in);
907        formattingElements.add(in);
908    }
909
910    void pushWithBookmark(Element in, int bookmark){
911        checkActiveFormattingElements(in);
912        // catch any range errors and assume bookmark is incorrect - saves a redundant range check.
913        try {
914            formattingElements.add(bookmark, in);
915        } catch (IndexOutOfBoundsException e) {
916            formattingElements.add(in);
917        }
918    }
919
920    void checkActiveFormattingElements(Element in){
921        int numSeen = 0;
922        final int size = formattingElements.size() -1;
923        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
924
925        for (int pos = size; pos >= ceil; pos--) {
926            Element el = formattingElements.get(pos);
927            if (el == null) // marker
928                break;
929
930            if (isSameFormattingElement(in, el))
931                numSeen++;
932
933            if (numSeen == 3) {
934                formattingElements.remove(pos);
935                break;
936            }
937        }
938    }
939
940    private static boolean isSameFormattingElement(Element a, Element b) {
941        // same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
942        return a.normalName().equals(b.normalName()) &&
943                // a.namespace().equals(b.namespace()) &&
944                a.attributes().equals(b.attributes());
945        // todo: namespaces
946    }
947
    /**
     Reconstructs the active formatting elements: creates and inserts new elements for those entries at the end of the
     formatting elements list that are not on the stack. The step numbers in the comments below refer to the steps of
     the HTML spec algorithm this follows.
     <p>Does nothing if the stack is deeper than maxQueueDepth, if the list is empty or ends in a marker (null), or if
     the last entry is already on the stack. The rewind looks back over at most maxUsedFormattingElements entries.</p>
     */
    void reconstructFormattingElements() {
        if (stack.size() > maxQueueDepth)
            return;
        Element last = lastFormattingElement();
        if (last == null || onStack(last))
            return;

        Element entry = last;
        int size = formattingElements.size();
        int ceil = size - maxUsedFormattingElements; if (ceil <0) ceil = 0;
        int pos = size - 1;
        boolean skip = false;
        // rewind phase: move pos back until a marker, an entry that is on the stack, or the ceil index is reached
        while (true) {
            if (pos == ceil) { // step 4. if none before, skip to 8
                skip = true;
                break;
            }
            entry = formattingElements.get(--pos); // step 5. one earlier than entry
            if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
                break; // jump to 8, else continue back to 4
        }
        // advance phase: recreate each entry from pos forward to the end of the list
        while(true) {
            if (!skip) // step 7: on later than entry
                entry = formattingElements.get(++pos);
            Validate.notNull(entry); // should not occur, as we break at last element

            // 8. create new element from element, 9 insert into current node, onto stack
            skip = false; // can only skip increment from 4.
            // the new element gets the entry's node name and normal name, and a clone of its attributes
            Element newEl = new Element(tagFor(entry.nodeName(), entry.normalName(), defaultNamespace(), settings), null, entry.attributes().clone());
            doInsertElement(newEl);

            // 10. replace entry with new entry
            formattingElements.set(pos, newEl);

            // 11
            if (pos == size-1) // if not last entry in list, jump to 7
                break;
        }
    }
    // window size used by reconstructFormattingElements and checkActiveFormattingElements when walking back from the end of the list
    private static final int maxUsedFormattingElements = 12; // limit how many elements get recreated
988
989    void clearFormattingElementsToLastMarker() {
990        while (!formattingElements.isEmpty()) {
991            Element el = removeLastFormattingElement();
992            if (el == null)
993                break;
994        }
995    }
996
997    void removeFromActiveFormattingElements(Element el) {
998        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
999            Element next = formattingElements.get(pos);
1000            if (next == el) {
1001                formattingElements.remove(pos);
1002                break;
1003            }
1004        }
1005    }
1006
1007    boolean isInActiveFormattingElements(Element el) {
1008        return onStack(formattingElements, el);
1009    }
1010
1011    @Nullable
1012    Element getActiveFormattingElement(String nodeName) {
1013        for (int pos = formattingElements.size() -1; pos >= 0; pos--) {
1014            Element next = formattingElements.get(pos);
1015            if (next == null) // scope marker
1016                break;
1017            else if (next.nameIs(nodeName))
1018                return next;
1019        }
1020        return null;
1021    }
1022
1023    void replaceActiveFormattingElement(Element out, Element in) {
1024        replaceInQueue(formattingElements, out, in);
1025    }
1026
1027    void insertMarkerToFormattingElements() {
1028        formattingElements.add(null);
1029    }
1030
1031    void insertInFosterParent(Node in) {
1032        Element fosterParent;
1033        Element lastTable = getFromStack("table");
1034        boolean isLastTableParent = false;
1035        if (lastTable != null) {
1036            if (lastTable.parent() != null) {
1037                fosterParent = lastTable.parent();
1038                isLastTableParent = true;
1039            } else
1040                fosterParent = aboveOnStack(lastTable);
1041        } else { // no table == frag
1042            fosterParent = stack.get(0);
1043        }
1044
1045        if (isLastTableParent) {
1046            Validate.notNull(lastTable); // last table cannot be null by this point.
1047            lastTable.before(in);
1048        }
1049        else
1050            fosterParent.appendChild(in);
1051    }
1052
1053    // Template Insertion Mode stack
1054    void pushTemplateMode(HtmlTreeBuilderState state) {
1055        tmplInsertMode.add(state);
1056    }
1057
1058    @Nullable HtmlTreeBuilderState popTemplateMode() {
1059        if (tmplInsertMode.size() > 0) {
1060            return tmplInsertMode.remove(tmplInsertMode.size() -1);
1061        } else {
1062            return null;
1063        }
1064    }
1065
1066    int templateModeSize() {
1067        return tmplInsertMode.size();
1068    }
1069
1070    @Nullable HtmlTreeBuilderState currentTemplateMode() {
1071        return (tmplInsertMode.size() > 0) ? tmplInsertMode.get(tmplInsertMode.size() -1)  : null;
1072    }
1073
1074    @Override
1075    public String toString() {
1076        return "TreeBuilder{" +
1077                "currentToken=" + currentToken +
1078                ", state=" + state +
1079                ", currentElement=" + currentElement() +
1080                '}';
1081    }
1082
1083}