001package org.jsoup.parser; 002 003import org.jspecify.annotations.Nullable; 004 005import java.util.Objects; 006 007import static org.jsoup.parser.Parser.NamespaceHtml; 008 009/** 010 A Tag represents an Element's name and configured options, common throughout the Document. Options may affect the parse 011 and output. 012 013 @see TagSet 014 @see Parser#tagSet(TagSet) */ 015public class Tag implements Cloneable { 016 /** Tag option: the tag is known (specifically defined). This impacts if options may need to be inferred (when not 017 known) in, e.g., the pretty-printer. Set when a tag is added to a TagSet, or when settings are set(). */ 018 public static int Known = 1; 019 /** Tag option: the tag is a void tag (e.g., {@code <img>}), that can contain no children, and in HTML does not require closing. */ 020 public static int Void = 1 << 1; 021 /** Tag option: the tag is a block tag (e.g., {@code <div>}, {@code <p>}). Causes the element to be indented when pretty-printing. If not a block, it is inline. */ 022 public static int Block = 1 << 2; 023 /** Tag option: pretty-print hint for block tags whose inline children should stay inline. (Must also set Block.) */ 024 public static int InlineContainer = 1 << 3; 025 /** Tag option: the tag can self-close (e.g., {@code <foo />}). */ 026 public static int SelfClose = 1 << 4; 027 /** Tag option: the tag has been seen self-closing in this parse. */ 028 public static int SeenSelfClose = 1 << 5; 029 /** Tag option: the tag preserves whitespace (e.g., {@code <pre>}). */ 030 public static int PreserveWhitespace = 1 << 6; 031 /** Tag option: the tag is an RCDATA element that can have text and character references (e.g., {@code <title>}, {@code <textarea>}). */ 032 public static int RcData = 1 << 7; 033 /** Tag option: the tag is a Data element that can have text but not character references (e.g., {@code <style>}, {@code <script>}). */ 034 public static int Data = 1 << 8; 035 /** Tag option: the tag's value will be included when submitting a form (e.g., {@code <input>}). */ 036 public static int FormSubmittable = 1 << 9; 037 /** Tag option: readable text boundary for {@code Element.text()}, used for controls, widgets, and embedded objects. */ 038 public static int TextBoundary = 1 << 10; 039 040 String namespace; 041 String tagName; 042 String normalName; // always the lower case version of this tag, regardless of case preservation mode 043 int options = 0; 044 045 /** 046 Create a new Tag, with the given name and namespace. 047 <p>The tag is not implicitly added to any TagSet.</p> 048 @param tagName the name of the tag. Case-sensitive. 049 @param namespace the namespace for the tag. 050 @see TagSet#valueOf(String, String) 051 @since 1.20.1 052 */ 053 public Tag(String tagName, String namespace) { 054 this(tagName, ParseSettings.normalName(tagName), namespace); 055 } 056 057 /** 058 Create a new Tag, with the given name, in the HTML namespace. 059 <p>The tag is not implicitly added to any TagSet.</p> 060 @param tagName the name of the tag. Case-sensitive. 061 @see TagSet#valueOf(String, String) 062 @since 1.20.1 063 */ 064 public Tag(String tagName) { 065 this(tagName, ParseSettings.normalName(tagName), NamespaceHtml); 066 } 067 068 /** Path for TagSet defaults, no options set; normal name is already LC. */ 069 Tag(String tagName, String normalName, String namespace) { 070 this.tagName = tagName; 071 this.normalName = normalName; 072 this.namespace = namespace; 073 } 074 075 /** 076 * Get this tag's name. 077 * 078 * @return the tag's name 079 */ 080 public String getName() { 081 return tagName; 082 } 083 084 /** 085 Get this tag's name. 086 @return the tag's name 087 */ 088 public String name() { 089 return tagName; 090 } 091 092 /** 093 Change the tag's name. As Tags are reused throughout a Document, this will change the name for all uses of this tag. 094 @param tagName the new name of the tag. Case-sensitive. 095 @return this tag 096 @since 1.20.1 097 */ 098 public Tag name(String tagName) { 099 this.tagName = tagName; 100 this.normalName = ParseSettings.normalName(tagName); 101 return this; 102 } 103 104 /** 105 Get this tag's prefix, if it has one; else the empty string. 106 <p>For example, {@code <book:title>} has prefix {@code book}, and tag name {@code book:title}.</p> 107 @return the tag's prefix 108 @since 1.20.1 109 */ 110 public String prefix() { 111 int pos = tagName.indexOf(':'); 112 if (pos == -1) return ""; 113 else return tagName.substring(0, pos); 114 } 115 116 /** 117 Get this tag's local name. The local name is the name without the prefix (if any). 118 <p>For exmaple, {@code <book:title>} has local name {@code title}, and tag name {@code book:title}.</p> 119 @return the tag's local name 120 @since 1.20.1 121 */ 122 public String localName() { 123 int pos = tagName.indexOf(':'); 124 if (pos == -1) return tagName; 125 else return tagName.substring(pos + 1); 126 } 127 128 /** 129 * Get this tag's normalized (lowercased) name. 130 * @return the tag's normal name. 131 */ 132 public String normalName() { 133 return normalName; 134 } 135 136 /** 137 Get this tag's namespace. 138 @return the tag's namespace 139 */ 140 public String namespace() { 141 return namespace; 142 } 143 144 /** 145 Set the tag's namespace. As Tags are reused throughout a Document, this will change the namespace for all uses of this tag. 146 @param namespace the new namespace of the tag. 147 @return this tag 148 @since 1.20.1 149 */ 150 public Tag namespace(String namespace) { 151 this.namespace = namespace; 152 return this; 153 } 154 155 /** 156 Set an option on this tag. 157 <p>Once a tag has a setting applied, it will be considered a known tag.</p> 158 @param option the option to set 159 @return this tag 160 @since 1.20.1 161 */ 162 public Tag set(int option) { 163 options |= option; 164 options |= Tag.Known; // considered known if touched 165 return this; 166 } 167 168 /** 169 Test if an option is set on this tag. 170 171 @param option the option to test 172 @return true if the option is set 173 @since 1.20.1 174 */ 175 public boolean is(int option) { 176 return (options & option) != 0; 177 } 178 179 /** 180 Clear (unset) an option from this tag. 181 @param option the option to clear 182 @return this tag 183 @since 1.20.1 184 */ 185 public Tag clear(int option) { 186 options &= ~option; 187 // considered known if touched, unless explicitly clearing known 188 if (option != Tag.Known) options |= Tag.Known; 189 return this; 190 } 191 192 /** 193 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 194 * <p> 195 * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals(). 196 * </p> 197 * 198 * @param tagName Name of tag, e.g. "p". Case-insensitive. 199 * @param namespace the namespace for the tag. 200 * @param settings used to control tag name sensitivity 201 * @see TagSet 202 * @return The tag, either defined or new generic. 203 */ 204 public static Tag valueOf(String tagName, String namespace, ParseSettings settings) { 205 return TagSet.Html().valueOf(tagName, null, namespace, settings.preserveTagCase()); 206 } 207 208 /** 209 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 210 * <p> 211 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 212 * </p> 213 * 214 * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>. 215 * @return The tag, either defined or new generic. 216 * @see #valueOf(String tagName, String namespace, ParseSettings settings) 217 */ 218 public static Tag valueOf(String tagName) { 219 return valueOf(tagName, NamespaceHtml, ParseSettings.preserveCase); 220 } 221 222 /** 223 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 224 * <p> 225 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 226 * </p> 227 * 228 * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>. 229 * @param settings used to control tag name sensitivity 230 * @return The tag, either defined or new generic. 231 * @see #valueOf(String tagName, String namespace, ParseSettings settings) 232 */ 233 public static Tag valueOf(String tagName, ParseSettings settings) { 234 return valueOf(tagName, NamespaceHtml, settings); 235 } 236 237 /** 238 * Gets if this is a block tag. 239 * 240 * @return if block tag 241 */ 242 public boolean isBlock() { 243 return (options & Block) != 0; 244 } 245 246 /** 247 Get if this is an InlineContainer tag. 248 249 @return true if this tag has the InlineContainer pretty-print hint. 250 @deprecated internal pretty-printing flag; use {@link #isInline()} or {@link #isBlock()} to check layout intent. Will be removed in jsoup 1.24.1. 251 */ 252 @Deprecated public boolean formatAsBlock() { 253 return (options & InlineContainer) != 0; 254 } 255 256 /** 257 * Gets if this tag is an inline tag. Just the opposite of isBlock. 258 * 259 * @return if this tag is an inline tag. 260 */ 261 public boolean isInline() { 262 return (options & Block) == 0; 263 } 264 265 /** 266 Get if this is void (aka empty) tag. 267 268 @return true if this is a void tag 269 */ 270 public boolean isEmpty() { 271 return (options & Void) != 0; 272 } 273 274 /** 275 * Get if this tag is self-closing. 276 * 277 * @return if this tag should be output as self-closing. 278 */ 279 public boolean isSelfClosing() { 280 return (options & SelfClose) != 0 || (options & Void) != 0; 281 } 282 283 /** 284 * Get if this is a pre-defined tag in the TagSet, or was auto created on parsing. 285 * 286 * @return if a known tag 287 */ 288 public boolean isKnownTag() { 289 return (options & Known) != 0; 290 } 291 292 /** 293 * Check if this tag name is a known HTML tag. 294 * 295 * @param tagName name of tag 296 * @return if known HTML tag 297 */ 298 public static boolean isKnownTag(String tagName) { 299 return TagSet.HtmlTagSet.get(tagName, NamespaceHtml) != null; 300 } 301 302 /** 303 * Get if this tag should preserve whitespace within child text nodes. 304 * 305 * @return if preserve whitespace 306 */ 307 public boolean preserveWhitespace() { 308 return (options & PreserveWhitespace) != 0; 309 } 310 311 /** 312 * Get if this tag represents an element that should be submitted with a form. E.g. input, option 313 * @return if submittable with a form 314 */ 315 public boolean isFormSubmittable() { 316 return (options & FormSubmittable) != 0; 317 } 318 319 void setSeenSelfClose() { 320 options |= Tag.SeenSelfClose; // does not change known status 321 } 322 323 /** 324 If this Tag uses a specific text TokeniserState for its content, returns that; otherwise null. 325 */ 326 @Nullable TokeniserState textState() { 327 if (is(RcData)) return TokeniserState.Rcdata; 328 if (is(Data)) return TokeniserState.Rawtext; 329 else return null; 330 } 331 332 @Override 333 public boolean equals(Object o) { 334 if (this == o) return true; 335 if (!(o instanceof Tag)) return false; 336 Tag tag = (Tag) o; 337 return Objects.equals(tagName, tag.tagName) && 338 Objects.equals(namespace, tag.namespace) && 339 Objects.equals(normalName, tag.normalName) && 340 options == tag.options; 341 } 342 343 /** 344 Hashcode of this Tag, consisting of the tag name and namespace. 345 */ 346 @Override 347 public int hashCode() { 348 return Objects.hash(tagName, namespace); // options not included so that mutations do not prevent use as a key 349 } 350 351 @Override 352 public String toString() { 353 return tagName; 354 } 355 356 @Override 357 protected Tag clone() { 358 try { 359 return (Tag) super.clone(); 360 } catch (CloneNotSupportedException e) { 361 throw new RuntimeException(e); 362 } 363 } 364 365 366}