001package org.jsoup.parser; 002 003import org.jspecify.annotations.Nullable; 004 005import java.util.Objects; 006 007import static org.jsoup.parser.Parser.NamespaceHtml; 008 009/** 010 A Tag represents an Element's name and configured options, common throughout the Document. Options may affect the parse 011 and output. 012 013 @see TagSet 014 @see Parser#tagSet(TagSet) */ 015public class Tag implements Cloneable { 016 /** Tag option: the tag is known (specifically defined). This impacts if options may need to be inferred (when not 017 known) in, e.g., the pretty-printer. Set when a tag is added to a TagSet, or when settings are set(). */ 018 public static int Known = 1; 019 /** Tag option: the tag is a void tag (e.g., {@code <img>}), that can contain no children, and in HTML does not require closing. */ 020 public static int Void = 1 << 1; 021 /** Tag option: the tag is a block tag (e.g., {@code <div>}, {@code <p>}). Causes the element to be indented when pretty-printing. If not a block, it is inline. */ 022 public static int Block = 1 << 2; 023 /** Tag option: the tag is a block tag that will only hold inline tags (e.g., {@code <p>}); used for formatting. (Must also set Block.) */ 024 public static int InlineContainer = 1 << 3; 025 /** Tag option: the tag can self-close (e.g., {@code <foo />}). */ 026 public static int SelfClose = 1 << 4; 027 /** Tag option: the tag has been seen self-closing in this parse. */ 028 public static int SeenSelfClose = 1 << 5; 029 /** Tag option: the tag preserves whitespace (e.g., {@code <pre>}). */ 030 public static int PreserveWhitespace = 1 << 6; 031 /** Tag option: the tag is an RCDATA element that can have text and character references (e.g., {@code <title>}, {@code <textarea>}). */ 032 public static int RcData = 1 << 7; 033 /** Tag option: the tag is a Data element that can have text but not character references (e.g., {@code <style>}, {@code <script>}). */ 034 public static int Data = 1 << 8; 035 /** Tag option: the tag's value will be included when submitting a form (e.g., {@code <input>}). */ 036 public static int FormSubmittable = 1 << 9; 037 038 String namespace; 039 String tagName; 040 String normalName; // always the lower case version of this tag, regardless of case preservation mode 041 int options = 0; 042 043 /** 044 Create a new Tag, with the given name and namespace. 045 <p>The tag is not implicitly added to any TagSet.</p> 046 @param tagName the name of the tag. Case-sensitive. 047 @param namespace the namespace for the tag. 048 @see TagSet#valueOf(String, String) 049 @since 1.20.1 050 */ 051 public Tag(String tagName, String namespace) { 052 this(tagName, ParseSettings.normalName(tagName), namespace); 053 } 054 055 /** 056 Create a new Tag, with the given name, in the HTML namespace. 057 <p>The tag is not implicitly added to any TagSet.</p> 058 @param tagName the name of the tag. Case-sensitive. 059 @see TagSet#valueOf(String, String) 060 @since 1.20.1 061 */ 062 public Tag(String tagName) { 063 this(tagName, ParseSettings.normalName(tagName), NamespaceHtml); 064 } 065 066 /** Path for TagSet defaults, no options set; normal name is already LC. */ 067 Tag(String tagName, String normalName, String namespace) { 068 this.tagName = tagName; 069 this.normalName = normalName; 070 this.namespace = namespace; 071 } 072 073 /** 074 * Get this tag's name. 075 * 076 * @return the tag's name 077 */ 078 public String getName() { 079 return tagName; 080 } 081 082 /** 083 Get this tag's name. 084 @return the tag's name 085 */ 086 public String name() { 087 return tagName; 088 } 089 090 /** 091 Change the tag's name. As Tags are reused throughout a Document, this will change the name for all uses of this tag. 092 @param tagName the new name of the tag. Case-sensitive. 093 @return this tag 094 @since 1.20.1 095 */ 096 public Tag name(String tagName) { 097 this.tagName = tagName; 098 this.normalName = ParseSettings.normalName(tagName); 099 return this; 100 } 101 102 /** 103 Get this tag's prefix, if it has one; else the empty string. 104 <p>For example, {@code <book:title>} has prefix {@code book}, and tag name {@code book:title}.</p> 105 @return the tag's prefix 106 @since 1.20.1 107 */ 108 public String prefix() { 109 int pos = tagName.indexOf(':'); 110 if (pos == -1) return ""; 111 else return tagName.substring(0, pos); 112 } 113 114 /** 115 Get this tag's local name. The local name is the name without the prefix (if any). 116 <p>For exmaple, {@code <book:title>} has local name {@code title}, and tag name {@code book:title}.</p> 117 @return the tag's local name 118 @since 1.20.1 119 */ 120 public String localName() { 121 int pos = tagName.indexOf(':'); 122 if (pos == -1) return tagName; 123 else return tagName.substring(pos + 1); 124 } 125 126 /** 127 * Get this tag's normalized (lowercased) name. 128 * @return the tag's normal name. 129 */ 130 public String normalName() { 131 return normalName; 132 } 133 134 /** 135 Get this tag's namespace. 136 @return the tag's namespace 137 */ 138 public String namespace() { 139 return namespace; 140 } 141 142 /** 143 Set the tag's namespace. As Tags are reused throughout a Document, this will change the namespace for all uses of this tag. 144 @param namespace the new namespace of the tag. 145 @return this tag 146 @since 1.20.1 147 */ 148 public Tag namespace(String namespace) { 149 this.namespace = namespace; 150 return this; 151 } 152 153 /** 154 Set an option on this tag. 155 <p>Once a tag has a setting applied, it will be considered a known tag.</p> 156 @param option the option to set 157 @return this tag 158 @since 1.20.1 159 */ 160 public Tag set(int option) { 161 options |= option; 162 options |= Tag.Known; // considered known if touched 163 return this; 164 } 165 166 /** 167 Test if an option is set on this tag. 168 169 @param option the option to test 170 @return true if the option is set 171 @since 1.20.1 172 */ 173 public boolean is(int option) { 174 return (options & option) != 0; 175 } 176 177 /** 178 Clear (unset) an option from this tag. 179 @param option the option to clear 180 @return this tag 181 @since 1.20.1 182 */ 183 public Tag clear(int option) { 184 options &= ~option; 185 // considered known if touched, unless explicitly clearing known 186 if (option != Tag.Known) options |= Tag.Known; 187 return this; 188 } 189 190 /** 191 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 192 * <p> 193 * Pre-defined tags (p, div etc) will be ==, but unknown tags are not registered and will only .equals(). 194 * </p> 195 * 196 * @param tagName Name of tag, e.g. "p". Case-insensitive. 197 * @param namespace the namespace for the tag. 198 * @param settings used to control tag name sensitivity 199 * @see TagSet 200 * @return The tag, either defined or new generic. 201 */ 202 public static Tag valueOf(String tagName, String namespace, ParseSettings settings) { 203 return TagSet.Html().valueOf(tagName, null, namespace, settings.preserveTagCase()); 204 } 205 206 /** 207 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 208 * <p> 209 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 210 * </p> 211 * 212 * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>. 213 * @return The tag, either defined or new generic. 214 * @see #valueOf(String tagName, String namespace, ParseSettings settings) 215 */ 216 public static Tag valueOf(String tagName) { 217 return valueOf(tagName, NamespaceHtml, ParseSettings.preserveCase); 218 } 219 220 /** 221 * Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything. 222 * <p> 223 * Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals(). 224 * </p> 225 * 226 * @param tagName Name of tag, e.g. "p". <b>Case sensitive</b>. 227 * @param settings used to control tag name sensitivity 228 * @return The tag, either defined or new generic. 229 * @see #valueOf(String tagName, String namespace, ParseSettings settings) 230 */ 231 public static Tag valueOf(String tagName, ParseSettings settings) { 232 return valueOf(tagName, NamespaceHtml, settings); 233 } 234 235 /** 236 * Gets if this is a block tag. 237 * 238 * @return if block tag 239 */ 240 public boolean isBlock() { 241 return (options & Block) != 0; 242 } 243 244 /** 245 Get if this is an InlineContainer tag. 246 247 @return true if an InlineContainer (which formats children as inline). 248 @deprecated internal pretty-printing flag; use {@link #isInline()} or {@link #isBlock()} to check layout intent. Will be removed in jsoup 1.24.1. 249 */ 250 @Deprecated public boolean formatAsBlock() { 251 return (options & InlineContainer) != 0; 252 } 253 254 /** 255 * Gets if this tag is an inline tag. Just the opposite of isBlock. 256 * 257 * @return if this tag is an inline tag. 258 */ 259 public boolean isInline() { 260 return (options & Block) == 0; 261 } 262 263 /** 264 Get if this is void (aka empty) tag. 265 266 @return true if this is a void tag 267 */ 268 public boolean isEmpty() { 269 return (options & Void) != 0; 270 } 271 272 /** 273 * Get if this tag is self-closing. 274 * 275 * @return if this tag should be output as self-closing. 276 */ 277 public boolean isSelfClosing() { 278 return (options & SelfClose) != 0 || (options & Void) != 0; 279 } 280 281 /** 282 * Get if this is a pre-defined tag in the TagSet, or was auto created on parsing. 283 * 284 * @return if a known tag 285 */ 286 public boolean isKnownTag() { 287 return (options & Known) != 0; 288 } 289 290 /** 291 * Check if this tag name is a known HTML tag. 292 * 293 * @param tagName name of tag 294 * @return if known HTML tag 295 */ 296 public static boolean isKnownTag(String tagName) { 297 return TagSet.HtmlTagSet.get(tagName, NamespaceHtml) != null; 298 } 299 300 /** 301 * Get if this tag should preserve whitespace within child text nodes. 302 * 303 * @return if preserve whitespace 304 */ 305 public boolean preserveWhitespace() { 306 return (options & PreserveWhitespace) != 0; 307 } 308 309 /** 310 * Get if this tag represents an element that should be submitted with a form. E.g. input, option 311 * @return if submittable with a form 312 */ 313 public boolean isFormSubmittable() { 314 return (options & FormSubmittable) != 0; 315 } 316 317 void setSeenSelfClose() { 318 options |= Tag.SeenSelfClose; // does not change known status 319 } 320 321 /** 322 If this Tag uses a specific text TokeniserState for its content, returns that; otherwise null. 323 */ 324 @Nullable TokeniserState textState() { 325 if (is(RcData)) return TokeniserState.Rcdata; 326 if (is(Data)) return TokeniserState.Rawtext; 327 else return null; 328 } 329 330 @Override 331 public boolean equals(Object o) { 332 if (this == o) return true; 333 if (!(o instanceof Tag)) return false; 334 Tag tag = (Tag) o; 335 return Objects.equals(tagName, tag.tagName) && 336 Objects.equals(namespace, tag.namespace) && 337 Objects.equals(normalName, tag.normalName) && 338 options == tag.options; 339 } 340 341 /** 342 Hashcode of this Tag, consisting of the tag name and namespace. 343 */ 344 @Override 345 public int hashCode() { 346 return Objects.hash(tagName, namespace); // options not included so that mutations do not prevent use as a key 347 } 348 349 @Override 350 public String toString() { 351 return tagName; 352 } 353 354 @Override 355 protected Tag clone() { 356 try { 357 return (Tag) super.clone(); 358 } catch (CloneNotSupportedException e) { 359 throw new RuntimeException(e); 360 } 361 } 362 363 364}