001package org.jsoup; 002 003import org.jsoup.helper.RequestAuthenticator; 004import org.jsoup.nodes.Document; 005import org.jsoup.parser.Parser; 006import org.jsoup.parser.StreamParser; 007import org.jspecify.annotations.Nullable; 008 009import javax.net.ssl.SSLContext; 010import javax.net.ssl.SSLSocketFactory; 011import java.io.BufferedInputStream; 012import java.io.IOException; 013import java.io.InputStream; 014import java.io.UncheckedIOException; 015import java.net.Authenticator; 016import java.net.CookieStore; 017import java.net.Proxy; 018import java.net.URL; 019import java.util.Collection; 020import java.util.List; 021import java.util.Map; 022 023/** 024 The Connection interface is a convenient HTTP client and session object to fetch content from the web, and parse it 025 into Documents. 026 <p>To start a new session, use either {@link org.jsoup.Jsoup#newSession()} or {@link org.jsoup.Jsoup#connect(String)}. 027 Connections contain {@link Connection.Request} and {@link Connection.Response} objects (once executed). Configuration 028 settings (URL, timeout, useragent, etc) set on a session will be applied by default to each subsequent request.</p> 029 <p>To start a new request from the session, use {@link #newRequest()}.</p> 030 <p>Cookies are stored in memory for the duration of the session. For that reason, do not use one single session for all 031 requests in a long-lived application, or you are likely to run out of memory, unless care is taken to clean up the 032 cookie store. The cookie store for the session is available via {@link #cookieStore()}. You may provide your own 033 implementation via {@link #cookieStore(java.net.CookieStore)} before making requests.</p> 034 <p>Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}), 035 or by methods in the {@link Connection.Request} object directly. All request configuration must be made before the request is 036 executed. 
When used as an ongoing session, initialize all defaults prior to making multi-threaded {@link 037#newRequest()}s.</p> 038 <p>Note that the term "Connection" used here does not mean that a long-lived connection is held against a server for 039 the lifetime of the Connection object. A socket connection is only made at the point of request execution ({@link 040#execute()}, {@link #get()}, or {@link #post()}), and the server's response consumed.</p> 041 <p>For multi-threaded implementations, it is important to use a {@link #newRequest()} for each request. The session may 042 be shared across concurrent threads, but not a specific request.</p> 043 <p><b>HTTP/2</b> support: On JVM 11 and above, requests use {@link java.net.http.HttpClient}, which supports 044 HTTP/2. To use the legacy {@link java.net.HttpURLConnection} instead, set 045 <code>System.setProperty("jsoup.useHttpClient", "false")</code>.</p> 046 */ 047@SuppressWarnings("unused") 048public interface Connection { 049 050 /** 051 * HTTP request methods (GET, POST, and others), each flagged with whether it carries a request body. 052 */ 053 enum Method { 054 GET(false), 055 POST(true), 056 PUT(true), 057 DELETE(true), 058 /** 059 Note that unfortunately, PATCH is not supported in many JDKs. 060 */ 061 PATCH(true), 062 HEAD(false), 063 OPTIONS(false), 064 TRACE(false); 065 066 private final boolean hasBody; 067 068 Method(boolean hasBody) { 069 this.hasBody = hasBody; 070 } 071 072 /** 073 * Check if this HTTP method has/needs a request body 074 * @return if body needed 075 */ 076 public final boolean hasBody() { 077 return hasBody; 078 } 079 } 080 081 /** 082 Creates a new request, using this Connection as the session-state and to initialize the connection settings (which 083 may then be independently changed on the returned {@link Connection.Request} object). 
084 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 085 @since 1.14.1 086 */ 087 Connection newRequest(); 088 089 /** 090 Creates a new request for the given URL, using this Connection as the session-state and to initialize the connection settings (which 091 may then be independently changed on the returned {@link Connection.Request} object). 092 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 093 @param url URL for the new request 094 @since 1.17.1 095 */ 096 default Connection newRequest(String url) { 097 return newRequest().url(url); 098 } 099 100 /** 101 Creates a new request for the given URL, using this Connection as the session-state and to initialize the connection settings (which 102 may then be independently changed on the returned {@link Connection.Request} object). 103 @return a new Connection object, with a shared Cookie Store and initialized settings from this Connection and Request 104 @param url URL for the new request 105 @since 1.17.1 106 */ 107 default Connection newRequest(URL url) { 108 return newRequest().url(url); 109 } 110 111 /** 112 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 113 * @param url URL to connect to 114 * @return this Connection, for chaining 115 */ 116 Connection url(URL url); 117 118 /** 119 * Set the request URL to fetch. The protocol must be HTTP or HTTPS. 120 * @param url URL to connect to 121 * @return this Connection, for chaining 122 */ 123 Connection url(String url); 124 125 /** 126 * Set the proxy to use for this request. Set to <code>null</code> to disable a previously set proxy. 127 * @param proxy proxy to use 128 * @return this Connection, for chaining 129 */ 130 Connection proxy(@Nullable Proxy proxy); 131 132 /** 133 * Set the HTTP proxy to use for this request. 
134 * @param host the proxy hostname 135 * @param port the proxy port 136 * @return this Connection, for chaining 137 */ 138 Connection proxy(String host, int port); 139 140 /** 141 * Set the request user-agent header. 142 * @param userAgent user-agent to use 143 * @return this Connection, for chaining 144 * @see org.jsoup.helper.HttpConnection#DEFAULT_UA 145 */ 146 Connection userAgent(String userAgent); 147 148 /** 149 Set the total maximum request duration. If a timeout occurs, an {@link java.net.SocketTimeoutException} will be 150 thrown. 151 <p>The default timeout is <b>30 seconds</b> (30,000 millis). A timeout of zero is treated as an infinite timeout.</p> 152 <p>This timeout specifies the combined maximum duration of the connection time and the time to read 153 the full response.</p> 154 <p>Implementation note: when this <code>Connection</code> is backed by <code>HttpURLConnection</code> (rather than <code>HttpClient</code>, as used in JVM 11+), this timeout is implemented by setting both the socket connect and read timeouts to half of the specified value.</p> 155 156 @param millis number of milliseconds (thousandths of a second) before timing out connects or reads. 157 @return this Connection, for chaining 158 @see #maxBodySize(int) 159 */ 160 Connection timeout(int millis); 161 162 /** 163 * Set the maximum bytes to read from the (uncompressed) connection into the body, before the connection is closed, 164 * and the input truncated (i.e. the body content will be trimmed). <b>The default maximum is 2MB</b>. A max size of 165 * <code>0</code> is treated as an infinite amount (bounded only by your patience and the memory available on your 166 * machine). 167 * 168 * @param bytes number of bytes to read from the input before truncating 169 * @return this Connection, for chaining 170 */ 171 Connection maxBodySize(int bytes); 172 173 /** 174 * Set the request referrer (aka "referer") header. 
175 * @param referrer referrer to use 176 * @return this Connection, for chaining 177 */ 178 Connection referrer(String referrer); 179 180 /** 181 * Configures the connection to (not) follow server redirects. By default, this is <b>true</b>. 182 * @param followRedirects true if server redirects should be followed. 183 * @return this Connection, for chaining 184 */ 185 Connection followRedirects(boolean followRedirects); 186 187 /** 188 * Set the request method to use (for example GET or POST; see {@link Method}). Default is GET. 189 * @param method HTTP request method 190 * @return this Connection, for chaining 191 */ 192 Connection method(Method method); 193 194 /** 195 * Configures the connection to not throw exceptions when an HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By 196 * default, this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the 197 * response is populated with the error body, and the status message will reflect the error. 198 * @param ignoreHttpErrors true if HTTP errors should be ignored; false (default) to throw an IOException on error. 199 * @return this Connection, for chaining 200 */ 201 Connection ignoreHttpErrors(boolean ignoreHttpErrors); 202 203 /** 204 * Ignore the document's Content-Type when parsing the response. By default, this is <b>false</b>, an unrecognised 205 * content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse 206 * a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type. 207 * @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a 208 * Document. 209 * @return this Connection, for chaining 210 */ 211 Connection ignoreContentType(boolean ignoreContentType); 212 213 /** 214 Set a custom SSL socket factory for HTTPS connections. 
215 <p>Note: if set, the legacy <code>HttpURLConnection</code> will be used instead of the JVM's 216 <code>HttpClient</code>.</p> 217 218 @param sslSocketFactory SSL socket factory 219 @return this Connection, for chaining 220 @see #sslContext(SSLContext) 221 @deprecated use {@link #sslContext(SSLContext)} instead; will be removed in jsoup 1.24.1. 222 */ 223 @Deprecated 224 Connection sslSocketFactory(SSLSocketFactory sslSocketFactory); 225 226 /** 227 Set a custom SSL context for HTTPS connections. 228 <p>Note: when using the legacy <code>HttpURLConnection</code>, only the <code>SSLSocketFactory</code> from the 229 context will be used.</p> 230 231 @param sslContext SSL context 232 @return this Connection, for chaining 233 @since 1.21.2 234 */ 235 default Connection sslContext(SSLContext sslContext) { 236 throw new UnsupportedOperationException(); 237 } 238 239 /** 240 * Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the 241 * request body for POSTs. A request may have multiple values of the same name. 242 * @param key data key 243 * @param value data value 244 * @return this Connection, for chaining 245 */ 246 Connection data(String key, String value); 247 248 /** 249 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 250 * input stream. 251 * <p>Use the {@link #data(String, String, InputStream, String)} method to set the uploaded file's mimetype.</p> 252 * @param key data key (form item name) 253 * @param filename the name of the file to present to the remote server. Typically just the name, not path, 254 * component. 255 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 256 * You must close the InputStream in a {@code finally} block. 
257 * @return this Connection, for chaining 258 * @see #data(String, String, InputStream, String) 259 */ 260 Connection data(String key, String filename, InputStream inputStream); 261 262 /** 263 * Add an input stream as a request data parameter. For GETs, has no effect, but for POSTS this will upload the 264 * input stream. 265 * @param key data key (form item name) 266 * @param filename the name of the file to present to the remote server. Typically just the name, not path, 267 * component. 268 * @param inputStream the input stream to upload, that you probably obtained from a {@link java.io.FileInputStream}. 269 * You must close the InputStream in a {@code finally} block. 270 * @param contentType the Content Type (aka mimetype) to specify for this file. 271 * @return this Connection, for chaining 272 */ 273 Connection data(String key, String filename, InputStream inputStream, String contentType); 274 275 /** 276 * Adds all of the supplied data to the request data parameters 277 * @param data collection of data parameters 278 * @return this Connection, for chaining 279 */ 280 Connection data(Collection<KeyVal> data); 281 282 /** 283 * Adds all of the supplied data to the request data parameters 284 * @param data map of data parameters 285 * @return this Connection, for chaining 286 */ 287 Connection data(Map<String, String> data); 288 289 /** 290 Add one or more request {@code key, val} data parameter pairs. 291 <p>Multiple parameters may be set at once, e.g.: 292 <code>.data("name", "jsoup", "language", "Java", "language", "English");</code> creates a query string like: 293 <code>{@literal ?name=jsoup&language=Java&language=English}</code></p> 294 <p>For GET requests, data parameters will be sent on the request query string. 
For POST (and other methods that 295 contain a body), they will be sent as body form parameters, unless the body is explicitly set by 296 {@link #requestBody(String)}, in which case they will be query string parameters.</p> 297 298 @param keyvals a set of key value pairs. 299 @return this Connection, for chaining 300 */ 301 Connection data(String... keyvals); 302 303 /** 304 * Get the data KeyVal for this key, if any 305 * @param key the data key 306 * @return null if not set 307 */ 308 @Nullable KeyVal data(String key); 309 310 /** 311 * Set a POST (or PUT) request body. Useful when a server expects a plain request body (such as JSON), and not a set 312 * of URL encoded form key/value pairs. E.g.: 313 * <code><pre>Jsoup.connect(url) 314 * .requestBody(json) 315 * .header("Content-Type", "application/json") 316 * .post();</pre></code> 317 * If any data key/vals are supplied, they will be sent as URL query params. 318 * @param body the request body to send 319 * @return this Connection, for chaining 320 * @see #requestBodyStream(InputStream) */ 321 Connection requestBody(String body); 322 323 /** 324 Set the request body. Useful for posting data such as byte arrays or files, and the server expects a single request 325 body (and not a multipart upload). E.g.: 326 <code><pre> Jsoup.connect(url) 327 .requestBody(new ByteArrayInputStream(bytes)) 328 .header("Content-Type", "application/octet-stream") 329 .post(); 330 </pre></code> 331 <p>Or, use a FileInputStream to send data from disk.</p> 332 <p>You should close the stream in a finally block.</p> 333 334 @param stream the input stream to send. 335 @return this Connection, for chaining 336 @see #requestBody(String) 337 @since 1.20.1 338 */ 339 default Connection requestBodyStream(InputStream stream) { 340 throw new UnsupportedOperationException(); 341 } 342 343 /** 344 * Set a request header. Replaces any existing header with the same case-insensitive name. 
345 * @param name header name 346 * @param value header value 347 * @return this Connection, for chaining 348 * @see org.jsoup.Connection.Request#header(String, String) 349 * @see org.jsoup.Connection.Request#headers() 350 */ 351 Connection header(String name, String value); 352 353 /** 354 * Sets each of the supplied headers on the request. Existing headers with the same case-insensitive name will be 355 * replaced with the new value. 356 * @param headers map of headers name {@literal ->} value pairs 357 * @return this Connection, for chaining 358 * @see org.jsoup.Connection.Request#headers() 359 */ 360 Connection headers(Map<String,String> headers); 361 362 /** 363 * Set a cookie to be sent in the request. 364 * @param name name of cookie 365 * @param value value of cookie 366 * @return this Connection, for chaining 367 */ 368 Connection cookie(String name, String value); 369 370 /** 371 * Adds each of the supplied cookies to the request. 372 * @param cookies map of cookie name {@literal ->} value pairs 373 * @return this Connection, for chaining 374 */ 375 Connection cookies(Map<String, String> cookies); 376 377 /** 378 Provide a custom or pre-filled CookieStore to be used on requests made by this Connection. 379 @param cookieStore a cookie store to use for subsequent requests 380 @return this Connection, for chaining 381 @since 1.14.1 382 */ 383 Connection cookieStore(CookieStore cookieStore); 384 385 /** 386 Get the cookie store used by this Connection. 387 @return the cookie store 388 @since 1.14.1 389 */ 390 CookieStore cookieStore(); 391 392 /** 393 * Provide a specific parser to use when parsing the response to a Document. If not set, jsoup defaults to the 394 * {@link Parser#htmlParser() HTML parser}, unless the response content-type is XML, in which case the 395 * {@link Parser#xmlParser() XML parser} is used. 
396 * @param parser alternate parser 397 * @return this Connection, for chaining 398 */ 399 Connection parser(Parser parser); 400 401 /** 402 * Set the character-set used to encode the request body. Defaults to {@code UTF-8}. 403 * @param charset character set to encode the request body 404 * @return this Connection, for chaining 405 */ 406 Connection postDataCharset(String charset); 407 408 /** 409 Set the authenticator to use for this connection, enabling requests to URLs, and via proxies, that require 410 authentication credentials. 411 <p>The authentication scheme used is automatically detected during the request execution. 412 Supported schemes (subject to the platform) are {@code basic}, {@code digest}, {@code NTLM}, 413 and {@code Kerberos}.</p> 414 415 <p>To use, supply a {@link RequestAuthenticator} function that: 416 <ol> 417 <li>validates the URL that is requesting authentication, and</li> 418 <li>returns the appropriate credentials (username and password)</li> 419 </ol> 420 </p> 421 422 <p>For example, to authenticate both to a proxy and a downstream web server: 423 <code><pre> 424 Connection session = Jsoup.newSession() 425 .proxy("proxy.example.com", 8080) 426 .auth(auth -> { 427 if (auth.isServer()) { // provide credentials for the request url 428 Validate.isTrue(auth.url().getHost().equals("example.com")); 429 // check that we're sending credentials where we expect, and not redirected out 430 return auth.credentials("username", "password"); 431 } else { // auth.isProxy() 432 return auth.credentials("proxy-user", "proxy-password"); 433 } 434 }); 435 436 Connection.Response response = session.newRequest("https://example.com/adminzone/").execute(); 437 </pre></code> 438 </p> 439 440 <p>The system may cache the authentication and use it for subsequent requests to the same resource.</p> 441 442 <p><b>Implementation notes</b></p> 443 <p>For compatibility, on a Java 8 platform, authentication is set up via the system-wide default 444 {@link 
java.net.Authenticator#setDefault(Authenticator)} method via a ThreadLocal delegator. Whilst the 445 authenticator used is request specific and thread-safe, if you have other calls to {@code setDefault}, they will be 446 incompatible with this implementation.</p> 447 <p>On Java 9 and above, the preceding note does not apply; authenticators are directly set on the request. </p> 448 <p>If you are attempting to authenticate to a proxy that uses the {@code basic} scheme and will be fetching HTTPS 449 URLs, you need to configure your Java platform to enable that, by setting the 450 {@code jdk.http.auth.tunneling.disabledSchemes} system property to {@code ""}. 451 This must be executed prior to any authorization attempts. E.g.: 452 <code><pre> 453 static { 454 System.setProperty("jdk.http.auth.tunneling.disabledSchemes", ""); 455 // removes Basic, which is otherwise excluded from auth for CONNECT tunnels 456 }</pre></code> 457 </p> 458 * @param authenticator the authenticator to use in this connection 459 * @return this Connection, for chaining 460 * @since 1.17.1 461 */ 462 default Connection auth(@Nullable RequestAuthenticator authenticator) { 463 throw new UnsupportedOperationException(); 464 } 465 466 /** 467 * Execute the request as a GET, and parse the result. 468 * @return parsed Document 469 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 470 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 471 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 472 * @throws java.net.SocketTimeoutException if the connection times out 473 * @throws IOException on error 474 */ 475 Document get() throws IOException; 476 477 /** 478 * Execute the request as a POST, and parse the result. 
479 * @return parsed Document 480 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 481 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 482 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 483 * @throws java.net.SocketTimeoutException if the connection times out 484 * @throws IOException on error 485 */ 486 Document post() throws IOException; 487 488 /** 489 * Execute the request. 490 * @return the executed {@link Response} 491 * @throws java.net.MalformedURLException if the request URL is not an HTTP or HTTPS URL, or is otherwise malformed 492 * @throws HttpStatusException if the response is not OK and HTTP response errors are not ignored 493 * @throws UnsupportedMimeTypeException if the response mime type is not supported and those errors are not ignored 494 * @throws java.net.SocketTimeoutException if the connection times out 495 * @throws IOException on error 496 */ 497 Response execute() throws IOException; 498 499 /** 500 * Get the request object associated with this connection 501 * @return request 502 */ 503 Request request(); 504 505 /** 506 * Set the connection's request 507 * @param request new request object 508 * @return this Connection, for chaining 509 */ 510 Connection request(Request request); 511 512 /** 513 * Get the response, once the request has been executed. 514 * @return response 515 * @throws IllegalArgumentException if called before the request has been executed. 516 */ 517 Response response(); 518 519 /** 520 * Set the connection's response 521 * @param response new response 522 * @return this Connection, for chaining 523 */ 524 Connection response(Response response); 525 526 /** 527 Set the response progress handler, which will be called periodically as the response body is downloaded. 
Since 528 documents are parsed as they are downloaded, this is also a good proxy for the parse progress. 529 <p>The Response object is supplied as the progress context, and may be read from to obtain headers etc.</p> 530 @param handler the progress handler 531 @return this Connection, for chaining 532 @since 1.18.1 533 */ 534 default Connection onResponseProgress(Progress<Response> handler) { 535 throw new UnsupportedOperationException(); 536 } 537 538 /** 539 * Common methods for Requests and Responses 540 * @param <T> Type of Base, either Request or Response 541 */ 542 @SuppressWarnings("UnusedReturnValue") 543 interface Base<T extends Base<T>> { 544 /** 545 * Get the URL of this Request or Response. For redirected responses, this will be the final destination URL. 546 * @return URL 547 * @throws IllegalArgumentException if called on a Request that was created without a URL. 548 */ 549 URL url(); 550 551 /** 552 * Set the URL 553 * @param url new URL 554 * @return this, for chaining 555 */ 556 T url(URL url); 557 558 /** 559 * Get the request method, which defaults to <code>GET</code> 560 * @return method 561 */ 562 Method method(); 563 564 /** 565 * Set the request method 566 * @param method new method 567 * @return this, for chaining 568 */ 569 T method(Method method); 570 571 /** 572 * Get the value of a header. If there is more than one header value with the same name, the headers are returned 573 * comma separated, per <a href="https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2">rfc2616-sec4</a>. 574 * <p> 575 * Header names are case-insensitive. 576 * </p> 577 * @param name name of header (case-insensitive) 578 * @return value of header, or null if not set. 579 * @see #hasHeader(String) 580 * @see #cookie(String) 581 */ 582 @Nullable String header(String name); 583 584 /** 585 * Get the values of a header. 586 * @param name header name, case-insensitive. 587 * @return a list of values for this header, or an empty list if not set. 
588 */ 589 List<String> headers(String name); 590 591 /** 592 * Set a header. This method will overwrite any existing header with the same case-insensitive name. If there 593 * is more than one value for this header, this method will update the first matching header. 594 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 595 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 596 * @param name Name of header 597 * @param value Value of header 598 * @return this, for chaining 599 * @see #addHeader(String, String) 600 */ 601 T header(String name, String value); 602 603 /** 604 * Add a header. The header will be added regardless of whether a header with the same name already exists. 605 * <p>For compatibility, if the content of the header includes text that cannot be represented by ISO-8859-1, 606 * then it should be encoded first per <a href="https://www.ietf.org/rfc/rfc2047.txt">RFC 2047</a>.</p> 607 * @param name Name of new header 608 * @param value Value of new header 609 * @return this, for chaining 610 */ 611 T addHeader(String name, String value); 612 613 /** 614 * Check if a header is present 615 * @param name name of header (case-insensitive) 616 * @return if the header is present in this request/response 617 */ 618 boolean hasHeader(String name); 619 620 /** 621 * Check if a header is present, with the given value 622 * @param name header name (case-insensitive) 623 * @param value value (case-insensitive) 624 * @return if the header and value pair are set in this req/res 625 */ 626 boolean hasHeaderWithValue(String name, String value); 627 628 /** 629 * Remove headers by name. If there is more than one header with this name, they will all be removed. 
630 * @param name name of header to remove (case-insensitive) 631 * @return this, for chaining 632 */ 633 T removeHeader(String name); 634 635 /** 636 * Retrieve all of the request/response header names and corresponding values as a map. For headers with multiple 637 * values, only the first header is returned. 638 * <p>Note that this is a view of the headers only, and changes made to this map will not be reflected in the 639 * request/response object.</p> 640 * @return headers 641 * @see #multiHeaders() 642 643 */ 644 Map<String, String> headers(); 645 646 /** 647 * Retrieve all of the headers, keyed by the header name, and with a list of values per header. 648 * @return a map of header name to the list of values for that header. 649 */ 650 Map<String, List<String>> multiHeaders(); 651 652 /** 653 * Get a cookie value by name from this request/response. 654 * @param name name of cookie to retrieve. 655 * @return value of cookie, or null if not set 656 */ 657 @Nullable String cookie(String name); 658 659 /** 660 * Set a cookie in this request/response. 661 * @param name name of cookie 662 * @param value value of cookie 663 * @return this, for chaining 664 */ 665 T cookie(String name, String value); 666 667 /** 668 * Check if a cookie is present 669 * @param name name of cookie 670 * @return if the cookie is present in this request/response 671 */ 672 boolean hasCookie(String name); 673 674 /** 675 * Remove a cookie by name 676 * @param name name of cookie to remove 677 * @return this, for chaining 678 */ 679 T removeCookie(String name); 680 681 /** 682 Retrieve the request/response cookies as a map. For response cookies, if duplicate cookie names were sent, the 683 last one set will be the one included. For session management, rather than using these response cookies, prefer 684 to use {@link Jsoup#newSession()} and related methods. 
685 686 @return simple cookie map 687 @see #cookieStore() 688 */ 689 Map<String, String> cookies(); 690 } 691 692 /** 693 * Represents an HTTP request. 
694 */ 695 @SuppressWarnings("UnusedReturnValue") 696 interface Request extends Base<Request> { 697 /** 698 * Get the proxy used for this request. 699 * @return the proxy; <code>null</code> if not enabled. 700 */ 701 @Nullable Proxy proxy(); 702 703 /** 704 * Update the proxy for this request. 705 * @param proxy the proxy to use; <code>null</code> to disable. 706 * @return this Request, for chaining 707 */ 708 Request proxy(@Nullable Proxy proxy); 709 710 /** 711 * Set the HTTP proxy to use for this request. 712 * @param host the proxy hostname 713 * @param port the proxy port 714 * @return this Request, for chaining 715 */ 716 Request proxy(String host, int port); 717 718 /** 719 * Get the request timeout, in milliseconds. 720 * @return the timeout in milliseconds. 721 */ 722 int timeout(); 723 724 /** 725 * Update the request timeout. 726 * @param millis timeout, in milliseconds 727 * @return this Request, for chaining 728 */ 729 Request timeout(int millis); 730 731 /** 732 * Get the maximum body size, in bytes. 733 * @return the maximum body size, in bytes. 734 */ 735 int maxBodySize(); 736 737 /** 738 * Update the maximum body size, in bytes. 739 * @param bytes maximum body size, in bytes. 740 * @return this Request, for chaining 741 */ 742 Request maxBodySize(int bytes); 743 744 /** 745 * Get the current followRedirects configuration. 746 * @return true if followRedirects is enabled. 747 */ 748 boolean followRedirects(); 749 750 /** 751 * Configures the request to (not) follow server redirects. By default this is <b>true</b>. 752 * @param followRedirects true if server redirects should be followed. 753 * @return this Request, for chaining 754 */ 755 Request followRedirects(boolean followRedirects); 756 757 /** 758 * Get the current ignoreHttpErrors configuration. 759 * @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be 760 * thrown. 
761 */ 762 boolean ignoreHttpErrors(); 763 764 /** 765 * Configures the request to ignore HTTP errors in the response. 766 * @param ignoreHttpErrors set to true to ignore HTTP errors. 767 * @return this Request, for chaining 768 */ 769 Request ignoreHttpErrors(boolean ignoreHttpErrors); 770 771 /** 772 * Get the current ignoreContentType configuration. 773 * @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to 774 * be thrown. 775 */ 776 boolean ignoreContentType(); 777 778 /** 779 * Configures the request to ignore the Content-Type of the response. 780 * @param ignoreContentType set to true to ignore the content type. 781 * @return this Request, for chaining 782 */ 783 Request ignoreContentType(boolean ignoreContentType); 784 785 /** 786 * Get the current custom SSL socket factory, if any. 787 * @return custom SSL socket factory if set, null otherwise 788 */ 789 @Nullable SSLSocketFactory sslSocketFactory(); 790 791 /** 792 Set a custom SSL socket factory for HTTPS connections. 793 <p>Note: if set, the legacy <code>HttpURLConnection</code> will be used instead of the JVM's 794 <code>HttpClient</code>.</p> 795 796 @param sslSocketFactory SSL socket factory 797 @see #sslContext(SSLContext) 798 @deprecated use {@link #sslContext(SSLContext)} instead; will be removed in jsoup 1.24.1. 799 */ 800 @Deprecated 801 void sslSocketFactory(SSLSocketFactory sslSocketFactory); 802 803 /** 804 Get the current custom SSL context, if any. 805 806 @return custom SSL context if set, null otherwise 807 @since 1.21.2 808 */ 809 @Nullable 810 default SSLContext sslContext() { 811 throw new UnsupportedOperationException(); 812 } 813 814 /** 815 Set a custom SSL context for HTTPS connections. 
816 <p>Note: when using the legacy <code>HttpURLConnection</code>, only the <code>SSLSocketFactory</code> from the 817 context will be used.</p> 818 819 @param sslContext SSL context 820 @return this Request, for chaining 821 @since 1.21.2 822 */ 823 default Request sslContext(SSLContext sslContext) { 824 throw new UnsupportedOperationException(); 825 } 826 827 /** 828 * Add a data parameter to the request 829 * @param keyval data to add. 830 * @return this Request, for chaining 831 */ 832 Request data(KeyVal keyval); 833 834 /** 835 * Get all of the request's data parameters 836 * @return collection of keyvals 837 */ 838 Collection<KeyVal> data(); 839 840 /** 841 * Set a POST (or PUT) request body. Useful when a server expects a plain request body, not a set of URL 842 * encoded form key/value pairs. E.g.: 843 * <code><pre>Jsoup.connect(url) 844 * .requestBody(json) 845 * .header("Content-Type", "application/json") 846 * .post();</pre></code> 847 * <p>If any data key/vals are supplied, they will be sent as URL query params.</p> 848 * @param body to use as the request body. Set to null to clear a previously set body. 849 * @return this Request, for chaining 850 * @see #requestBodyStream(InputStream) 851 */ 852 Request requestBody(@Nullable String body); 853 854 /** 855 * Get the current request body. 856 * @return null if not set. 857 */ 858 @Nullable String requestBody(); 859 860 /** 861 Set the request body. Useful for posting data such as byte arrays or files, and the server expects a single 862 request body (and not a multipart upload). E.g.: 863 <code><pre> Jsoup.connect(url) 864 .requestBody(new ByteArrayInputStream(bytes)) 865 .header("Content-Type", "application/octet-stream") 866 .post(); 867 </pre></code> 868 <p>Or, use a FileInputStream to send data from disk.</p> 869 <p>You should close the stream in a finally block.</p> 870 871 @param stream the input stream to send. 
872 @return this Request, for chaining 873 @see #requestBody(String) 874 @since 1.20.1 875 */ 876 default Request requestBodyStream(InputStream stream) { 877 throw new UnsupportedOperationException(); 878 } 879 880 /** 881 * Specify the parser to use when parsing the document. 882 * @param parser parser to use. 883 * @return this Request, for chaining 884 */ 885 Request parser(Parser parser); 886 887 /** 888 * Get the current parser to use when parsing the document. 889 * @return current Parser 890 */ 891 Parser parser(); 892 893 /** 894 * Sets the post data character set for x-www-form-urlencoded post data 895 * @param charset character set to encode post data 896 * @return this Request, for chaining 897 */ 898 Request postDataCharset(String charset); 899 900 /** 901 * Gets the post data character set for x-www-form-urlencoded post data 902 * @return character set to encode post data 903 */ 904 String postDataCharset(); 905 906 /** 907 Set the authenticator to use for this request. 908 See {@link Connection#auth(RequestAuthenticator) Connection.auth(authenticator)} for examples and 909 implementation notes. 910 * @param authenticator the authenticator 911 * @return this Request, for chaining. 912 * @since 1.17.1 913 */ 914 default Request auth(@Nullable RequestAuthenticator authenticator) { 915 throw new UnsupportedOperationException(); 916 } 917 918 /** 919 Get the RequestAuthenticator, if any, that will be used on this request. 920 * @return the RequestAuthenticator, or {@code null} if not set 921 * @since 1.17.1 922 */ 923 @Nullable 924 default RequestAuthenticator auth() { 925 throw new UnsupportedOperationException(); 926 } 927 } 928 929 /** 930 * Represents a HTTP response. 931 */ 932 interface Response extends Base<Response> { 933 934 /** 935 * Get the status code of the response. 936 * @return status code 937 */ 938 int statusCode(); 939 940 /** 941 * Get the status message of the response. 
942 * @return status message 943 */ 944 String statusMessage(); 945 946 /** 947 * Get the character set name of the response, derived from the content-type header. 948 * @return character set name if set, <b>null</b> if not 949 */ 950 @Nullable String charset(); 951 952 /** 953 * Set / override the response character set. When the document body is parsed it will be with this charset. 954 * @param charset to decode body as 955 * @return this Response, for chaining 956 */ 957 Response charset(String charset); 958 959 /** 960 * Get the response content type (e.g. "text/html"); 961 * @return the response content type, or <b>null</b> if one was not set 962 */ 963 @Nullable String contentType(); 964 965 /** 966 Read and parse the body of the response as a Document. If you intend to parse the same response multiple times, 967 you should {@link #readFully()} first, which will buffer the body into memory. 968 969 @return a parsed Document 970 @throws IOException if an IO exception occurs whilst reading the body. 971 @see #readFully() 972 */ 973 Document parse() throws IOException; 974 975 /** 976 Read the response body, and returns it as a plain String. 977 978 @return body 979 @throws IOException if an IO exception occurs whilst reading the body. 980 @since 1.21.1 981 */ 982 default String readBody() throws IOException { 983 throw new UnsupportedOperationException(); 984 } 985 986 /** 987 Get the body of the response as a plain String. 988 989 <p>Will throw an UncheckedIOException if the body has not been buffered and an error occurs whilst reading the 990 body; use {@link #readFully()} first to buffer the body and catch any exceptions explicitly. Or more simply, 991 {@link #readBody()}.</p> 992 993 @return body 994 @throws UncheckedIOException if an IO exception occurs whilst reading the body. 995 @see #readBody() 996 @see #readFully() 997 */ 998 String body(); 999 1000 /** 1001 Get the body of the response as an array of bytes. 
1002 1003 <p>Will throw an UncheckedIOException if the body has not been buffered and an error occurs whilst reading the 1004 body; use {@link #readFully()} first to buffer the body and catch any exceptions explicitly.</p> 1005 1006 @return body bytes 1007 @throws UncheckedIOException if an IO exception occurs whilst reading the body. 1008 @see #readFully() 1009 */ 1010 byte[] bodyAsBytes(); 1011 1012 /** 1013 Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the same 1014 connection response. Otherwise, once the response is read, its InputStream will have been drained and may not be 1015 re-read. 1016 1017 <p>Subsequent calls methods than consume the body, such as {@link #parse()}, {@link #body()}, 1018 {@link #bodyAsBytes()}, will not need to read the body again, and will not throw exceptions.</p> 1019 <p>Calling {@link #readBody()}} has the same effect.</p> 1020 1021 @return this response, for chaining 1022 @throws IOException if an IO exception occurs during buffering. 1023 @since 1.21.1 1024 */ 1025 default Response readFully() throws IOException { 1026 throw new UnsupportedOperationException(); 1027 } 1028 1029 /** 1030 * Read the body of the response into a local buffer, so that {@link #parse()} may be called repeatedly on the 1031 * same connection response. Otherwise, once the response is read, its InputStream will have been drained and 1032 * may not be re-read. 1033 * <p>Calling {@link #body() } or {@link #bodyAsBytes()} has the same effect.</p> 1034 * @return this response, for chaining 1035 * @throws UncheckedIOException if an IO exception occurs during buffering. 1036 * @deprecated use {@link #readFully()} instead (for the checked exception). Will be removed in jsoup 1.24.1. 1037 */ 1038 @Deprecated 1039 Response bufferUp(); 1040 1041 /** 1042 Get the body of the response as a (buffered) InputStream. You should close the input stream when you're done 1043 with it. 
1044 <p>Other body methods (like readFully, body, parse, etc) will generally not work in conjunction with this method, 1045 as it consumes the InputStream.</p> 1046 <p>Any configured max size or maximum read timeout applied to the connection will not be applied to this stream, 1047 unless {@link #readFully()} is called prior.</p> 1048 <p>This method is useful for writing large responses to disk, without buffering them completely into memory 1049 first.</p> 1050 @return the response body input stream 1051 */ 1052 BufferedInputStream bodyStream(); 1053 1054 /** 1055 Returns a {@link StreamParser} that will parse the Response progressively. 1056 * @return a StreamParser, prepared to parse this response. 1057 * @throws IOException if an IO exception occurs preparing the parser. 1058 */ 1059 default StreamParser streamParser() throws IOException { 1060 throw new UnsupportedOperationException(); 1061 } 1062 } 1063 1064 /** 1065 * A Key:Value tuple(+), used for form data. 1066 */ 1067 interface KeyVal { 1068 1069 /** 1070 * Update the key of a keyval 1071 * @param key new key 1072 * @return this KeyVal, for chaining 1073 */ 1074 KeyVal key(String key); 1075 1076 /** 1077 * Get the key of a keyval 1078 * @return the key 1079 */ 1080 String key(); 1081 1082 /** 1083 * Update the value of a keyval 1084 * @param value the new value 1085 * @return this KeyVal, for chaining 1086 */ 1087 KeyVal value(String value); 1088 1089 /** 1090 * Get the value of a keyval 1091 * @return the value 1092 */ 1093 String value(); 1094 1095 /** 1096 * Add or update an input stream to this keyVal 1097 * @param inputStream new input stream 1098 * @return this KeyVal, for chaining 1099 */ 1100 KeyVal inputStream(InputStream inputStream); 1101 1102 /** 1103 * Get the input stream associated with this keyval, if any 1104 * @return input stream if set, or null 1105 */ 1106 @Nullable InputStream inputStream(); 1107 1108 /** 1109 * Does this keyval have an input stream? 
1110 * @return true if this keyval does indeed have an input stream 1111 */ 1112 boolean hasInputStream(); 1113 1114 /** 1115 * Set the Content Type header used in the MIME body (aka mimetype) when uploading files. 1116 * Only useful if {@link #inputStream(InputStream)} is set. 1117 * <p>Will default to {@code application/octet-stream}.</p> 1118 * @param contentType the new content type 1119 * @return this KeyVal 1120 */ 1121 KeyVal contentType(String contentType); 1122 1123 /** 1124 * Get the current Content Type, or {@code null} if not set. 1125 * @return the current Content Type. 1126 */ 1127 @Nullable String contentType(); 1128 } 1129}