001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ControllableInputStream; 008import org.jsoup.internal.StringUtil; 009import org.jsoup.nodes.Document; 010import org.jsoup.parser.Parser; 011import org.jsoup.parser.StreamParser; 012import org.jspecify.annotations.Nullable; 013 014import javax.net.ssl.SSLContext; 015import javax.net.ssl.SSLSocketFactory; 016import java.io.BufferedInputStream; 017import java.io.BufferedReader; 018import java.io.BufferedWriter; 019import java.io.ByteArrayInputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.InputStreamReader; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.io.UncheckedIOException; 026import java.net.CookieManager; 027import java.net.CookieStore; 028import java.net.InetSocketAddress; 029import java.net.MalformedURLException; 030import java.net.Proxy; 031import java.net.URL; 032import java.net.URLEncoder; 033import java.nio.Buffer; 034import java.nio.ByteBuffer; 035import java.nio.charset.Charset; 036import java.nio.charset.IllegalCharsetNameException; 037import java.nio.charset.StandardCharsets; 038import java.util.ArrayList; 039import java.util.Collection; 040import java.util.Collections; 041import java.util.LinkedHashMap; 042import java.util.List; 043import java.util.Map; 044import java.util.concurrent.locks.ReentrantLock; 045import java.util.regex.Pattern; 046import java.util.zip.GZIPInputStream; 047import java.util.zip.Inflater; 048import java.util.zip.InflaterInputStream; 049 050import static org.jsoup.Connection.Method.HEAD; 051import static org.jsoup.helper.DataUtil.UTF_8; 052import static org.jsoup.internal.Normalizer.lowerCase; 053import static org.jsoup.internal.SharedConstants.DefaultBufferSize; 054 055/** 056 * Implementation of {@link Connection}. 
* @see org.jsoup.Jsoup#connect(String)
 */
@SuppressWarnings("CharsetObjectCanBeUsed")
public class HttpConnection implements Connection {
    /** Name of the {@code Content-Encoding} HTTP header. */
    public static final String CONTENT_ENCODING = "Content-Encoding";
    /**
     * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
     * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
     */
    public static final String DEFAULT_UA =
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36";
    private static final String USER_AGENT = "User-Agent";
    /** Name of the {@code Content-Type} HTTP header. */
    public static final String CONTENT_TYPE = "Content-Type";
    /** MIME type string for multipart form submissions. */
    public static final String MULTIPART_FORM_DATA = "multipart/form-data";
    /** MIME type string for URL-encoded form submissions. */
    public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
    private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set.
    static final String DefaultUploadType = "application/octet-stream";
    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

    // The request being configured by this connection's fluent setters; every setter below delegates to it.
    private HttpConnection.Request req;
    // The most recent response; null until execute() (or response(Response)) assigns it.
    private Connection.@Nullable Response res;
    @Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor
    @Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor

    /**
     Create a new Connection, with the request URL specified.
     @param url the URL to fetch from
     @return a new Connection object
     */
    public static Connection connect(String url) {
        Connection con = new HttpConnection();
        con.url(url);
        return con;
    }

    /**
     Create a new Connection, with the request URL specified.
     @param url the URL to fetch from
     @return a new Connection object
     */
    public static Connection connect(URL url) {
        Connection con = new HttpConnection();
        con.url(url);
        return con;
    }

    /**
     Create a new, empty HttpConnection. A fresh default Request is created and linked back to this connection.
     */
    public HttpConnection() {
        req = new Request();
        req.connection = this;
    }

    /**
     Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not
     copied. All other settings (proxy, parser, cookies, etc) are copied.
     @param copy the request to copy
     */
    HttpConnection(Request copy) {
        req = new Request(copy);
    }

    /**
     Escapes a value for use as a MIME name by replacing each double-quote character with {@code %22}.
     @param val the value to escape
     @return the value with every {@code "} replaced by {@code %22}
     */
    static String encodeMimeName(String val) {
        return val.replace("\"", "%22");
    }

    @Override
    public Connection newRequest() {
        // copy the prototype request for the different settings, cookie manager, etc
        return new HttpConnection(req);
    }

    /** Create a new Connection that just wraps the provided Request and Response */
    private HttpConnection(Request req, Response res) {
        this.req = req;
        this.res = res;
    }

    @Override
    public Connection url(URL url) {
        req.url(url);
        return this;
    }

    /**
     Sets the request URL from a string.
     @param url the URL to fetch; validated via {@code Validate.notEmptyParam} before parsing
     @return this Connection, for chaining
     @throws IllegalArgumentException if the string cannot be parsed as a URL (wraps the MalformedURLException)
     */
    @Override
    public Connection url(String url) {
        Validate.notEmptyParam(url, "url");
        try {
            req.url(new URL(url));
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
        }
        return this;
    }

    @Override
    public Connection proxy(@Nullable Proxy proxy) {
        req.proxy(proxy);
        return this;
    }

    @Override
    public Connection proxy(String host, int port) {
        req.proxy(host, port);
        return this;
    }

    /** Sets the {@code User-Agent} request header, replacing any existing value. */
    @Override
    public Connection userAgent(String userAgent) {
        Validate.notNullParam(userAgent, "userAgent");
        req.header(USER_AGENT, userAgent);
        return this;
    }

    @Override
    public Connection timeout(int millis) {
        req.timeout(millis);
        return this;
    }

    @Override
    public Connection maxBodySize(int bytes) {
        req.maxBodySize(bytes);
        return this;
    }

    @Override
    public Connection followRedirects(boolean followRedirects) {
        req.followRedirects(followRedirects);
        return this;
    }

    /** Sets the {@code Referer} request header (HTTP's own spelling), replacing any existing value. */
    @Override
    public Connection referrer(String referrer) {
        Validate.notNullParam(referrer, "referrer");
        req.header("Referer", referrer);
        return this;
    }

    @Override
    public Connection method(Method method) {
        req.method(method);
        return this;
    }

    @Override
    public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
        req.ignoreHttpErrors(ignoreHttpErrors);
        return this;
    }

    @Override
    public Connection ignoreContentType(boolean ignoreContentType) {
        req.ignoreContentType(ignoreContentType);
        return this;
    }

    @Override
    public Connection data(String key, String value) {
        req.data(KeyVal.create(key, value));
        return this;
    }

    @Override
    public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
        req.sslSocketFactory(sslSocketFactory);
        return this;
    }

    @Override
    public Connection sslContext(SSLContext sslContext) {
        req.sslContext(sslContext);
        return this;
    }

    @Override
    public Connection data(String key, String filename, InputStream inputStream) {
        req.data(KeyVal.create(key, filename, inputStream));
        return this;
    }

    @Override
    public Connection data(String key, String filename, InputStream inputStream, String contentType) {
        req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
        return this;
    }

    /** Adds one data key/value pair per map entry, in the map's iteration order. */
    @Override
    public Connection data(Map<String, String> data) {
        Validate.notNullParam(data, "data");
        for (Map.Entry<String, String> entry : data.entrySet()) {
            req.data(KeyVal.create(entry.getKey(), entry.getValue()));
        }
        return this;
    }

    /**
     Adds data from a flat list of alternating keys and values.
     @param keyvals key1, value1, key2, value2, ... ; the length must be even, keys non-empty, values non-null
     @return this Connection, for chaining
     */
    @Override
    public Connection data(String... keyvals) {
        Validate.notNullParam(keyvals, "keyvals");
        Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
        for (int i = 0; i < keyvals.length; i += 2) {
            String key = keyvals[i];
            String value = keyvals[i+1];
            Validate.notEmpty(key, "Data key must not be empty");
            Validate.notNull(value, "Data value must not be null");
            req.data(KeyVal.create(key, value));
        }
        return this;
    }

    @Override
    public Connection data(Collection<Connection.KeyVal> data) {
        Validate.notNullParam(data, "data");
        for (Connection.KeyVal entry: data) {
            req.data(entry);
        }
        return this;
    }

    /**
     Finds the first data item whose key equals the supplied key (exact, case-sensitive match).
     @param key the key to look for
     @return the first matching KeyVal, or {@code null} if none is present
     */
    @Override
    public Connection.@Nullable KeyVal data(String key) {
        Validate.notEmptyParam(key, "key");
        for (Connection.KeyVal keyVal : request().data()) {
            if (keyVal.key().equals(key))
                return keyVal;
        }
        return null;
    }

    @Override
    public Connection requestBody(String body) {
        req.requestBody(body);
        return this;
    }

    @Override
    public Connection requestBodyStream(InputStream stream) {
        req.requestBodyStream(stream);
        return this;
    }

    @Override
    public Connection header(String name, String value) {
        req.header(name, value);
        return this;
    }

    /** Sets each header in the map on the request; each call replaces any existing header of the same name. */
    @Override
    public Connection headers(Map<String,String> headers) {
        Validate.notNullParam(headers, "headers");
        for (Map.Entry<String,String> entry : headers.entrySet()) {
            req.header(entry.getKey(),entry.getValue());
        }
        return this;
    }

    @Override
    public Connection cookie(String name, String value) {
        req.cookie(name, value);
        return this;
    }

    @Override
    public Connection cookies(Map<String, String> cookies) {
        Validate.notNullParam(cookies, "cookies");
        for (Map.Entry<String, String> entry : cookies.entrySet()) {
            req.cookie(entry.getKey(), entry.getValue());
        }
        return this;
    }

    @Override
    public Connection cookieStore(CookieStore cookieStore) {
        // create a new cookie manager using the new store
        req.cookieManager = new CookieManager(cookieStore, null);
        return this;
    }

    @Override
    public CookieStore cookieStore() {
        return req.cookieManager.getCookieStore();
    }

    @Override
    public Connection parser(Parser parser) {
        req.parser(parser);
        return this;
    }

    /** Forces the request method to GET, executes the request, and parses the response. */
    @Override
    public Document get() throws IOException {
        req.method(Method.GET);
        execute();
        Validate.notNull(res);
        return res.parse();
    }

    /** Forces the request method to POST, executes the request, and parses the response. */
    @Override
    public Document post() throws IOException {
        req.method(Method.POST);
        execute();
        Validate.notNull(res);
        return res.parse();
    }

    /** Executes the current request and remembers the response so that {@link #response()} can return it. */
    @Override
    public Connection.Response execute() throws IOException {
        res = Response.execute(req);
        return res;
    }

    @Override
    public Connection.Request request() {
        return req;
    }

    @Override
    public Connection request(Connection.Request request) {
        req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
        return this;
    }

    /**
     Gets the response stored by the last execution (or by {@link #response(Connection.Response)}).
     @throws IllegalArgumentException if no response has been set yet
     */
    @Override
    public Connection.Response response() {
        if (res == null) {
            throw new
IllegalArgumentException("You must execute the request before getting a response.");
        }
        return res;
    }

    @Override
    public Connection response(Connection.Response response) {
        res = response;
        return this;
    }

    @Override
    public Connection postDataCharset(String charset) {
        req.postDataCharset(charset);
        return this;
    }

    @Override public Connection auth(@Nullable RequestAuthenticator authenticator) {
        req.auth(authenticator);
        return this;
    }

    @Override public Connection onResponseProgress(Progress<Connection.Response> handler) {
        req.responseProgress = handler;
        return this;
    }

    /**
     State shared by Request and Response: the URL, the HTTP method, the headers (multi-valued, insertion ordered) and
     the cookies. Header lookups and removals are case-insensitive on the header name; cookie names are matched exactly.
     The {@code (T) this} casts in the fluent setters are what the class-level unchecked suppression covers.
     */
    @SuppressWarnings("unchecked")
    private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
        private static final URL UnsetUrl; // only used if you created a new Request()
        static {
            try {
                UnsetUrl = new URL("http://undefined/");
            } catch (MalformedURLException e) {
                throw new IllegalStateException(e);
            }
        }

        URL url = UnsetUrl; // sentinel; compared by identity in url() to detect "never set"
        Method method = Method.GET;
        Map<String, List<String>> headers; // header name (case as first supplied) -> values, in insertion order
        Map<String, String> cookies;

        private Base() {
            headers = new LinkedHashMap<>();
            cookies = new LinkedHashMap<>();
        }

        /** Copy constructor: copies the header map and each of its value lists, so later edits do not leak between copies. */
        private Base(Base<T> copy) {
            url = copy.url; // unmodifiable object
            method = copy.method;
            headers = new LinkedHashMap<>();
            for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
                headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
            }
            cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
        }

        /**
         Gets the URL.
         @throws IllegalArgumentException if the URL is still the unset placeholder
         */
        @Override
        public URL url() {
            if (url == UnsetUrl)
                throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
            return url;
        }

        /** Sets the URL. The stored value is the result of {@code new UrlBuilder(url).build()}, not the argument itself. */
        @Override
        public T url(URL url) {
            Validate.notNullParam(url, "url");
            this.url = new UrlBuilder(url).build();
            return (T) this;
        }

        @Override
        public Method method() {
            return method;
        }

        @Override
        public T method(Method method) {
            Validate.notNullParam(method, "method");
            this.method = method;
            return (T) this;
        }

        /**
         Gets a header's value, looked up case-insensitively.
         @return all values of the header joined with {@code ", "}, or {@code null} if the header has no values
         */
        @Override @Nullable
        public String header(String name) {
            Validate.notNullParam(name, "name");
            List<String> vals = getHeadersCaseInsensitive(name);
            if (!vals.isEmpty()) {
                // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
                return StringUtil.join(vals, ", ");
            }

            return null;
        }

        /**
         Appends a value to a header. A {@code null} value is stored as the empty string. If a header with the same
         name (ignoring case) already has values, the value is appended to that list; otherwise a new list is stored
         under the name exactly as supplied.
         */
        @Override
        public T addHeader(String name, @Nullable String value) {
            Validate.notEmptyParam(name, "name");
            value = value == null ? "" : value;

            List<String> values = headers(name);
            if (values.isEmpty()) {
                // headers(name) may have returned the shared immutable empty list, so always start a fresh mutable one
                values = new ArrayList<>();
                headers.put(name, values);
            }
            values.add(value);

            return (T) this;
        }

        /** Gets the values of a header (case-insensitive name match); an empty list if the header is absent. */
        @Override
        public List<String> headers(String name) {
            Validate.notEmptyParam(name, "name");
            return getHeadersCaseInsensitive(name);
        }

        /** Sets a header to a single value, first removing any existing header with the same name (ignoring case). */
        @Override
        public T header(String name, String value) {
            Validate.notEmptyParam(name, "name");
            removeHeader(name); // ensures we don't get an "accept-encoding" and an "Accept-Encoding"
            addHeader(name, value);
            return (T) this;
        }

        @Override
        public boolean hasHeader(String name) {
            Validate.notEmptyParam(name, "name");
            return !getHeadersCaseInsensitive(name).isEmpty();
        }

        /**
         * Test if the request has a header with this value (case-insensitive).
         */
        @Override
        public boolean hasHeaderWithValue(String name, String value) {
            Validate.notEmpty(name);
            Validate.notEmpty(value);
            List<String> values = headers(name);
            for (String candidate : values) {
                if (value.equalsIgnoreCase(candidate))
                    return true;
            }
            return false;
        }

        @Override
        public T removeHeader(String name) {
            Validate.notEmptyParam(name, "name");
            Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
            if (entry != null)
                headers.remove(entry.getKey()); // ensures correct case
            return (T) this;
        }

        /**
         Gets a new map of header name to the FIRST value of that header. Headers whose value list is empty are
         omitted. Use {@link #multiHeaders()} to see every value.
         */
        @Override
        public Map<String, String> headers() {
            LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
                String header = entry.getKey();
                List<String> values = entry.getValue();
                if (!values.isEmpty())
                    map.put(header, values.get(0));
            }
            return map;
        }

        /** Returns the backing header map itself (not a copy), so changes made through it affect this object. */
        @Override
        public Map<String, List<String>> multiHeaders() {
            return headers;
        }

        /** Linear scan for the first header whose name equals {@code name} ignoring case; empty list if none. */
        private List<String> getHeadersCaseInsensitive(String name) {
            Validate.notNull(name);

            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
                if (name.equalsIgnoreCase(entry.getKey()))
                    return entry.getValue();
            }

            return Collections.emptyList();
        }

        /** Finds the first header entry whose lower-cased name equals the lower-cased {@code name}; null if none. */
        private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
            String lc = lowerCase(name);
            for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
                if (lowerCase(entry.getKey()).equals(lc))
                    return entry;
            }
            return null;
        }

        /** Gets a cookie's value by exact name; the map lookup yields {@code null} when the cookie is absent. */
        @Override
        public String cookie(String name) {
            Validate.notEmptyParam(name, "name");
            return cookies.get(name);
        }

        @Override
        public T cookie(String name, String value) {
            Validate.notEmptyParam(name, "name");
            Validate.notNullParam(value, "value");
            cookies.put(name, value);
            return (T) this;
        }

        @Override
        public boolean hasCookie(String name) {
            Validate.notEmptyParam(name, "name");
            return cookies.containsKey(name);
        }

        @Override
        public T removeCookie(String name) {
            Validate.notEmptyParam(name, "name");
            cookies.remove(name);
            return (T) this;
        }

        /** Returns the backing cookie map itself (not a copy). */
        @Override
        public Map<String, String> cookies() {
            return cookies;
        }
    }

    /**
     The request half of a connection: target, timeouts, body/data, parser, TLS settings, cookie manager and
     authenticator. Defaults are established in the no-arg constructor.
     */
    public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
        static {
            System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
            // make sure that we can send Sec-Fetch-Site headers etc.
        }

        HttpConnection connection;
        private @Nullable Proxy proxy;
        private int timeoutMilliseconds;
        private int maxBodySizeBytes;
        private boolean followRedirects;
        private final Collection<Connection.KeyVal> data;
        private @Nullable Object body = null; // String or InputStream
        @Nullable String mimeBoundary;
        private boolean ignoreHttpErrors = false;
        private boolean ignoreContentType = false;
        private Parser parser;
        private boolean parserDefined = false; // called parser(...) vs initialized in ctor
        private String postDataCharset = DataUtil.defaultCharsetName;
        private @Nullable SSLSocketFactory sslSocketFactory;
        @Nullable SSLContext sslContext;
        private CookieManager cookieManager;
        @Nullable RequestAuthenticator authenticator;
        private @Nullable Progress<Connection.Response> responseProgress;

        private final ReentrantLock executing = new ReentrantLock(); // detects and warns if same request used concurrently

        /**
         Creates a request with the defaults: 30 second timeout, 2MB max body size, redirects followed, GET method,
         {@code Accept-Encoding: gzip}, the default user agent, the HTML parser, and a new CookieManager.
         */
        Request() {
            super();
            timeoutMilliseconds = 30000; // 30 seconds
            maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
            followRedirects = true;
            data = new ArrayList<>();
            method = Method.GET;
            addHeader("Accept-Encoding", "gzip");
            addHeader(USER_AGENT, DEFAULT_UA);
            parser = Parser.htmlParser();
            cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
        }

        /**
         Copies the settings of another request. The data list and the body are deliberately NOT copied, and
         mimeBoundary is left unset; the cookie manager, SSL settings, authenticator and progress handler are shared
         by reference; the parser is re-created via {@code newInstance()}.
         */
        Request(Request copy) {
            super(copy);
            connection = copy.connection;
            proxy = copy.proxy;
            postDataCharset = copy.postDataCharset;
            timeoutMilliseconds = copy.timeoutMilliseconds;
            maxBodySizeBytes = copy.maxBodySizeBytes;
            followRedirects = copy.followRedirects;
            data = new ArrayList<>(); // data not copied
            //body not copied
            ignoreHttpErrors = copy.ignoreHttpErrors;
            ignoreContentType = copy.ignoreContentType;
            parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
            parserDefined = copy.parserDefined;
            sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
            sslContext = copy.sslContext;
            cookieManager = copy.cookieManager;
            authenticator = copy.authenticator;
            responseProgress = copy.responseProgress;
        }

        @Override @Nullable
        public Proxy proxy() {
            return proxy;
        }

        @Override
        public Request proxy(@Nullable Proxy proxy) {
            this.proxy = proxy;
            return this;
        }

        /** Sets an HTTP proxy for the host and port; the address is created unresolved (no DNS lookup here). */
        @Override
        public Request proxy(String host, int port) {
            this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
            return this;
        }

        @Override
        public int timeout() {
            return timeoutMilliseconds;
        }

        /** Sets the timeout in milliseconds; must be zero or greater (the validation message describes 0 as infinite). */
        @Override
        public Request timeout(int millis) {
            Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
            timeoutMilliseconds = millis;
            return this;
        }

        @Override
        public int maxBodySize() {
            return maxBodySizeBytes;
        }

        /** Sets the max body size in bytes; must be zero or greater (the validation message describes 0 as unlimited). */
        @Override
        public Connection.Request maxBodySize(int bytes) {
            Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
            maxBodySizeBytes = bytes;
            return this;
        }

        @Override
        public boolean followRedirects() {
            return followRedirects;
        }

        @Override
        public Connection.Request followRedirects(boolean followRedirects) {
            this.followRedirects = followRedirects;
            return this;
        }

        @Override
        public boolean ignoreHttpErrors() {
            return ignoreHttpErrors;
        }

        @Override @Nullable
        public SSLSocketFactory sslSocketFactory() {
            return sslSocketFactory;
        }

        @Override
        public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
            this.sslSocketFactory = sslSocketFactory;
        }

        @Override @Nullable
        public SSLContext sslContext() {
            return sslContext;
        }

        @Override
        public Connection.Request sslContext(SSLContext sslContext) {
            this.sslContext = sslContext;
            return this;
        }

        @Override
        public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
            this.ignoreHttpErrors = ignoreHttpErrors;
            return this;
        }

        @Override
        public boolean ignoreContentType() {
            return ignoreContentType;
        }

        @Override
        public Connection.Request ignoreContentType(boolean ignoreContentType) {
            this.ignoreContentType = ignoreContentType;
            return this;
        }

        @Override
        public Request data(Connection.KeyVal keyval) {
            Validate.notNullParam(keyval, "keyval");
            data.add(keyval);
            return this;
        }

        /** Returns the backing data collection itself (not a copy). */
        @Override
        public Collection<Connection.KeyVal> data() {
            return data;
        }

        @Override
        public Connection.Request requestBody(@Nullable String body) {
            this.body = body;
            return this;
        }

        /** Gets the request body if it was set as a String; {@code null} if unset or if it was set as a stream. */
        @Override @Nullable
        public String requestBody() {
            return body instanceof String ? (String) body : null;
        }

        /** Sets the request body to a stream, replacing any String body (both share the one {@code body} field). */
        @Override
        public Connection.Request requestBodyStream(InputStream stream) {
            body = stream;
            return this;
        }

        /** Sets the parser and records that it was chosen explicitly, so it is not auto-switched on XML responses. */
        @Override
        public Request parser(Parser parser) {
            this.parser = parser;
            parserDefined = true;
            return this;
        }

        @Override
        public Parser parser() {
            return parser;
        }

        /**
         Sets the charset name used for post data.
         @throws IllegalCharsetNameException if {@code Charset.isSupported(charset)} is false
         */
        @Override
        public Connection.Request postDataCharset(String charset) {
            Validate.notNullParam(charset, "charset");
            if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
            this.postDataCharset = charset;
            return this;
        }

        @Override
        public String postDataCharset() {
            return postDataCharset;
        }

        CookieManager cookieManager() {
            return cookieManager;
        }

        @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
            this.authenticator = authenticator;
            return this;
        }

        @Override @Nullable public RequestAuthenticator auth() {
            return authenticator;
        }
    }

    /**
     The response half of a connection. The body is held either as an unread stream ({@code bodyStream}) or, once
     buffered, as bytes ({@code byteData}).
     */
    public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
        private static final int MAX_REDIRECTS = 20;
        private static final String LOCATION = "Location";
        int statusCode;
        String statusMessage = "";
        private @Nullable ByteBuffer byteData; // the fully buffered body, once read
        private @Nullable ControllableInputStream bodyStream; // the not-yet-consumed body
        @Nullable RequestExecutor executor;
        private @Nullable String charset;
        @Nullable String contentType;
int contentLength; 843 private boolean executed = false; 844 private boolean inputStreamRead = false; 845 private int numRedirects = 0; 846 private final HttpConnection.Request req; 847 848 /* 849 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 850 */ 851 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 852 853 /** 854 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 855 are created from the HttpURLConnection and fields defined. 856 */ 857 Response() { 858 super(); 859 statusCode = 400; 860 statusMessage = "Request not made"; 861 req = new Request(); 862 contentType = null; 863 } 864 865 static Response execute(HttpConnection.Request req) throws IOException { 866 return execute(req, null); 867 } 868 869 static Response execute(HttpConnection.Request req, @Nullable Response prevRes) throws IOException { 870 Validate.isTrue(req.executing.tryLock(), "Multiple threads were detected trying to execute the same request concurrently. 
Make sure to use Connection#newRequest() and do not share an executing request between threads."); 871 Validate.notNullParam(req, "req"); 872 URL url = req.url(); 873 Validate.notNull(url, "URL must be specified to connect"); 874 String protocol = url.getProtocol(); 875 if (!protocol.equals("http") && !protocol.equals("https")) 876 throw new MalformedURLException("Only http & https protocols supported"); 877 final boolean supportsBody = req.method().hasBody(); 878 final boolean hasBody = req.body != null; 879 if (!supportsBody) 880 Validate.isFalse(hasBody, "Cannot set a request body for HTTP method " + req.method()); 881 882 // set up the request for execution 883 if (!req.data().isEmpty() && (!supportsBody || hasBody)) 884 serialiseRequestUrl(req); 885 else if (supportsBody) 886 setOutputContentType(req); 887 888 long startTime = System.nanoTime(); 889 RequestExecutor executor = RequestDispatch.get(req, prevRes); 890 Response res = null; 891 try { 892 res = executor.execute(); 893 894 // redirect if there's a location header (from 3xx, or 201 etc) 895 if (res.hasHeader(LOCATION) && req.followRedirects()) { 896 if (res.statusCode != HTTP_TEMP_REDIR) { 897 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 
898 req.data().clear(); 899 req.requestBody(null); 900 req.removeHeader(CONTENT_TYPE); 901 } 902 903 String location = res.header(LOCATION); 904 Validate.notNull(location); 905 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 906 location = location.substring(6); 907 URL redir = StringUtil.resolve(req.url(), location); 908 req.url(redir); 909 910 return execute(req, res); 911 } 912 if ((res.statusCode < 200 || res.statusCode >= 400) && !req.ignoreHttpErrors()) 913 throw new HttpStatusException("HTTP error fetching URL", res.statusCode, req.url().toString()); 914 915 // check that we can handle the returned content type; if not, abort before fetching it 916 String contentType = res.contentType(); 917 if (contentType != null 918 && !req.ignoreContentType() 919 && !contentType.startsWith("text/") 920 && !xmlContentTypeRxp.matcher(contentType).matches() 921 ) 922 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 923 contentType, req.url().toString()); 924 925 // switch to the XML parser if content type is xml and not parser not explicitly set 926 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 927 if (!req.parserDefined) req.parser(Parser.xmlParser()); 928 } 929 930 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 931 if (res.contentLength != 0 && req.method() != HEAD) { // -1 means unknown, chunked. 
sun throws an IO exception on 500 response with no content when trying to read body 932 InputStream stream = executor.responseBody(); 933 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 934 stream = new GZIPInputStream(stream); 935 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 936 stream = new InflaterInputStream(stream, new Inflater(true)); 937 938 res.bodyStream = ControllableInputStream.wrap( 939 stream, DefaultBufferSize, req.maxBodySize()) 940 .timeout(startTime, req.timeout()); 941 942 if (req.responseProgress != null) // set response progress listener 943 res.bodyStream.onProgress(res.contentLength, req.responseProgress, res); 944 } else { 945 res.byteData = DataUtil.emptyByteBuffer(); 946 } 947 } catch (IOException e) { 948 if (res != null) res.safeClose(); // will be non-null if got to conn 949 throw e; 950 } finally { 951 req.executing.unlock(); 952 953 // detach any thread local auth delegate 954 if (req.authenticator != null) 955 AuthenticationHandler.handler.remove(); 956 } 957 958 res.executed = true; 959 return res; 960 } 961 962 @Override 963 public int statusCode() { 964 return statusCode; 965 } 966 967 @Override 968 public String statusMessage() { 969 return statusMessage; 970 } 971 972 @Override @Nullable 973 public String charset() { 974 return charset; 975 } 976 977 @Override 978 public Response charset(String charset) { 979 this.charset = charset; 980 return this; 981 } 982 983 @Override @Nullable 984 public String contentType() { 985 return contentType; 986 } 987 988 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. 
*/ 989 private ControllableInputStream prepareParse() { 990 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 991 ControllableInputStream stream = bodyStream; 992 if (byteData != null) { // bytes have been read in to the buffer, parse that 993 ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit()); 994 stream = ControllableInputStream.wrap(bytes, 0); // no max 995 inputStreamRead = false; // ok to reparse if in bytes 996 } 997 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 998 Validate.notNull(stream); 999 inputStreamRead = true; 1000 return stream; 1001 } 1002 1003 @Override public Document parse() throws IOException { 1004 ControllableInputStream stream = prepareParse(); 1005 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 1006 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 
1007 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 1008 safeClose(); 1009 return doc; 1010 } 1011 1012 @Override public StreamParser streamParser() throws IOException { 1013 ControllableInputStream stream = prepareParse(); 1014 String baseUri = url.toExternalForm(); 1015 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharsetForStreamParser(stream, charset, baseUri, req.parser()); 1016 1017 // set up the stream parser and rig this connection up to the parsed doc: 1018 StreamParser streamer = new StreamParser(req.parser()); 1019 BufferedReader reader = new BufferedReader(new InputStreamReader(charsetDoc.input, charsetDoc.charset)); 1020 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 1021 streamer.document().connection(new HttpConnection(req, this)); 1022 charset = charsetDoc.charset.name(); 1023 1024 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 1025 return streamer; 1026 } 1027 1028 /** 1029 Reads the bodyStream into byteData. A no-op if already executed. 1030 */ 1031 @Override 1032 public Connection.Response readFully() throws IOException { 1033 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1034 if (bodyStream != null && byteData == null) { 1035 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1036 try { 1037 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1038 } finally { 1039 inputStreamRead = true; 1040 safeClose(); 1041 } 1042 } 1043 return this; 1044 } 1045 1046 /** 1047 Reads the body, but throws an UncheckedIOException if an IOException occurs. 
1048 @throws UncheckedIOException if an IOException occurs 1049 */ 1050 private void readByteDataUnchecked() { 1051 try { 1052 readFully(); 1053 } catch (IOException e) { 1054 throw new UncheckedIOException(e); 1055 } 1056 } 1057 1058 @Override 1059 public String readBody() throws IOException { 1060 readFully(); 1061 return body(); 1062 } 1063 1064 @Override 1065 public String body() { 1066 readByteDataUnchecked(); 1067 Validate.notNull(byteData); 1068 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1069 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1070 .decode(byteData).toString(); 1071 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1072 return body; 1073 } 1074 1075 @Override 1076 public byte[] bodyAsBytes() { 1077 readByteDataUnchecked(); 1078 Validate.notNull(byteData); 1079 Validate.isTrue(byteData.hasArray()); // we made it, so it should 1080 1081 byte[] array = byteData.array(); 1082 int offset = byteData.arrayOffset(); 1083 int length = byteData.limit(); 1084 1085 if (offset == 0 && length == array.length) { // exact, just return it 1086 return array; 1087 } else { // trim to size 1088 byte[] exactArray = new byte[length]; 1089 System.arraycopy(array, offset, exactArray, 0, length); 1090 return exactArray; 1091 } 1092 } 1093 1094 @Override 1095 public Connection.Response bufferUp() { 1096 readByteDataUnchecked(); 1097 return this; 1098 } 1099 1100 @Override 1101 public BufferedInputStream bodyStream() { 1102 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1103 1104 // if we have read to bytes (via readFully), return those as a stream. 
1105 if (byteData != null) { 1106 return new BufferedInputStream( 1107 new ByteArrayInputStream(byteData.array(), 0, byteData.limit()), 1108 DefaultBufferSize); 1109 } 1110 1111 Validate.isFalse(inputStreamRead, "Request has already been read"); 1112 Validate.notNull(bodyStream); 1113 inputStreamRead = true; 1114 return bodyStream.inputStream(); 1115 } 1116 1117 /** 1118 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1119 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1120 */ 1121 private void safeClose() { 1122 if (bodyStream != null) { 1123 try { 1124 bodyStream.close(); 1125 } catch (IOException e) { 1126 // no-op 1127 } finally { 1128 bodyStream = null; 1129 } 1130 } 1131 1132 if (executor != null) executor.safeClose(); // disconnect 1133 } 1134 1135 Response(HttpConnection.Request request) { 1136 this.req = request; 1137 } 1138 1139 // set up url, method, header, cookies 1140 void prepareResponse(Map<String, List<String>> resHeaders, HttpConnection.@Nullable Response previousResponse) throws IOException { 1141 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1142 CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store 1143 1144 if (previousResponse != null) { // was redirected 1145 // map previous response cookies into this response cookies() object 1146 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1147 if (!hasCookie(prevCookie.getKey())) 1148 cookie(prevCookie.getKey(), prevCookie.getValue()); 1149 } 1150 previousResponse.safeClose(); 1151 1152 // enforce too many redirects: 1153 numRedirects = previousResponse.numRedirects + 1; 1154 if (numRedirects >= MAX_REDIRECTS) 1155 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1156 } 1157 } 1158 1159 void 
processResponseHeaders(Map<String, List<String>> resHeaders) { 1160 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1161 String name = entry.getKey(); 1162 if (name == null) 1163 continue; // http/1.1 line 1164 1165 List<String> values = entry.getValue(); 1166 for (String value : values) { 1167 addHeader(name, fixHeaderEncoding(value)); 1168 } 1169 } 1170 } 1171 1172 /** 1173 Servers may encode response headers in UTF-8 instead of RFC defined 8859. The JVM decodes the headers (before we see them) as 8859, which can lead to mojibake data. 1174 <p>This method attempts to detect that and re-decode the string as UTF-8.</p> 1175 <p>However on Android, the headers will be decoded as UTF8, so we can detect and pass those directly.</p> 1176 * @param val a header value string that may have been incorrectly decoded as 8859. 1177 * @return a potentially re-decoded string. 1178 */ 1179 @Nullable 1180 static String fixHeaderEncoding(@Nullable String val) { 1181 if (val == null) return val; 1182 // If we can't encode the string as 8859, then it couldn't have been decoded as 8859 1183 if (!StandardCharsets.ISO_8859_1.newEncoder().canEncode(val)) 1184 return val; 1185 byte[] bytes = val.getBytes(ISO_8859_1); 1186 if (looksLikeUtf8(bytes)) 1187 return new String(bytes, UTF_8); 1188 else 1189 return val; 1190 } 1191 1192 private static boolean looksLikeUtf8(byte[] input) { 1193 int i = 0; 1194 // BOM: 1195 if (input.length >= 3 1196 && (input[0] & 0xFF) == 0xEF 1197 && (input[1] & 0xFF) == 0xBB 1198 && (input[2] & 0xFF) == 0xBF) { 1199 i = 3; 1200 } 1201 1202 int end; 1203 boolean foundNonAscii = false; 1204 for (int j = input.length; i < j; ++i) { 1205 int o = input[i]; 1206 if ((o & 0x80) == 0) { 1207 continue; // ASCII 1208 } 1209 foundNonAscii = true; 1210 1211 // UTF-8 leading: 1212 if ((o & 0xE0) == 0xC0) { 1213 end = i + 1; 1214 } else if ((o & 0xF0) == 0xE0) { 1215 end = i + 2; 1216 } else if ((o & 0xF8) == 0xF0) { 1217 end = i + 3; 1218 } else { 1219 
return false; 1220 } 1221 1222 if (end >= input.length) 1223 return false; 1224 1225 while (i < end) { 1226 i++; 1227 o = input[i]; 1228 if ((o & 0xC0) != 0x80) { 1229 return false; 1230 } 1231 } 1232 } 1233 return foundNonAscii; 1234 } 1235 1236 private static void setOutputContentType(final HttpConnection.Request req) { 1237 final String contentType = req.header(CONTENT_TYPE); 1238 String bound = null; 1239 if (contentType != null) { 1240 // no-op; don't add content type as already set (e.g. for requestBody()) 1241 // todo - if content type already set, we could add charset 1242 1243 // if user has set content type to multipart/form-data, auto add boundary. 1244 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1245 bound = DataUtil.mimeBoundary(); 1246 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1247 } 1248 1249 } 1250 else if (needsMultipart(req)) { 1251 bound = DataUtil.mimeBoundary(); 1252 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1253 } else { 1254 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1255 } 1256 req.mimeBoundary = bound; 1257 } 1258 1259 static void writePost(final HttpConnection.Request req, final OutputStream outputStream) throws IOException { 1260 try (OutputStreamWriter osw = new OutputStreamWriter(outputStream, req.postDataCharset()); 1261 BufferedWriter w = new BufferedWriter(osw)) { 1262 implWritePost(req, w, outputStream); 1263 } 1264 } 1265 1266 private static void implWritePost(final HttpConnection.Request req, final BufferedWriter w, final OutputStream outputStream) throws IOException { 1267 final Collection<Connection.KeyVal> data = req.data(); 1268 final String boundary = req.mimeBoundary; 1269 1270 if (boundary != null) { // a multipart post 1271 for (Connection.KeyVal keyVal : data) { 1272 w.write("--"); 1273 w.write(boundary); 1274 w.write("\r\n"); 1275 w.write("Content-Disposition: form-data; name=\""); 
1276 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1277 w.write("\""); 1278 final InputStream input = keyVal.inputStream(); 1279 if (input != null) { 1280 w.write("; filename=\""); 1281 w.write(encodeMimeName(keyVal.value())); 1282 w.write("\"\r\nContent-Type: "); 1283 String contentType = keyVal.contentType(); 1284 w.write(contentType != null ? contentType : DefaultUploadType); 1285 w.write("\r\n\r\n"); 1286 w.flush(); 1287 DataUtil.crossStreams(input, outputStream); 1288 outputStream.flush(); 1289 } else { 1290 w.write("\r\n\r\n"); 1291 w.write(keyVal.value()); 1292 } 1293 w.write("\r\n"); 1294 } 1295 w.write("--"); 1296 w.write(boundary); 1297 w.write("--"); 1298 } else if (req.body != null) { // a single body (bytes or plain text); data will be in query string 1299 if (req.body instanceof String) { 1300 w.write((String) req.body); 1301 } else if (req.body instanceof InputStream) { 1302 DataUtil.crossStreams((InputStream) req.body, outputStream); 1303 outputStream.flush(); 1304 } else { 1305 throw new IllegalStateException(); 1306 } 1307 } else { // regular form data (application/x-www-form-urlencoded) 1308 boolean first = true; 1309 for (Connection.KeyVal keyVal : data) { 1310 if (!first) w.append('&'); 1311 else first = false; 1312 1313 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1314 w.write('='); 1315 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1316 } 1317 } 1318 } 1319 1320 // for get url reqs, serialise the data map into the url 1321 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1322 UrlBuilder in = new UrlBuilder(req.url()); 1323 1324 for (Connection.KeyVal keyVal : req.data()) { 1325 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1326 in.appendKeyVal(keyVal); 1327 } 1328 req.url(in.build()); 1329 req.data().clear(); // moved into url as get params 1330 } 1331 } 1332 1333 private static boolean 
needsMultipart(Connection.Request req) { 1334 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1335 for (Connection.KeyVal keyVal : req.data()) { 1336 if (keyVal.hasInputStream()) 1337 return true; 1338 } 1339 return false; 1340 } 1341 1342 public static class KeyVal implements Connection.KeyVal { 1343 private String key; 1344 private String value; 1345 private @Nullable InputStream stream; 1346 private @Nullable String contentType; 1347 1348 public static KeyVal create(String key, String value) { 1349 return new KeyVal(key, value); 1350 } 1351 1352 public static KeyVal create(String key, String filename, InputStream stream) { 1353 return new KeyVal(key, filename) 1354 .inputStream(stream); 1355 } 1356 1357 private KeyVal(String key, String value) { 1358 Validate.notEmptyParam(key, "key"); 1359 Validate.notNullParam(value, "value"); 1360 this.key = key; 1361 this.value = value; 1362 } 1363 1364 @Override 1365 public KeyVal key(String key) { 1366 Validate.notEmptyParam(key, "key"); 1367 this.key = key; 1368 return this; 1369 } 1370 1371 @Override 1372 public String key() { 1373 return key; 1374 } 1375 1376 @Override 1377 public KeyVal value(String value) { 1378 Validate.notNullParam(value, "value"); 1379 this.value = value; 1380 return this; 1381 } 1382 1383 @Override 1384 public String value() { 1385 return value; 1386 } 1387 1388 @Override 1389 public KeyVal inputStream(InputStream inputStream) { 1390 Validate.notNullParam(value, "inputStream"); 1391 this.stream = inputStream; 1392 return this; 1393 } 1394 1395 @Override @Nullable 1396 public InputStream inputStream() { 1397 return stream; 1398 } 1399 1400 @Override 1401 public boolean hasInputStream() { 1402 return stream != null; 1403 } 1404 1405 @Override 1406 public Connection.KeyVal contentType(String contentType) { 1407 Validate.notEmpty(contentType); 1408 this.contentType = contentType; 1409 return this; 1410 } 1411 1412 @Override @Nullable 
1413 public String contentType() { 1414 return contentType; 1415 } 1416 1417 @Override 1418 public String toString() { 1419 return key + "=" + value; 1420 } 1421 } 1422}