001package org.jsoup.helper; 002 003import org.jsoup.Connection; 004import org.jsoup.HttpStatusException; 005import org.jsoup.Progress; 006import org.jsoup.UnsupportedMimeTypeException; 007import org.jsoup.internal.ControllableInputStream; 008import org.jsoup.internal.StringUtil; 009import org.jsoup.nodes.Document; 010import org.jsoup.parser.Parser; 011import org.jsoup.parser.StreamParser; 012import org.jspecify.annotations.Nullable; 013 014import javax.net.ssl.SSLContext; 015import javax.net.ssl.SSLSocketFactory; 016import java.io.BufferedInputStream; 017import java.io.BufferedReader; 018import java.io.BufferedWriter; 019import java.io.ByteArrayInputStream; 020import java.io.IOException; 021import java.io.InputStream; 022import java.io.InputStreamReader; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.io.UncheckedIOException; 026import java.net.CookieManager; 027import java.net.CookieStore; 028import java.net.InetSocketAddress; 029import java.net.MalformedURLException; 030import java.net.Proxy; 031import java.net.URL; 032import java.net.URLEncoder; 033import java.nio.Buffer; 034import java.nio.ByteBuffer; 035import java.nio.charset.Charset; 036import java.nio.charset.IllegalCharsetNameException; 037import java.nio.charset.StandardCharsets; 038import java.util.ArrayList; 039import java.util.Collection; 040import java.util.Collections; 041import java.util.LinkedHashMap; 042import java.util.List; 043import java.util.Map; 044import java.util.concurrent.locks.ReentrantLock; 045import java.util.regex.Pattern; 046import java.util.zip.GZIPInputStream; 047import java.util.zip.Inflater; 048import java.util.zip.InflaterInputStream; 049 050import static org.jsoup.Connection.Method.HEAD; 051import static org.jsoup.helper.DataUtil.UTF_8; 052import static org.jsoup.internal.Normalizer.lowerCase; 053import static org.jsoup.internal.SharedConstants.DefaultBufferSize; 054 055/** 056 * Implementation of {@link Connection}. 057 * @see org.jsoup.Jsoup#connect(String) 058 */ 059@SuppressWarnings("CharsetObjectCanBeUsed") 060public class HttpConnection implements Connection { 061 public static final String CONTENT_ENCODING = "Content-Encoding"; 062 /** 063 * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop 064 * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA. 065 */ 066 public static final String DEFAULT_UA = 067 "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"; 068 private static final String USER_AGENT = "User-Agent"; 069 public static final String CONTENT_TYPE = "Content-Type"; 070 public static final String MULTIPART_FORM_DATA = "multipart/form-data"; 071 public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded"; 072 private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set. 073 static final String DefaultUploadType = "application/octet-stream"; 074 private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1"); 075 076 private HttpConnection.Request req; 077 private Connection.@Nullable Response res; 078 @Nullable Object client; // The HttpClient for this Connection, if via the HttpClientExecutor 079 @Nullable RequestAuthenticator lastAuth; // The previous Authenticator used by this Connection, if via the HttpClientExecutor 080 081 /** 082 Create a new Connection, with the request URL specified. 083 @param url the URL to fetch from 084 @return a new Connection object 085 */ 086 public static Connection connect(String url) { 087 Connection con = new HttpConnection(); 088 con.url(url); 089 return con; 090 } 091 092 /** 093 Create a new Connection, with the request URL specified. 094 @param url the URL to fetch from 095 @return a new Connection object 096 */ 097 public static Connection connect(URL url) { 098 Connection con = new HttpConnection(); 099 con.url(url); 100 return con; 101 } 102 103 /** 104 Create a new, empty HttpConnection. 105 */ 106 public HttpConnection() { 107 req = new Request(); 108 req.connection = this; 109 } 110 111 /** 112 Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not 113 copied. All other settings (proxy, parser, cookies, etc) are copied. 114 @param copy the request to copy 115 */ 116 HttpConnection(Request copy) { 117 req = new Request(copy); 118 } 119 120 static String encodeMimeName(String val) { 121 return val.replace("\"", "%22"); 122 } 123 124 @Override 125 public Connection newRequest() { 126 // copy the prototype request for the different settings, cookie manager, etc 127 return new HttpConnection(req); 128 } 129 130 /** Create a new Connection that just wraps the provided Request and Response */ 131 private HttpConnection(Request req, Response res) { 132 this.req = req; 133 this.res = res; 134 } 135 136 @Override 137 public Connection url(URL url) { 138 req.url(url); 139 return this; 140 } 141 142 @Override 143 public Connection url(String url) { 144 Validate.notEmptyParam(url, "url"); 145 try { 146 req.url(new URL(url)); 147 } catch (MalformedURLException e) { 148 throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e); 149 } 150 return this; 151 } 152 153 @Override 154 public Connection proxy(@Nullable Proxy proxy) { 155 req.proxy(proxy); 156 return this; 157 } 158 159 @Override 160 public Connection proxy(String host, int port) { 161 req.proxy(host, port); 162 return this; 163 } 164 165 @Override 166 public Connection userAgent(String userAgent) { 167 Validate.notNullParam(userAgent, "userAgent"); 168 req.header(USER_AGENT, userAgent); 169 return this; 170 } 171 172 @Override 173 public Connection timeout(int millis) { 174 req.timeout(millis); 175 return this; 176 } 177 178 @Override 179 public Connection maxBodySize(int bytes) { 180 req.maxBodySize(bytes); 181 return this; 182 } 183 184 @Override 185 public Connection followRedirects(boolean followRedirects) { 186 req.followRedirects(followRedirects); 187 return this; 188 } 189 190 @Override 191 public Connection referrer(String referrer) { 192 Validate.notNullParam(referrer, "referrer"); 193 req.header("Referer", referrer); 194 return this; 195 } 196 197 @Override 198 public Connection method(Method method) { 199 req.method(method); 200 return this; 201 } 202 203 @Override 204 public Connection ignoreHttpErrors(boolean ignoreHttpErrors) { 205 req.ignoreHttpErrors(ignoreHttpErrors); 206 return this; 207 } 208 209 @Override 210 public Connection ignoreContentType(boolean ignoreContentType) { 211 req.ignoreContentType(ignoreContentType); 212 return this; 213 } 214 215 @Override 216 public Connection data(String key, String value) { 217 req.data(KeyVal.create(key, value)); 218 return this; 219 } 220 221 @Override 222 public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) { 223 req.sslSocketFactory(sslSocketFactory); 224 return this; 225 } 226 227 @Override 228 public Connection sslContext(SSLContext sslContext) { 229 req.sslContext(sslContext); 230 return this; 231 } 232 233 @Override 234 public Connection data(String key, String filename, InputStream inputStream) { 235 req.data(KeyVal.create(key, filename, inputStream)); 236 return this; 237 } 238 239 @Override 240 public Connection data(String key, String filename, InputStream inputStream, String contentType) { 241 req.data(KeyVal.create(key, filename, inputStream).contentType(contentType)); 242 return this; 243 } 244 245 @Override 246 public Connection data(Map<String, String> data) { 247 Validate.notNullParam(data, "data"); 248 for (Map.Entry<String, String> entry : data.entrySet()) { 249 req.data(KeyVal.create(entry.getKey(), entry.getValue())); 250 } 251 return this; 252 } 253 254 @Override 255 public Connection data(String... keyvals) { 256 Validate.notNullParam(keyvals, "keyvals"); 257 Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs"); 258 for (int i = 0; i < keyvals.length; i += 2) { 259 String key = keyvals[i]; 260 String value = keyvals[i+1]; 261 Validate.notEmpty(key, "Data key must not be empty"); 262 Validate.notNull(value, "Data value must not be null"); 263 req.data(KeyVal.create(key, value)); 264 } 265 return this; 266 } 267 268 @Override 269 public Connection data(Collection<Connection.KeyVal> data) { 270 Validate.notNullParam(data, "data"); 271 for (Connection.KeyVal entry: data) { 272 req.data(entry); 273 } 274 return this; 275 } 276 277 @Override 278 public Connection.@Nullable KeyVal data(String key) { 279 Validate.notEmptyParam(key, "key"); 280 for (Connection.KeyVal keyVal : request().data()) { 281 if (keyVal.key().equals(key)) 282 return keyVal; 283 } 284 return null; 285 } 286 287 @Override 288 public Connection requestBody(String body) { 289 req.requestBody(body); 290 return this; 291 } 292 293 @Override 294 public Connection requestBodyStream(InputStream stream) { 295 req.requestBodyStream(stream); 296 return this; 297 } 298 299 @Override 300 public Connection header(String name, String value) { 301 req.header(name, value); 302 return this; 303 } 304 305 @Override 306 public Connection headers(Map<String,String> headers) { 307 Validate.notNullParam(headers, "headers"); 308 for (Map.Entry<String,String> entry : headers.entrySet()) { 309 req.header(entry.getKey(),entry.getValue()); 310 } 311 return this; 312 } 313 314 @Override 315 public Connection cookie(String name, String value) { 316 req.cookie(name, value); 317 return this; 318 } 319 320 @Override 321 public Connection cookies(Map<String, String> cookies) { 322 Validate.notNullParam(cookies, "cookies"); 323 for (Map.Entry<String, String> entry : cookies.entrySet()) { 324 req.cookie(entry.getKey(), entry.getValue()); 325 } 326 return this; 327 } 328 329 @Override 330 public Connection cookieStore(CookieStore cookieStore) { 331 // create a new cookie manager using the new store 332 req.cookieManager = new CookieManager(cookieStore, null); 333 return this; 334 } 335 336 @Override 337 public CookieStore cookieStore() { 338 return req.cookieManager.getCookieStore(); 339 } 340 341 @Override 342 public Connection parser(Parser parser) { 343 req.parser(parser); 344 return this; 345 } 346 347 @Override 348 public Document get() throws IOException { 349 req.method(Method.GET); 350 execute(); 351 Validate.notNull(res); 352 return res.parse(); 353 } 354 355 @Override 356 public Document post() throws IOException { 357 req.method(Method.POST); 358 execute(); 359 Validate.notNull(res); 360 return res.parse(); 361 } 362 363 @Override 364 public Connection.Response execute() throws IOException { 365 res = Response.execute(req); 366 return res; 367 } 368 369 @Override 370 public Connection.Request request() { 371 return req; 372 } 373 374 @Override 375 public Connection request(Connection.Request request) { 376 req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired 377 return this; 378 } 379 380 @Override 381 public Connection.Response response() { 382 if (res == null) { 383 throw new IllegalArgumentException("You must execute the request before getting a response."); 384 } 385 return res; 386 } 387 388 @Override 389 public Connection response(Connection.Response response) { 390 res = response; 391 return this; 392 } 393 394 @Override 395 public Connection postDataCharset(String charset) { 396 req.postDataCharset(charset); 397 return this; 398 } 399 400 @Override public Connection auth(@Nullable RequestAuthenticator authenticator) { 401 req.auth(authenticator); 402 return this; 403 } 404 405 @Override public Connection onResponseProgress(Progress<Connection.Response> handler) { 406 req.responseProgress = handler; 407 return this; 408 } 409 410 @SuppressWarnings("unchecked") 411 private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> { 412 private static final URL UnsetUrl; // only used if you created a new Request() 413 static { 414 try { 415 UnsetUrl = new URL("http://undefined/"); 416 } catch (MalformedURLException e) { 417 throw new IllegalStateException(e); 418 } 419 } 420 421 URL url = UnsetUrl; 422 Method method = Method.GET; 423 Map<String, List<String>> headers; 424 Map<String, String> cookies; 425 426 private Base() { 427 headers = new LinkedHashMap<>(); 428 cookies = new LinkedHashMap<>(); 429 } 430 431 private Base(Base<T> copy) { 432 url = copy.url; // unmodifiable object 433 method = copy.method; 434 headers = new LinkedHashMap<>(); 435 for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) { 436 headers.put(entry.getKey(), new ArrayList<>(entry.getValue())); 437 } 438 cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings 439 } 440 441 @Override 442 public URL url() { 443 if (url == UnsetUrl) 444 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request."); 445 return url; 446 } 447 448 @Override 449 public T url(URL url) { 450 Validate.notNullParam(url, "url"); 451 this.url = new UrlBuilder(url).build(); 452 return (T) this; 453 } 454 455 @Override 456 public Method method() { 457 return method; 458 } 459 460 @Override 461 public T method(Method method) { 462 Validate.notNullParam(method, "method"); 463 this.method = method; 464 return (T) this; 465 } 466 467 @Override @Nullable 468 public String header(String name) { 469 Validate.notNullParam(name, "name"); 470 List<String> vals = getHeadersCaseInsensitive(name); 471 if (!vals.isEmpty()) { 472 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 473 return StringUtil.join(vals, ", "); 474 } 475 476 return null; 477 } 478 479 @Override 480 public T addHeader(String name, @Nullable String value) { 481 Validate.notEmptyParam(name, "name"); 482 value = value == null ? "" : value; 483 484 List<String> values = headers(name); 485 if (values.isEmpty()) { 486 values = new ArrayList<>(); 487 headers.put(name, values); 488 } 489 values.add(value); 490 491 return (T) this; 492 } 493 494 @Override 495 public List<String> headers(String name) { 496 Validate.notEmptyParam(name, "name"); 497 return getHeadersCaseInsensitive(name); 498 } 499 500 @Override 501 public T header(String name, String value) { 502 Validate.notEmptyParam(name, "name"); 503 removeHeader(name); // ensures we don't get an "accept-encoding" and an "Accept-Encoding" 504 addHeader(name, value); 505 return (T) this; 506 } 507 508 @Override 509 public boolean hasHeader(String name) { 510 Validate.notEmptyParam(name, "name"); 511 return !getHeadersCaseInsensitive(name).isEmpty(); 512 } 513 514 /** 515 * Test if the request has a header with this value (case-insensitive). 516 */ 517 @Override 518 public boolean hasHeaderWithValue(String name, String value) { 519 Validate.notEmpty(name); 520 Validate.notEmpty(value); 521 List<String> values = headers(name); 522 for (String candidate : values) { 523 if (value.equalsIgnoreCase(candidate)) 524 return true; 525 } 526 return false; 527 } 528 529 @Override 530 public T removeHeader(String name) { 531 Validate.notEmptyParam(name, "name"); 532 Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too 533 if (entry != null) 534 headers.remove(entry.getKey()); // ensures correct case 535 return (T) this; 536 } 537 538 @Override 539 public Map<String, String> headers() { 540 LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size()); 541 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 542 String header = entry.getKey(); 543 List<String> values = entry.getValue(); 544 if (!values.isEmpty()) 545 map.put(header, values.get(0)); 546 } 547 return map; 548 } 549 550 @Override 551 public Map<String, List<String>> multiHeaders() { 552 return headers; 553 } 554 555 private List<String> getHeadersCaseInsensitive(String name) { 556 Validate.notNull(name); 557 558 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 559 if (name.equalsIgnoreCase(entry.getKey())) 560 return entry.getValue(); 561 } 562 563 return Collections.emptyList(); 564 } 565 566 private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) { 567 String lc = lowerCase(name); 568 for (Map.Entry<String, List<String>> entry : headers.entrySet()) { 569 if (lowerCase(entry.getKey()).equals(lc)) 570 return entry; 571 } 572 return null; 573 } 574 575 @Override 576 public String cookie(String name) { 577 Validate.notEmptyParam(name, "name"); 578 return cookies.get(name); 579 } 580 581 @Override 582 public T cookie(String name, String value) { 583 Validate.notEmptyParam(name, "name"); 584 Validate.notNullParam(value, "value"); 585 cookies.put(name, value); 586 return (T) this; 587 } 588 589 @Override 590 public boolean hasCookie(String name) { 591 Validate.notEmptyParam(name, "name"); 592 return cookies.containsKey(name); 593 } 594 595 @Override 596 public T removeCookie(String name) { 597 Validate.notEmptyParam(name, "name"); 598 cookies.remove(name); 599 return (T) this; 600 } 601 602 @Override 603 public Map<String, String> cookies() { 604 return cookies; 605 } 606 } 607 608 public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request { 609 static { 610 System.setProperty("sun.net.http.allowRestrictedHeaders", "true"); 611 // make sure that we can send Sec-Fetch-Site headers etc. 612 } 613 614 HttpConnection connection; 615 private @Nullable Proxy proxy; 616 private int timeoutMilliseconds; 617 private int maxBodySizeBytes; 618 private boolean followRedirects; 619 private final Collection<Connection.KeyVal> data; 620 private @Nullable Object body = null; // String or InputStream 621 @Nullable String mimeBoundary; 622 private boolean ignoreHttpErrors = false; 623 private boolean ignoreContentType = false; 624 private Parser parser; 625 private boolean parserDefined = false; // called parser(...) vs initialized in ctor 626 private String postDataCharset = DataUtil.defaultCharsetName; 627 private @Nullable SSLSocketFactory sslSocketFactory; 628 @Nullable SSLContext sslContext; 629 private CookieManager cookieManager; 630 @Nullable RequestAuthenticator authenticator; 631 private @Nullable Progress<Connection.Response> responseProgress; 632 633 private final ReentrantLock executing = new ReentrantLock(); // detects and warns if same request used concurrently 634 635 Request() { 636 super(); 637 timeoutMilliseconds = 30000; // 30 seconds 638 maxBodySizeBytes = 1024 * 1024 * 2; // 2MB 639 followRedirects = true; 640 data = new ArrayList<>(); 641 method = Method.GET; 642 addHeader("Accept-Encoding", "gzip"); 643 addHeader(USER_AGENT, DEFAULT_UA); 644 parser = Parser.htmlParser(); 645 cookieManager = new CookieManager(); // creates a default InMemoryCookieStore 646 } 647 648 Request(Request copy) { 649 super(copy); 650 connection = copy.connection; 651 proxy = copy.proxy; 652 postDataCharset = copy.postDataCharset; 653 timeoutMilliseconds = copy.timeoutMilliseconds; 654 maxBodySizeBytes = copy.maxBodySizeBytes; 655 followRedirects = copy.followRedirects; 656 data = new ArrayList<>(); // data not copied 657 //body not copied 658 ignoreHttpErrors = copy.ignoreHttpErrors; 659 ignoreContentType = copy.ignoreContentType; 660 parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy 661 parserDefined = copy.parserDefined; 662 sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share 663 sslContext = copy.sslContext; 664 cookieManager = copy.cookieManager; 665 authenticator = copy.authenticator; 666 responseProgress = copy.responseProgress; 667 } 668 669 @Override @Nullable 670 public Proxy proxy() { 671 return proxy; 672 } 673 674 @Override 675 public Request proxy(@Nullable Proxy proxy) { 676 this.proxy = proxy; 677 return this; 678 } 679 680 @Override 681 public Request proxy(String host, int port) { 682 this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port)); 683 return this; 684 } 685 686 @Override 687 public int timeout() { 688 return timeoutMilliseconds; 689 } 690 691 @Override 692 public Request timeout(int millis) { 693 Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater"); 694 timeoutMilliseconds = millis; 695 return this; 696 } 697 698 @Override 699 public int maxBodySize() { 700 return maxBodySizeBytes; 701 } 702 703 @Override 704 public Connection.Request maxBodySize(int bytes) { 705 Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger"); 706 maxBodySizeBytes = bytes; 707 return this; 708 } 709 710 @Override 711 public boolean followRedirects() { 712 return followRedirects; 713 } 714 715 @Override 716 public Connection.Request followRedirects(boolean followRedirects) { 717 this.followRedirects = followRedirects; 718 return this; 719 } 720 721 @Override 722 public boolean ignoreHttpErrors() { 723 return ignoreHttpErrors; 724 } 725 726 @Override @Nullable 727 public SSLSocketFactory sslSocketFactory() { 728 return sslSocketFactory; 729 } 730 731 @Override 732 public void sslSocketFactory(SSLSocketFactory sslSocketFactory) { 733 this.sslSocketFactory = sslSocketFactory; 734 } 735 736 @Override @Nullable 737 public SSLContext sslContext() { 738 return sslContext; 739 } 740 741 @Override 742 public Connection.Request sslContext(SSLContext sslContext) { 743 this.sslContext = sslContext; 744 return this; 745 } 746 747 @Override 748 public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) { 749 this.ignoreHttpErrors = ignoreHttpErrors; 750 return this; 751 } 752 753 @Override 754 public boolean ignoreContentType() { 755 return ignoreContentType; 756 } 757 758 @Override 759 public Connection.Request ignoreContentType(boolean ignoreContentType) { 760 this.ignoreContentType = ignoreContentType; 761 return this; 762 } 763 764 @Override 765 public Request data(Connection.KeyVal keyval) { 766 Validate.notNullParam(keyval, "keyval"); 767 data.add(keyval); 768 return this; 769 } 770 771 @Override 772 public Collection<Connection.KeyVal> data() { 773 return data; 774 } 775 776 @Override 777 public Connection.Request requestBody(@Nullable String body) { 778 this.body = body; 779 return this; 780 } 781 782 @Override @Nullable 783 public String requestBody() { 784 return body instanceof String ? (String) body : null; 785 } 786 787 @Override 788 public Connection.Request requestBodyStream(InputStream stream) { 789 body = stream; 790 return this; 791 } 792 793 @Override 794 public Request parser(Parser parser) { 795 this.parser = parser; 796 parserDefined = true; 797 return this; 798 } 799 800 @Override 801 public Parser parser() { 802 return parser; 803 } 804 805 @Override 806 public Connection.Request postDataCharset(String charset) { 807 Validate.notNullParam(charset, "charset"); 808 if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset); 809 this.postDataCharset = charset; 810 return this; 811 } 812 813 @Override 814 public String postDataCharset() { 815 return postDataCharset; 816 } 817 818 CookieManager cookieManager() { 819 return cookieManager; 820 } 821 822 @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) { 823 this.authenticator = authenticator; 824 return this; 825 } 826 827 @Override @Nullable public RequestAuthenticator auth() { 828 return authenticator; 829 } 830 } 831 832 public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response { 833 private static final int MAX_REDIRECTS = 20; 834 private static final String LOCATION = "Location"; 835 int statusCode; 836 String statusMessage = ""; 837 private @Nullable ByteBuffer byteData; 838 private @Nullable ControllableInputStream bodyStream; 839 @Nullable RequestExecutor executor; 840 private @Nullable String charset; 841 @Nullable String contentType; 842 int contentLength; 843 private boolean executed = false; 844 private boolean inputStreamRead = false; 845 private int numRedirects = 0; 846 private final HttpConnection.Request req; 847 848 /* 849 * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc) 850 */ 851 private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*"); 852 853 /** 854 <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses 855 are created from the HttpURLConnection and fields defined. 856 */ 857 Response() { 858 super(); 859 statusCode = 400; 860 statusMessage = "Request not made"; 861 req = new Request(); 862 contentType = null; 863 } 864 865 static Response execute(HttpConnection.Request req) throws IOException { 866 return execute(req, null); 867 } 868 869 static Response execute(HttpConnection.Request req, @Nullable Response prevRes) throws IOException { 870 Validate.isTrue(req.executing.tryLock(), "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads."); 871 Validate.notNullParam(req, "req"); 872 URL url = req.url(); 873 Validate.notNull(url, "URL must be specified to connect"); 874 String protocol = url.getProtocol(); 875 if (!protocol.equals("http") && !protocol.equals("https")) 876 throw new MalformedURLException("Only http & https protocols supported"); 877 final boolean supportsBody = req.method().hasBody(); 878 final boolean hasBody = req.body != null; 879 if (!supportsBody) 880 Validate.isFalse(hasBody, "Cannot set a request body for HTTP method " + req.method()); 881 882 // set up the request for execution 883 if (!req.data().isEmpty() && (!supportsBody || hasBody)) 884 serialiseRequestUrl(req); 885 else if (supportsBody) 886 setOutputContentType(req); 887 888 long startTime = System.nanoTime(); 889 RequestExecutor executor = RequestDispatch.get(req, prevRes); 890 Response res = null; 891 try { 892 res = executor.execute(); 893 894 // redirect if there's a location header (from 3xx, or 201 etc) 895 if (res.hasHeader(LOCATION) && req.followRedirects()) { 896 if (res.statusCode != HTTP_TEMP_REDIR) { 897 req.method(Method.GET); // always redirect with a get. any data param from original req are dropped. 898 req.data().clear(); 899 req.requestBody(null); 900 req.removeHeader(CONTENT_TYPE); 901 } 902 903 String location = res.header(LOCATION); 904 Validate.notNull(location); 905 if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php 906 location = location.substring(6); 907 URL redir = StringUtil.resolve(req.url(), location); 908 req.url(redir); 909 910 return execute(req, res); 911 } 912 if ((res.statusCode < 200 || res.statusCode >= 400) && !req.ignoreHttpErrors()) 913 throw new HttpStatusException("HTTP error fetching URL", res.statusCode, req.url().toString()); 914 915 // check that we can handle the returned content type; if not, abort before fetching it 916 String contentType = res.contentType(); 917 if (contentType != null 918 && !req.ignoreContentType() 919 && !contentType.startsWith("text/") 920 && !xmlContentTypeRxp.matcher(contentType).matches() 921 ) 922 throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml", 923 contentType, req.url().toString()); 924 925 // switch to the XML parser if content type is xml and not parser not explicitly set 926 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) { 927 if (!req.parserDefined) req.parser(Parser.xmlParser()); 928 } 929 930 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it 931 if (res.contentLength != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body 932 InputStream stream = executor.responseBody(); 933 if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip")) 934 stream = new GZIPInputStream(stream); 935 else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate")) 936 stream = new InflaterInputStream(stream, new Inflater(true)); 937 938 res.bodyStream = ControllableInputStream.wrap( 939 stream, DefaultBufferSize, req.maxBodySize()) 940 .timeout(startTime, req.timeout()); 941 942 if (req.responseProgress != null) // set response progress listener 943 res.bodyStream.onProgress(res.contentLength, req.responseProgress, res); 944 } else { 945 res.byteData = DataUtil.emptyByteBuffer(); 946 } 947 } catch (IOException e) { 948 if (res != null) res.safeClose(); // will be non-null if got to conn 949 throw e; 950 } finally { 951 req.executing.unlock(); 952 953 // detach any thread local auth delegate 954 if (req.authenticator != null) 955 AuthenticationHandler.handler.remove(); 956 } 957 958 res.executed = true; 959 return res; 960 } 961 962 @Override 963 public int statusCode() { 964 return statusCode; 965 } 966 967 @Override 968 public String statusMessage() { 969 return statusMessage; 970 } 971 972 @Override @Nullable 973 public String charset() { 974 return charset; 975 } 976 977 @Override 978 public Response charset(String charset) { 979 this.charset = charset; 980 return this; 981 } 982 983 @Override @Nullable 984 public String contentType() { 985 return contentType; 986 } 987 988 /** Called from parse() or streamParser(), validates and prepares the input stream, and aligns common settings. */ 989 private ControllableInputStream prepareParse() { 990 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); 991 ControllableInputStream stream = bodyStream; 992 if (byteData != null) { // bytes have been read in to the buffer, parse that 993 ByteArrayInputStream bytes = new ByteArrayInputStream(byteData.array(), 0, byteData.limit()); 994 stream = ControllableInputStream.wrap(bytes, 0); // no max 995 inputStreamRead = false; // ok to reparse if in bytes 996 } 997 Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); 998 Validate.notNull(stream); 999 inputStreamRead = true; 1000 return stream; 1001 } 1002 1003 @Override public Document parse() throws IOException { 1004 ControllableInputStream stream = prepareParse(); 1005 Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser()); 1006 doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req? 1007 charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly 1008 safeClose(); 1009 return doc; 1010 } 1011 1012 @Override public StreamParser streamParser() throws IOException { 1013 ControllableInputStream stream = prepareParse(); 1014 String baseUri = url.toExternalForm(); 1015 DataUtil.CharsetDoc charsetDoc = DataUtil.detectCharset(stream, charset, baseUri, req.parser()); 1016 // note that there may be a document in CharsetDoc as a result of scanning meta-data -- but as requires a stream parse, it is not used here. todo - revisit. 1017 1018 // set up the stream parser and rig this connection up to the parsed doc: 1019 StreamParser streamer = new StreamParser(req.parser()); 1020 BufferedReader reader = new BufferedReader(new InputStreamReader(stream, charsetDoc.charset)); 1021 streamer.parse(reader, baseUri); // initializes the parse and the document, but does not step() it 1022 streamer.document().connection(new HttpConnection(req, this)); 1023 charset = charsetDoc.charset.name(); 1024 1025 // we don't safeClose() as in parse(); caller must close streamParser to close InputStream stream 1026 return streamer; 1027 } 1028 1029 /** 1030 Reads the bodyStream into byteData. A no-op if already executed. 1031 */ 1032 @Override 1033 public Connection.Response readFully() throws IOException { 1034 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1035 if (bodyStream != null && byteData == null) { 1036 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())"); 1037 try { 1038 byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize()); 1039 } finally { 1040 inputStreamRead = true; 1041 safeClose(); 1042 } 1043 } 1044 return this; 1045 } 1046 1047 /** 1048 Reads the body, but throws an UncheckedIOException if an IOException occurs. 1049 @throws UncheckedIOException if an IOException occurs 1050 */ 1051 private void readByteDataUnchecked() { 1052 try { 1053 readFully(); 1054 } catch (IOException e) { 1055 throw new UncheckedIOException(e); 1056 } 1057 } 1058 1059 @Override 1060 public String readBody() throws IOException { 1061 readFully(); 1062 return body(); 1063 } 1064 1065 @Override 1066 public String body() { 1067 readByteDataUnchecked(); 1068 Validate.notNull(byteData); 1069 // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet 1070 String body = (charset == null ? UTF_8 : Charset.forName(charset)) 1071 .decode(byteData).toString(); 1072 ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9 1073 return body; 1074 } 1075 1076 @Override 1077 public byte[] bodyAsBytes() { 1078 readByteDataUnchecked(); 1079 Validate.notNull(byteData); 1080 Validate.isTrue(byteData.hasArray()); // we made it, so it should 1081 1082 byte[] array = byteData.array(); 1083 int offset = byteData.arrayOffset(); 1084 int length = byteData.limit(); 1085 1086 if (offset == 0 && length == array.length) { // exact, just return it 1087 return array; 1088 } else { // trim to size 1089 byte[] exactArray = new byte[length]; 1090 System.arraycopy(array, offset, exactArray, 0, length); 1091 return exactArray; 1092 } 1093 } 1094 1095 @Override 1096 public Connection.Response bufferUp() { 1097 readByteDataUnchecked(); 1098 return this; 1099 } 1100 1101 @Override 1102 public BufferedInputStream bodyStream() { 1103 Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body"); 1104 1105 // if we have read to bytes (via readFully), return those as a stream. 1106 if (byteData != null) { 1107 return new BufferedInputStream( 1108 new ByteArrayInputStream(byteData.array(), 0, byteData.limit()), 1109 DefaultBufferSize); 1110 } 1111 1112 Validate.isFalse(inputStreamRead, "Request has already been read"); 1113 Validate.notNull(bodyStream); 1114 inputStreamRead = true; 1115 return bodyStream.inputStream(); 1116 } 1117 1118 /** 1119 * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows 1120 * keep-alives to work (as the underlying connection is actually held open, despite the name). 1121 */ 1122 private void safeClose() { 1123 if (bodyStream != null) { 1124 try { 1125 bodyStream.close(); 1126 } catch (IOException e) { 1127 // no-op 1128 } finally { 1129 bodyStream = null; 1130 } 1131 } 1132 1133 if (executor != null) executor.safeClose(); // disconnect 1134 } 1135 1136 Response(HttpConnection.Request request) { 1137 this.req = request; 1138 } 1139 1140 // set up url, method, header, cookies 1141 void prepareResponse(Map<String, List<String>> resHeaders, HttpConnection.@Nullable Response previousResponse) throws IOException { 1142 processResponseHeaders(resHeaders); // includes cookie key/val read during header scan 1143 CookieUtil.storeCookies(req, this, url, resHeaders); // add set cookies to cookie store 1144 1145 if (previousResponse != null) { // was redirected 1146 // map previous response cookies into this response cookies() object 1147 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) { 1148 if (!hasCookie(prevCookie.getKey())) 1149 cookie(prevCookie.getKey(), prevCookie.getValue()); 1150 } 1151 previousResponse.safeClose(); 1152 1153 // enforce too many redirects: 1154 numRedirects = previousResponse.numRedirects + 1; 1155 if (numRedirects >= MAX_REDIRECTS) 1156 throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url())); 1157 } 1158 } 1159 1160 void processResponseHeaders(Map<String, List<String>> resHeaders) { 1161 for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) { 1162 String name = entry.getKey(); 1163 if (name == null) 1164 continue; // http/1.1 line 1165 1166 List<String> values = entry.getValue(); 1167 for (String value : values) { 1168 addHeader(name, fixHeaderEncoding(value)); 1169 } 1170 } 1171 } 1172 1173 /** 1174 Servers may encode response headers in UTF-8 instead of RFC defined 8859. The JVM decodes the headers (before we see them) as 8859, which can lead to mojibake data. 1175 <p>This method attempts to detect that and re-decode the string as UTF-8.</p> 1176 <p>However on Android, the headers will be decoded as UTF8, so we can detect and pass those directly.</p> 1177 * @param val a header value string that may have been incorrectly decoded as 8859. 1178 * @return a potentially re-decoded string. 1179 */ 1180 @Nullable 1181 static String fixHeaderEncoding(@Nullable String val) { 1182 if (val == null) return val; 1183 // If we can't encode the string as 8859, then it couldn't have been decoded as 8859 1184 if (!StandardCharsets.ISO_8859_1.newEncoder().canEncode(val)) 1185 return val; 1186 byte[] bytes = val.getBytes(ISO_8859_1); 1187 if (looksLikeUtf8(bytes)) 1188 return new String(bytes, UTF_8); 1189 else 1190 return val; 1191 } 1192 1193 private static boolean looksLikeUtf8(byte[] input) { 1194 int i = 0; 1195 // BOM: 1196 if (input.length >= 3 1197 && (input[0] & 0xFF) == 0xEF 1198 && (input[1] & 0xFF) == 0xBB 1199 && (input[2] & 0xFF) == 0xBF) { 1200 i = 3; 1201 } 1202 1203 int end; 1204 boolean foundNonAscii = false; 1205 for (int j = input.length; i < j; ++i) { 1206 int o = input[i]; 1207 if ((o & 0x80) == 0) { 1208 continue; // ASCII 1209 } 1210 foundNonAscii = true; 1211 1212 // UTF-8 leading: 1213 if ((o & 0xE0) == 0xC0) { 1214 end = i + 1; 1215 } else if ((o & 0xF0) == 0xE0) { 1216 end = i + 2; 1217 } else if ((o & 0xF8) == 0xF0) { 1218 end = i + 3; 1219 } else { 1220 return false; 1221 } 1222 1223 if (end >= input.length) 1224 return false; 1225 1226 while (i < end) { 1227 i++; 1228 o = input[i]; 1229 if ((o & 0xC0) != 0x80) { 1230 return false; 1231 } 1232 } 1233 } 1234 return foundNonAscii; 1235 } 1236 1237 private static void setOutputContentType(final HttpConnection.Request req) { 1238 final String contentType = req.header(CONTENT_TYPE); 1239 String bound = null; 1240 if (contentType != null) { 1241 // no-op; don't add content type as already set (e.g. for requestBody()) 1242 // todo - if content type already set, we could add charset 1243 1244 // if user has set content type to multipart/form-data, auto add boundary. 1245 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) { 1246 bound = DataUtil.mimeBoundary(); 1247 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1248 } 1249 1250 } 1251 else if (needsMultipart(req)) { 1252 bound = DataUtil.mimeBoundary(); 1253 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound); 1254 } else { 1255 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset()); 1256 } 1257 req.mimeBoundary = bound; 1258 } 1259 1260 static void writePost(final HttpConnection.Request req, final OutputStream outputStream) throws IOException { 1261 try (OutputStreamWriter osw = new OutputStreamWriter(outputStream, req.postDataCharset()); 1262 BufferedWriter w = new BufferedWriter(osw)) { 1263 implWritePost(req, w, outputStream); 1264 } 1265 } 1266 1267 private static void implWritePost(final HttpConnection.Request req, final BufferedWriter w, final OutputStream outputStream) throws IOException { 1268 final Collection<Connection.KeyVal> data = req.data(); 1269 final String boundary = req.mimeBoundary; 1270 1271 if (boundary != null) { // a multipart post 1272 for (Connection.KeyVal keyVal : data) { 1273 w.write("--"); 1274 w.write(boundary); 1275 w.write("\r\n"); 1276 w.write("Content-Disposition: form-data; name=\""); 1277 w.write(encodeMimeName(keyVal.key())); // encodes " to %22 1278 w.write("\""); 1279 final InputStream input = keyVal.inputStream(); 1280 if (input != null) { 1281 w.write("; filename=\""); 1282 w.write(encodeMimeName(keyVal.value())); 1283 w.write("\"\r\nContent-Type: "); 1284 String contentType = keyVal.contentType(); 1285 w.write(contentType != null ? contentType : DefaultUploadType); 1286 w.write("\r\n\r\n"); 1287 w.flush(); 1288 DataUtil.crossStreams(input, outputStream); 1289 outputStream.flush(); 1290 } else { 1291 w.write("\r\n\r\n"); 1292 w.write(keyVal.value()); 1293 } 1294 w.write("\r\n"); 1295 } 1296 w.write("--"); 1297 w.write(boundary); 1298 w.write("--"); 1299 } else if (req.body != null) { // a single body (bytes or plain text); data will be in query string 1300 if (req.body instanceof String) { 1301 w.write((String) req.body); 1302 } else if (req.body instanceof InputStream) { 1303 DataUtil.crossStreams((InputStream) req.body, outputStream); 1304 outputStream.flush(); 1305 } else { 1306 throw new IllegalStateException(); 1307 } 1308 } else { // regular form data (application/x-www-form-urlencoded) 1309 boolean first = true; 1310 for (Connection.KeyVal keyVal : data) { 1311 if (!first) w.append('&'); 1312 else first = false; 1313 1314 w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset())); 1315 w.write('='); 1316 w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset())); 1317 } 1318 } 1319 } 1320 1321 // for get url reqs, serialise the data map into the url 1322 private static void serialiseRequestUrl(Connection.Request req) throws IOException { 1323 UrlBuilder in = new UrlBuilder(req.url()); 1324 1325 for (Connection.KeyVal keyVal : req.data()) { 1326 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string."); 1327 in.appendKeyVal(keyVal); 1328 } 1329 req.url(in.build()); 1330 req.data().clear(); // moved into url as get params 1331 } 1332 } 1333 1334 private static boolean needsMultipart(Connection.Request req) { 1335 // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary 1336 for (Connection.KeyVal keyVal : req.data()) { 1337 if (keyVal.hasInputStream()) 1338 return true; 1339 } 1340 return false; 1341 } 1342 1343 public static class KeyVal implements Connection.KeyVal { 1344 private String key; 1345 private String value; 1346 private @Nullable InputStream stream; 1347 private @Nullable String contentType; 1348 1349 public static KeyVal create(String key, String value) { 1350 return new KeyVal(key, value); 1351 } 1352 1353 public static KeyVal create(String key, String filename, InputStream stream) { 1354 return new KeyVal(key, filename) 1355 .inputStream(stream); 1356 } 1357 1358 private KeyVal(String key, String value) { 1359 Validate.notEmptyParam(key, "key"); 1360 Validate.notNullParam(value, "value"); 1361 this.key = key; 1362 this.value = value; 1363 } 1364 1365 @Override 1366 public KeyVal key(String key) { 1367 Validate.notEmptyParam(key, "key"); 1368 this.key = key; 1369 return this; 1370 } 1371 1372 @Override 1373 public String key() { 1374 return key; 1375 } 1376 1377 @Override 1378 public KeyVal value(String value) { 1379 Validate.notNullParam(value, "value"); 1380 this.value = value; 1381 return this; 1382 } 1383 1384 @Override 1385 public String value() { 1386 return value; 1387 } 1388 1389 @Override 1390 public KeyVal inputStream(InputStream inputStream) { 1391 Validate.notNullParam(value, "inputStream"); 1392 this.stream = inputStream; 1393 return this; 1394 } 1395 1396 @Override @Nullable 1397 public InputStream inputStream() { 1398 return stream; 1399 } 1400 1401 @Override 1402 public boolean hasInputStream() { 1403 return stream != null; 1404 } 1405 1406 @Override 1407 public Connection.KeyVal contentType(String contentType) { 1408 Validate.notEmpty(contentType); 1409 this.contentType = contentType; 1410 return this; 1411 } 1412 1413 @Override @Nullable 1414 public String contentType() { 1415 return contentType; 1416 } 1417 1418 @Override 1419 public String toString() { 1420 return key + "=" + value; 1421 } 1422 } 1423}