From 2bc5b059501b836659174b4dad0f8e8eb88c950d Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 15:22:40 +0800 Subject: [PATCH 1/6] Add strict_idna argument to support emoji domain --- httpx/_urlparse.py | 16 ++++++++++++---- httpx/_urls.py | 10 +++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 479c2ef8a1..763c03e52e 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -199,6 +199,8 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: if ":" in host and not (host.startswith("[") and host.endswith("]")): kwargs["host"] = f"[{host}]" + strict_idna = kwargs.pop("strict_idna", False) + # If any keyword arguments are provided, ensure they are valid. # ------------------------------------------------------------- @@ -256,7 +258,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: # with components that are plain ASCII bytestrings. parsed_scheme: str = scheme.lower() parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":") - parsed_host: str = encode_host(host) + parsed_host: str = encode_host(host, strict_idna=strict_idna) parsed_port: int | None = normalize_port(port, scheme) has_scheme = parsed_scheme != "" @@ -300,7 +302,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: ) -def encode_host(host: str) -> str: +def encode_host(host: str, strict_idna: typing.Any = False) -> str: if not host: return "" @@ -342,9 +344,15 @@ def encode_host(host: str) -> str: # IDNA hostnames try: - return idna.encode(host.lower()).decode("ascii") + encoded = idna.encode(host.lower()) except idna.IDNAError: - raise InvalidURL(f"Invalid IDNA hostname: {host!r}") + if strict_idna: + raise InvalidURL(f"Invalid IDNA hostname: {host!r}") + try: + encoded = host.lower().encode("idna") + except UnicodeEncodeError: + raise InvalidURL(f"Invalid hostname: {host!r}") + return encoded.decode("ascii") def normalize_port(port: str | int | None, scheme: str) -> int | None: diff --git a/httpx/_urls.py b/httpx/_urls.py index ec4ea6b399..81bcf549cd 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -75,6 +75,7 @@ class URL: """ def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None: + self._strict_idna = kwargs.pop("strict_idna", False) if kwargs: allowed = { "scheme": str, @@ -114,7 +115,7 @@ def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None: kwargs["query"] = None if not params else str(QueryParams(params)) if isinstance(url, str): - self._uri_reference = urlparse(url, **kwargs) + self._uri_reference = urlparse(url, strict_idna=self._strict_idna, **kwargs) elif isinstance(url, URL): self._uri_reference = url._uri_reference.copy_with(**kwargs) else: @@ -186,9 +187,12 @@ def host(self) -> str: assert url.host == "::ffff:192.168.0.1" """ host: str = self._uri_reference.host - if host.startswith("xn--"): - host = idna.decode(host) + try: + host = idna.decode(host) + except idna.IDNAError: + if self._strict_idna: + raise return host From 89c8db328514bd9442888e180c6448231d1b6edd Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 15:36:49 +0800 Subject: [PATCH 2/6] fix tests error --- tests/models/test_url.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 523a89bf65..8b0c3acb97 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -349,7 +349,14 @@ def test_url_invalid_hostname(): Ensure that invalid URLs raise an `httpx.InvalidURL` exception. """ with pytest.raises(httpx.InvalidURL): - httpx.URL("https://😇/") + httpx.URL("https://😇/", strict_idna=True) + + +def test_url_with_emoji(): + assert str(httpx.URL("https://😇/")) == "https://xn--l28h/" + assert httpx.URL("https://😇/") == httpx.URL("https://xn--l28h/") + assert httpx.URL("https://😇/", strict_idna=False) == httpx.URL("https://xn--l28h/") + assert str(httpx.URL("https://☃.com/")) == "https://xn--n3h.com/" def test_url_excessively_long_url(): @@ -802,7 +809,7 @@ def test_url_escaped_idna_host(): def test_url_invalid_idna_host(): with pytest.raises(httpx.InvalidURL) as exc: - httpx.URL("https://☃.com/") + httpx.URL("https://☃.com/", strict_idna=True) assert str(exc.value) == "Invalid IDNA hostname: '☃.com'" From 2b62365354b43b1927038db6c0c1983015519835 Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 15:47:46 +0800 Subject: [PATCH 3/6] Update changelog --- CHANGELOG.md | 1 + docs/api.md | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42197a2c77..816f7dc116 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## Added * Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139) +* Support IDNA2003. (#3229) ### Fixed diff --git a/docs/api.md b/docs/api.md index d01cc649ba..0c1ae84704 100644 --- a/docs/api.md +++ b/docs/api.md @@ -126,6 +126,7 @@ what gets sent over the wire.* * `.is_ssl` - **bool** * `.is_absolute_url` - **bool** * `.is_relative_url` - **bool** +* `.strict_idna` - **bool** * `def .copy_with([scheme], [authority], [path], [query], [fragment])` - **URL** ## `Headers` From fa1d8c615c97f89a5fb8fbb43f9bcea1b0c98878 Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 16:23:02 +0800 Subject: [PATCH 4/6] full test --- httpx/_urlparse.py | 2 +- httpx/_urls.py | 5 +++-- tests/client/test_redirects.py | 2 +- tests/models/test_url.py | 2 ++ 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index 763c03e52e..b2d24466ad 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -350,7 +350,7 @@ def encode_host(host: str, strict_idna: typing.Any = False) -> str: raise InvalidURL(f"Invalid IDNA hostname: {host!r}") try: encoded = host.lower().encode("idna") - except UnicodeEncodeError: + except UnicodeError: raise InvalidURL(f"Invalid hostname: {host!r}") return encoded.decode("ascii") diff --git a/httpx/_urls.py b/httpx/_urls.py index 81bcf549cd..3230761755 100644 --- a/httpx/_urls.py +++ b/httpx/_urls.py @@ -5,6 +5,7 @@ import idna +from ._exceptions import InvalidURL from ._types import QueryParamTypes, RawURL from ._urlparse import urlencode, urlparse from ._utils import primitive_value_to_str @@ -190,9 +191,9 @@ def host(self) -> str: if host.startswith("xn--"): try: host = idna.decode(host) - except idna.IDNAError: + except (idna.IDNAError, idna.core.InvalidCodepoint): if self._strict_idna: - raise + raise InvalidURL(f"Invalid IDNA host: {host!r}") return host diff --git a/tests/client/test_redirects.py b/tests/client/test_redirects.py index f65827134c..a0b46d4fea 100644 --- a/tests/client/test_redirects.py +++ b/tests/client/test_redirects.py @@ -37,7 +37,7 @@ def redirects(request: httpx.Request) -> httpx.Response: elif request.url.path == "/invalid_redirect": status_code = httpx.codes.SEE_OTHER - raw_headers = [(b"location", "https://😇/".encode("utf-8"))] + raw_headers = [(b"location", "https://�/".encode("utf-8"))] return httpx.Response(status_code, headers=raw_headers) elif request.url.path == "/no_scheme_redirect": diff --git a/tests/models/test_url.py b/tests/models/test_url.py index 8b0c3acb97..b9e6f0f1d5 100644 --- a/tests/models/test_url.py +++ b/tests/models/test_url.py @@ -350,6 +350,8 @@ def test_url_invalid_hostname(): """ with pytest.raises(httpx.InvalidURL): httpx.URL("https://😇/", strict_idna=True) + with pytest.raises(httpx.InvalidURL): + assert httpx.URL("https://xn--n3h.com/", strict_idna=True).host def test_url_with_emoji(): From 098b6600d927d3cfd94459b47efeaaa51671f5ae Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 20:25:39 +0800 Subject: [PATCH 5/6] Improve type hint and fix changelog conflict --- CHANGELOG.md | 6 +++++- httpx/_urlparse.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9104f6890..a40d419f18 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,16 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## 0.27.2 (Unreleased) + +## Added +* Support IDNA2003. (#3229) + ## 0.27.1 (27th August, 2024) ## Added * Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139) -* Support IDNA2003. (#3229) ### Fixed diff --git a/httpx/_urlparse.py b/httpx/_urlparse.py index b2d24466ad..17abb58005 100644 --- a/httpx/_urlparse.py +++ b/httpx/_urlparse.py @@ -199,7 +199,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: if ":" in host and not (host.startswith("[") and host.endswith("]")): kwargs["host"] = f"[{host}]" - strict_idna = kwargs.pop("strict_idna", False) + strict_idna = bool(kwargs.pop("strict_idna", False)) # If any keyword arguments are provided, ensure they are valid. # ------------------------------------------------------------- @@ -302,7 +302,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult: ) -def encode_host(host: str, strict_idna: typing.Any = False) -> str: +def encode_host(host: str, strict_idna: bool = False) -> str: if not host: return "" From c720e3a48297bedcc4c13d46710a23abfcebea61 Mon Sep 17 00:00:00 2001 From: Waket Zheng Date: Tue, 27 Aug 2024 20:27:15 +0800 Subject: [PATCH 6/6] fix version error --- CHANGELOG.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a40d419f18..eab041079d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## 0.27.2 (Unreleased) +## Unreleased + +### Added -## Added * Support IDNA2003. (#3229) ## 0.27.1 (27th August, 2024) -## Added +### Added * Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139)