Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support IDNA2003 #3284

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,15 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

### Added

* Support IDNA2003. (#3229)
waketzheng marked this conversation as resolved.
Show resolved Hide resolved

## 0.27.1 (27th August, 2024)

## Added
### Added

* Support for `zstd` content decoding using the python `zstandard` package is added. Installable using `httpx[zstd]`. (#3139)

Expand Down
1 change: 1 addition & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ what gets sent over the wire.*
* `.is_ssl` - **bool**
* `.is_absolute_url` - **bool**
* `.is_relative_url` - **bool**
* `.strict_idna` - **bool**
* `def .copy_with([scheme], [authority], [path], [query], [fragment])` - **URL**

## `Headers`
Expand Down
16 changes: 12 additions & 4 deletions httpx/_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,8 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
if ":" in host and not (host.startswith("[") and host.endswith("]")):
kwargs["host"] = f"[{host}]"

strict_idna = bool(kwargs.pop("strict_idna", False))

# If any keyword arguments are provided, ensure they are valid.
# -------------------------------------------------------------

Expand Down Expand Up @@ -256,7 +258,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
# with components that are plain ASCII bytestrings.
parsed_scheme: str = scheme.lower()
parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":")
parsed_host: str = encode_host(host)
parsed_host: str = encode_host(host, strict_idna=strict_idna)
parsed_port: int | None = normalize_port(port, scheme)

has_scheme = parsed_scheme != ""
Expand Down Expand Up @@ -300,7 +302,7 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
)


def encode_host(host: str) -> str:
def encode_host(host: str, strict_idna: bool = False) -> str:
if not host:
return ""

Expand Down Expand Up @@ -342,9 +344,15 @@ def encode_host(host: str) -> str:

# IDNA hostnames
try:
return idna.encode(host.lower()).decode("ascii")
encoded = idna.encode(host.lower())
except idna.IDNAError:
raise InvalidURL(f"Invalid IDNA hostname: {host!r}")
if strict_idna:
raise InvalidURL(f"Invalid IDNA hostname: {host!r}")
try:
encoded = host.lower().encode("idna")
except UnicodeError:
raise InvalidURL(f"Invalid hostname: {host!r}")
return encoded.decode("ascii")


def normalize_port(port: str | int | None, scheme: str) -> int | None:
Expand Down
11 changes: 8 additions & 3 deletions httpx/_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import idna

from ._exceptions import InvalidURL
from ._types import QueryParamTypes, RawURL
from ._urlparse import urlencode, urlparse
from ._utils import primitive_value_to_str
Expand Down Expand Up @@ -75,6 +76,7 @@ class URL:
"""

def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None:
self._strict_idna = kwargs.pop("strict_idna", False)
if kwargs:
allowed = {
"scheme": str,
Expand Down Expand Up @@ -114,7 +116,7 @@ def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None:
kwargs["query"] = None if not params else str(QueryParams(params))

if isinstance(url, str):
self._uri_reference = urlparse(url, **kwargs)
self._uri_reference = urlparse(url, strict_idna=self._strict_idna, **kwargs)
elif isinstance(url, URL):
self._uri_reference = url._uri_reference.copy_with(**kwargs)
else:
Expand Down Expand Up @@ -186,9 +188,12 @@ def host(self) -> str:
assert url.host == "::ffff:192.168.0.1"
"""
host: str = self._uri_reference.host

if host.startswith("xn--"):
host = idna.decode(host)
try:
host = idna.decode(host)
except (idna.IDNAError, idna.core.InvalidCodepoint):
if self._strict_idna:
raise InvalidURL(f"Invalid IDNA host: {host!r}")

return host

Expand Down
2 changes: 1 addition & 1 deletion tests/client/test_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def redirects(request: httpx.Request) -> httpx.Response:

elif request.url.path == "/invalid_redirect":
status_code = httpx.codes.SEE_OTHER
raw_headers = [(b"location", "https://😇/".encode("utf-8"))]
raw_headers = [(b"location", "https:///".encode("utf-8"))]
return httpx.Response(status_code, headers=raw_headers)

elif request.url.path == "/no_scheme_redirect":
Expand Down
13 changes: 11 additions & 2 deletions tests/models/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,16 @@ def test_url_invalid_hostname():
Ensure that invalid URLs raise an `httpx.InvalidURL` exception.
"""
with pytest.raises(httpx.InvalidURL):
httpx.URL("https://😇/")
httpx.URL("https://😇/", strict_idna=True)
with pytest.raises(httpx.InvalidURL):
assert httpx.URL("https://xn--n3h.com/", strict_idna=True).host


# Emoji hostnames such as 😇 and ☃ raise InvalidURL when strict_idna=True
# (see the strict tests in this diff). By default they are accepted and
# encoded to their punycode "xn--" form, so the emoji URL and its punycode
# spelling compare equal; an explicit strict_idna=False matches the default.
def test_url_with_emoji():
assert str(httpx.URL("https://😇/")) == "https://xn--l28h/"
assert httpx.URL("https://😇/") == httpx.URL("https://xn--l28h/")
assert httpx.URL("https://😇/", strict_idna=False) == httpx.URL("https://xn--l28h/")
assert str(httpx.URL("https://☃.com/")) == "https://xn--n3h.com/"


def test_url_excessively_long_url():
Expand Down Expand Up @@ -802,7 +811,7 @@ def test_url_escaped_idna_host():

def test_url_invalid_idna_host():
with pytest.raises(httpx.InvalidURL) as exc:
httpx.URL("https://☃.com/")
httpx.URL("https://☃.com/", strict_idna=True)
assert str(exc.value) == "Invalid IDNA hostname: '☃.com'"


Expand Down