updates

2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions
--- a/Backend/venv/lib/python3.12/site-packages/httpx/_urlparse.py
+++ b/Backend/venv/lib/python3.12/site-packages/httpx/_urlparse.py
@@ -15,6 +15,9 @@ Previously we relied on the excellent `rfc3986` package to handle URL parsing an
 validation, but this module provides a simpler alternative, with less indirection
 required.
 """
+
+from __future__ import annotations
+
 import ipaddress
 import re
 import typing
@@ -33,6 +36,67 @@ SUB_DELIMS = "!$&'()*+,;="

 PERCENT_ENCODED_REGEX = re.compile("%[A-Fa-f0-9]{2}")

+# https://url.spec.whatwg.org/#percent-encoded-bytes
+
+# The fragment percent-encode set is the C0 control percent-encode set
+# and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`).
+FRAG_SAFE = "".join(
+    [chr(i) for i in range(0x20, 0x7F) if i not in (0x20, 0x22, 0x3C, 0x3E, 0x60)]
+)
+
+# The query percent-encode set is the C0 control percent-encode set
+# and U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>).
+QUERY_SAFE = "".join(
+    [chr(i) for i in range(0x20, 0x7F) if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E)]
+)
+
+# The path percent-encode set is the query percent-encode set
+# and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}).
+PATH_SAFE = "".join(
+    [
+        chr(i)
+        for i in range(0x20, 0x7F)
+        if i not in (0x20, 0x22, 0x23, 0x3C, 0x3E) + (0x3F, 0x60, 0x7B, 0x7D)
+    ]
+)
+
+# The userinfo percent-encode set is the path percent-encode set
+# and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@),
+# U+005B ([) to U+005E (^), inclusive, and U+007C (|).
+USERNAME_SAFE = "".join(
+    [
+        chr(i)
+        for i in range(0x20, 0x7F)
+        if i
+        not in (0x20, 0x22, 0x23, 0x3C, 0x3E)
+        + (0x3F, 0x60, 0x7B, 0x7D)
+        + (0x2F, 0x3A, 0x3B, 0x3D, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7C)
+    ]
+)
+PASSWORD_SAFE = "".join(
+    [
+        chr(i)
+        for i in range(0x20, 0x7F)
+        if i
+        not in (0x20, 0x22, 0x23, 0x3C, 0x3E)
+        + (0x3F, 0x60, 0x7B, 0x7D)
+        + (0x2F, 0x3A, 0x3B, 0x3D, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7C)
+    ]
+)
+# Note... The terminology 'userinfo' percent-encode set in the WHATWG document
+# is used for the username and password quoting. For the joint userinfo component
+# we remove U+003A (:) from the safe set.
+USERINFO_SAFE = "".join(
+    [
+        chr(i)
+        for i in range(0x20, 0x7F)
+        if i
+        not in (0x20, 0x22, 0x23, 0x3C, 0x3E)
+        + (0x3F, 0x60, 0x7B, 0x7D)
+        + (0x2F, 0x3B, 0x3D, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7C)
+    ]
+)
+

 # {scheme}:      (optional)
 # //{authority}  (optional)
@@ -62,8 +126,8 @@ AUTHORITY_REGEX = re.compile(
    (
        r"(?:(?P<userinfo>{userinfo})@)?" r"(?P<host>{host})" r":?(?P<port>{port})?"
    ).format(
-        userinfo="[^@]*",  # Any character sequence not including '@'.
-        host="(\\[.*\\]|[^:]*)",  # Either any character sequence not including ':',
+        userinfo=".*",  # Any character sequence.
+        host="(\\[.*\\]|[^:@]*)",  # Either any character sequence excluding ':' or '@',
        # or an IPv6 address enclosed within square brackets.
        port=".*",  # Any character sequence.
    )
@@ -87,7 +151,7 @@ COMPONENT_REGEX = {

 # We use these simple regexs as a first pass before handing off to
 # the stdlib 'ipaddress' module for IP address validation.
-IPv4_STYLE_HOSTNAME = re.compile(r"^[0-9]+.[0-9]+.[0-9]+.[0-9]+$")
+IPv4_STYLE_HOSTNAME = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$")
 IPv6_STYLE_HOSTNAME = re.compile(r"^\[.*\]$")


@@ -95,10 +159,10 @@ class ParseResult(typing.NamedTuple):
    scheme: str
    userinfo: str
    host: str
-    port: typing.Optional[int]
+    port: int | None
    path: str
-    query: typing.Optional[str]
-    fragment: typing.Optional[str]
+    query: str | None
+    fragment: str | None

    @property
    def authority(self) -> str:
@@ -119,7 +183,7 @@ class ParseResult(typing.NamedTuple):
            ]
        )

-    def copy_with(self, **kwargs: typing.Optional[str]) -> "ParseResult":
+    def copy_with(self, **kwargs: str | None) -> ParseResult:
        if not kwargs:
            return self

@@ -146,7 +210,7 @@ class ParseResult(typing.NamedTuple):
        )


-def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
+def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
    # Initial basic checks on allowable URLs.
    # ---------------------------------------

@@ -157,7 +221,12 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
    # If a URL includes any ASCII control characters including \t, \r, \n,
    # then treat it as invalid.
    if any(char.isascii() and not char.isprintable() for char in url):
-        raise InvalidURL("Invalid non-printable ASCII character in URL")
+        char = next(char for char in url if char.isascii() and not char.isprintable())
+        idx = url.find(char)
+        error = (
+            f"Invalid non-printable ASCII character in URL, {char!r} at position {idx}."
+        )
+        raise InvalidURL(error)

    # Some keyword arguments require special handling.
    # ------------------------------------------------
@@ -174,8 +243,8 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:

    # Replace "username" and/or "password" with "userinfo".
    if "username" in kwargs or "password" in kwargs:
-        username = quote(kwargs.pop("username", "") or "")
-        password = quote(kwargs.pop("password", "") or "")
+        username = quote(kwargs.pop("username", "") or "", safe=USERNAME_SAFE)
+        password = quote(kwargs.pop("password", "") or "", safe=PASSWORD_SAFE)
        kwargs["userinfo"] = f"{username}:{password}" if password else username

    # Replace "raw_path" with "path" and "query".
@@ -202,9 +271,15 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
            # If a component includes any ASCII control characters including \t, \r, \n,
            # then treat it as invalid.
            if any(char.isascii() and not char.isprintable() for char in value):
-                raise InvalidURL(
-                    f"Invalid non-printable ASCII character in URL component '{key}'"
+                char = next(
+                    char for char in value if char.isascii() and not char.isprintable()
                )
+                idx = value.find(char)
+                error = (
+                    f"Invalid non-printable ASCII character in URL {key} component, "
+                    f"{char!r} at position {idx}."
+                )
+                raise InvalidURL(error)

            # Ensure that keyword arguments match as a valid regex.
            if not COMPONENT_REGEX[key].fullmatch(value):
@@ -224,7 +299,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
    authority = kwargs.get("authority", url_dict["authority"]) or ""
    path = kwargs.get("path", url_dict["path"]) or ""
    query = kwargs.get("query", url_dict["query"])
-    fragment = kwargs.get("fragment", url_dict["fragment"])
+    frag = kwargs.get("fragment", url_dict["fragment"])

    # The AUTHORITY_REGEX will always match, but may have empty components.
    authority_match = AUTHORITY_REGEX.match(authority)
@@ -241,32 +316,21 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
    # We end up with a parsed representation of the URL,
    # with components that are plain ASCII bytestrings.
    parsed_scheme: str = scheme.lower()
-    parsed_userinfo: str = quote(userinfo, safe=SUB_DELIMS + ":")
+    parsed_userinfo: str = quote(userinfo, safe=USERINFO_SAFE)
    parsed_host: str = encode_host(host)
-    parsed_port: typing.Optional[int] = normalize_port(port, scheme)
+    parsed_port: int | None = normalize_port(port, scheme)

    has_scheme = parsed_scheme != ""
    has_authority = (
        parsed_userinfo != "" or parsed_host != "" or parsed_port is not None
    )
    validate_path(path, has_scheme=has_scheme, has_authority=has_authority)
-    if has_authority:
+    if has_scheme or has_authority:
        path = normalize_path(path)

-    # The GEN_DELIMS set is... : / ? # [ ] @
-    # These do not need to be percent-quoted unless they serve as delimiters for the
-    # specific component.
-
-    # For 'path' we need to drop ? and # from the GEN_DELIMS set.
-    parsed_path: str = quote(path, safe=SUB_DELIMS + ":/[]@")
-    # For 'query' we need to drop '#' from the GEN_DELIMS set.
-    parsed_query: typing.Optional[str] = (
-        None if query is None else quote(query, safe=SUB_DELIMS + ":/?[]@")
-    )
-    # For 'fragment' we can include all of the GEN_DELIMS set.
-    parsed_fragment: typing.Optional[str] = (
-        None if fragment is None else quote(fragment, safe=SUB_DELIMS + ":/?#[]@")
-    )
+    parsed_path: str = quote(path, safe=PATH_SAFE)
+    parsed_query: str | None = None if query is None else quote(query, safe=QUERY_SAFE)
+    parsed_frag: str | None = None if frag is None else quote(frag, safe=FRAG_SAFE)

    # The parsed ASCII bytestrings are our canonical form.
    # All properties of the URL are derived from these.
@@ -277,7 +341,7 @@ def urlparse(url: str = "", **kwargs: typing.Optional[str]) -> ParseResult:
        parsed_port,
        parsed_path,
        parsed_query,
-        parsed_fragment,
+        parsed_frag,
    )


@@ -318,7 +382,8 @@ def encode_host(host: str) -> str:
        # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
        #
        # reg-name    = *( unreserved / pct-encoded / sub-delims )
-        return quote(host.lower(), safe=SUB_DELIMS)
+        WHATWG_SAFE = '"`{}%|\\'
+        return quote(host.lower(), safe=SUB_DELIMS + WHATWG_SAFE)

    # IDNA hostnames
    try:
@@ -327,9 +392,7 @@ def encode_host(host: str) -> str:
        raise InvalidURL(f"Invalid IDNA hostname: {host!r}")


-def normalize_port(
-    port: typing.Optional[typing.Union[str, int]], scheme: str
-) -> typing.Optional[int]:
+def normalize_port(port: str | int | None, scheme: str) -> int | None:
    # From https://tools.ietf.org/html/rfc3986#section-3.2.3
    #
    # "A scheme may define a default port.  For example, the "http" scheme
@@ -358,28 +421,27 @@ def normalize_port(

 def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
    """
-    Path validation rules that depend on if the URL contains a scheme or authority component.
+    Path validation rules that depend on if the URL contains
+    a scheme or authority component.

    See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
    """
    if has_authority:
-        # > If a URI contains an authority component, then the path component
-        # > must either be empty or begin with a slash ("/") character."
+        # If a URI contains an authority component, then the path component
+        # must either be empty or begin with a slash ("/") character."
        if path and not path.startswith("/"):
            raise InvalidURL("For absolute URLs, path must be empty or begin with '/'")
-    else:
-        # > If a URI does not contain an authority component, then the path cannot begin
-        # > with two slash characters ("//").
+
+    if not has_scheme and not has_authority:
+        # If a URI does not contain an authority component, then the path cannot begin
+        # with two slash characters ("//").
        if path.startswith("//"):
-            raise InvalidURL(
-                "URLs with no authority component cannot have a path starting with '//'"
-            )
-        # > In addition, a URI reference (Section 4.1) may be a relative-path reference, in which
-        # > case the first path segment cannot contain a colon (":") character.
-        if path.startswith(":") and not has_scheme:
-            raise InvalidURL(
-                "URLs with no scheme component cannot have a path starting with ':'"
-            )
+            raise InvalidURL("Relative URLs cannot have a path starting with '//'")
+
+        # In addition, a URI reference (Section 4.1) may be a relative-path reference,
+        # in which case the first path segment cannot contain a colon (":") character.
+        if path.startswith(":"):
+            raise InvalidURL("Relative URLs cannot have a path starting with ':'")


 def normalize_path(path: str) -> str:
@@ -390,9 +452,18 @@ def normalize_path(path: str) -> str:

        normalize_path("/path/./to/somewhere/..") == "/path/to"
    """
-    # https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+    # Fast return when no '.' characters in the path.
+    if "." not in path:
+        return path
+
    components = path.split("/")
-    output: typing.List[str] = []
+
+    # Fast return when no '.' or '..' components in the path.
+    if "." not in components and ".." not in components:
+        return path
+
+    # https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
+    output: list[str] = []
    for component in components:
        if component == ".":
            pass
@@ -404,59 +475,53 @@ def normalize_path(path: str) -> str:
    return "/".join(output)


-def percent_encode(char: str) -> str:
+def PERCENT(string: str) -> str:
+    return "".join([f"%{byte:02X}" for byte in string.encode("utf-8")])
+
+
+def percent_encoded(string: str, safe: str) -> str:
    """
-    Replace a single character with the percent-encoded representation.
-
-    Characters outside the ASCII range are represented with their a percent-encoded
-    representation of their UTF-8 byte sequence.
-
-    For example:
-
-        percent_encode(" ") == "%20"
+    Use percent-encoding to quote a string.
    """
-    return "".join([f"%{byte:02x}" for byte in char.encode("utf-8")]).upper()
+    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe

-
-def is_safe(string: str, safe: str = "/") -> bool:
-    """
-    Determine if a given string is already quote-safe.
-    """
-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe + "%"
-
-    # All characters must already be non-escaping or '%'
-    for char in string:
-        if char not in NON_ESCAPED_CHARS:
-            return False
-
-    # Any '%' characters must be valid '%xx' escape sequences.
-    return string.count("%") == len(PERCENT_ENCODED_REGEX.findall(string))
-
-
-def quote(string: str, safe: str = "/") -> str:
-    """
-    Use percent-encoding to quote a string if required.
-    """
-    if is_safe(string, safe=safe):
+    # Fast path for strings that don't need escaping.
+    if not string.rstrip(NON_ESCAPED_CHARS):
        return string

-    NON_ESCAPED_CHARS = UNRESERVED_CHARACTERS + safe
    return "".join(
-        [char if char in NON_ESCAPED_CHARS else percent_encode(char) for char in string]
+        [char if char in NON_ESCAPED_CHARS else PERCENT(char) for char in string]
    )


-def urlencode(items: typing.List[typing.Tuple[str, str]]) -> str:
-    # We can use a much simpler version of the stdlib urlencode here because
-    # we don't need to handle a bunch of different typing cases, such as bytes vs str.
-    #
-    # https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926
-    #
-    # Note that we use '%20' encoding for spaces, and treat '/' as a safe
-    # character. This means our query params have the same escaping as other
-    # characters in the URL path. This is slightly different to `requests`,
-    # but is the behaviour that browsers use.
-    #
-    # See https://github.com/encode/httpx/issues/2536 and
-    # https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
-    return "&".join([quote(k) + "=" + quote(v) for k, v in items])
+def quote(string: str, safe: str) -> str:
+    """
+    Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.
+
+    See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1
+
+    * `string`: The string to be percent-escaped.
+    * `safe`: A string containing characters that may be treated as safe, and do not
+        need to be escaped. Unreserved characters are always treated as safe.
+        See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
+    """
+    parts = []
+    current_position = 0
+    for match in re.finditer(PERCENT_ENCODED_REGEX, string):
+        start_position, end_position = match.start(), match.end()
+        matched_text = match.group(0)
+        # Add any text up to the '%xx' escape sequence.
+        if start_position != current_position:
+            leading_text = string[current_position:start_position]
+            parts.append(percent_encoded(leading_text, safe=safe))
+
+        # Add the '%xx' escape sequence.
+        parts.append(matched_text)
+        current_position = end_position
+
+    # Add any text after the final '%xx' escape sequence.
+    if current_position != len(string):
+        trailing_text = string[current_position:]
+        parts.append(percent_encoded(trailing_text, safe=safe))
+
+    return "".join(parts)