From b27f46a11bb69429d7a88af5d1dadf1c3a013467 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 16:41:06 -0500 Subject: [PATCH] Fix double unquoting in url dispatcher (#9267) Co-authored-by: Sam Bull (cherry picked from commit 947b9c43374a2d6a73baab31c5f4bc106d4683ef) --- CHANGES/9267.breaking.rst | 1 + CHANGES/9267.bugfix.rst | 1 + aiohttp/web_urldispatcher.py | 24 ++++++++++++------------ requirements/base.txt | 2 +- requirements/constraints.txt | 2 +- requirements/dev.txt | 2 +- requirements/runtime-deps.in | 2 +- requirements/runtime-deps.txt | 2 +- requirements/test.txt | 2 +- setup.cfg | 2 +- tests/test_urldispatch.py | 17 ++++++++++++++--- tests/test_web_urldispatcher.py | 18 +++++++++++++++++- 12 files changed, 52 insertions(+), 23 deletions(-) create mode 100644 CHANGES/9267.breaking.rst create mode 120000 CHANGES/9267.bugfix.rst diff --git a/CHANGES/9267.breaking.rst b/CHANGES/9267.breaking.rst new file mode 100644 index 00000000000..82fec1d21b4 --- /dev/null +++ b/CHANGES/9267.breaking.rst @@ -0,0 +1 @@ +Increased minimum yarl version to 1.12.0 -- by :user:`bdraco`. diff --git a/CHANGES/9267.bugfix.rst b/CHANGES/9267.bugfix.rst new file mode 120000 index 00000000000..2a85c7ec63c --- /dev/null +++ b/CHANGES/9267.bugfix.rst @@ -0,0 +1 @@ +8898.bugfix.rst \ No newline at end of file diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 9c07f4ee9ad..8c1eef9094a 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -375,7 +375,7 @@ def register_route(self, route: "ResourceRoute") -> None: async def resolve(self, request: Request) -> _Resolve: allowed_methods: Set[str] = set() - match_dict = self._match(request.rel_url.path) + match_dict = self._match(request.rel_url.path_safe) if match_dict is None: return None, allowed_methods @@ -425,8 +425,7 @@ def _match(self, path: str) -> Optional[Dict[str, str]]: # string comparison is about 10 times faster than regexp matching if self._path == path: return {} - else: - return None + return None def raw_match(self, path: str) -> bool: return self._path == path @@ -497,10 +496,9 @@ def _match(self, path: str) -> Optional[Dict[str, str]]: match = self._pattern.fullmatch(path) if match is None: return None - else: - return { - key: _unquote_path(value) for key, value in match.groupdict().items() - } + return { + key: _unquote_path_safe(value) for key, value in match.groupdict().items() + } def raw_match(self, path: str) -> bool: return self._orig_path == path @@ -645,7 +643,7 @@ def set_options_route(self, handler: Handler) -> None: ) async def resolve(self, request: Request) -> _Resolve: - path = request.rel_url.path + path = request.rel_url.path_safe method = request.method allowed_methods = set(self._routes) if not path.startswith(self._prefix2) and path != self._prefix: @@ -654,7 +652,7 @@ async def resolve(self, request: Request) -> _Resolve: if method not in allowed_methods: return None, allowed_methods - match_dict = {"filename": _unquote_path(path[len(self._prefix) + 1 :])} + match_dict = {"filename": _unquote_path_safe(path[len(self._prefix) + 1 :])} return (UrlMappingMatchInfo(match_dict, self._routes[method]), allowed_methods) def __len__(self) -> int: @@ -1035,7 +1033,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: # candidates for a given url part because there are multiple resources # registered for the same canonical path, we resolve them in a linear # fashion to ensure registration order is respected. - url_part = request.rel_url.path + url_part = request.rel_url.path_safe while url_part: for candidate in resource_index.get(url_part, ()): match_dict, allowed = await candidate.resolve(request) @@ -1286,8 +1284,10 @@ def _quote_path(value: str) -> str: return URL.build(path=value, encoded=False).raw_path -def _unquote_path(value: str) -> str: - return URL.build(path=value, encoded=True).path.replace("%2F", "/") +def _unquote_path_safe(value: str) -> str: + if "%" not in value: + return value + return value.replace("%2F", "/").replace("%25", "%") def _requote_path(value: str) -> str: diff --git a/requirements/base.txt b/requirements/base.txt index d947f437f98..1eb566b4627 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -40,5 +40,5 @@ typing-extensions==4.12.2 # via multidict uvloop==0.20.0 ; platform_system != "Windows" and implementation_name == "cpython" # via -r requirements/base.in -yarl==1.11.1 +yarl==1.12.0 # via -r requirements/runtime-deps.in diff --git a/requirements/constraints.txt b/requirements/constraints.txt index 6ce841dc2c9..4f90ea80755 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -287,7 +287,7 @@ webcolors==24.8.0 # via blockdiag wheel==0.44.0 # via pip-tools -yarl==1.11.1 +yarl==1.12.0 # via -r requirements/runtime-deps.in zipp==3.20.2 # via diff --git a/requirements/dev.txt b/requirements/dev.txt index e4e6c6ac172..611f438c8b4 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -279,7 +279,7 @@ webcolors==24.8.0 # via blockdiag wheel==0.44.0 # via pip-tools -yarl==1.11.1 +yarl==1.12.0 # via -r requirements/runtime-deps.in zipp==3.20.2 # via diff --git a/requirements/runtime-deps.in b/requirements/runtime-deps.in index 1b440bc7c68..9a199453d55 100644 --- a/requirements/runtime-deps.in +++ b/requirements/runtime-deps.in @@ -9,4 +9,4 @@ Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' frozenlist >= 1.1.1 multidict >=4.5, < 7.0 -yarl >= 1.11.0, < 2.0 +yarl >= 1.12.0, < 2.0 diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index eea3d44a539..87d55bfa80c 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -34,5 +34,5 @@ pycparser==2.22 # via cffi typing-extensions==4.12.2 # via multidict -yarl==1.11.1 +yarl==1.12.0 # via -r requirements/runtime-deps.in diff --git a/requirements/test.txt b/requirements/test.txt index a479b9a5e38..c2b9653fd4a 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -137,5 +137,5 @@ uvloop==0.20.0 ; platform_system != "Windows" and implementation_name == "cpytho # via -r requirements/base.in wait-for-it==2.2.2 # via -r requirements/test.in -yarl==1.11.1 +yarl==1.12.0 # via -r requirements/runtime-deps.in diff --git a/setup.cfg b/setup.cfg index c5258115f11..91f4385aedc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -54,7 +54,7 @@ install_requires = attrs >= 17.3.0 frozenlist >= 1.1.1 multidict >=4.5, < 7.0 - yarl >= 1.11.0, < 2.0 + yarl >= 1.12.0, < 2.0 [options.exclude_package_data] * = diff --git a/tests/test_urldispatch.py b/tests/test_urldispatch.py index d0efa91593e..8c3eaed13b7 100644 --- a/tests/test_urldispatch.py +++ b/tests/test_urldispatch.py @@ -1,7 +1,7 @@ import pathlib import re from collections.abc import Container, Iterable, Mapping, MutableMapping, Sized -from urllib.parse import unquote +from urllib.parse import quote, unquote import pytest from re_assert import Matches @@ -457,7 +457,7 @@ def test_add_static_quoting(router) -> None: ) assert router["static"] is resource url = resource.url_for(filename="/1 2/файл%2F.txt") - assert url.path == "/пре %2Fфикс/1 2/файл%2F.txt" + assert url.path == "/пре /фикс/1 2/файл%2F.txt" assert str(url) == ( "/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81" "/1%202/%D1%84%D0%B0%D0%B9%D0%BB%252F.txt" @@ -630,7 +630,7 @@ def test_route_dynamic_quoting(router) -> None: route = router.add_route("GET", r"/пре %2Fфикс/{arg}", handler) url = route.url_for(arg="1 2/текст%2F") - assert url.path == "/пре %2Fфикс/1 2/текст%2F" + assert url.path == "/пре /фикс/1 2/текст%2F" assert str(url) == ( "/%D0%BF%D1%80%D0%B5%20%2F%D1%84%D0%B8%D0%BA%D1%81" "/1%202/%D1%82%D0%B5%D0%BA%D1%81%D1%82%252F" @@ -742,6 +742,17 @@ async def test_dynamic_match_unquoted_path(router) -> None: assert match_info == {"path": "path", "subpath": unquote(resource_id)} +async def test_dynamic_match_double_quoted_path(router: web.UrlDispatcher) -> None: + """Verify that double-quoted path is unquoted only once.""" + handler = make_handler() + router.add_route("GET", "/{path}/{subpath}", handler) + resource_id = quote("my/path|with!some%strange$characters", safe="") + double_quoted_resource_id = quote(resource_id, safe="") + req = make_mocked_request("GET", f"/path/{double_quoted_resource_id}") + match_info = await router.resolve(req) + assert match_info == {"path": "path", "subpath": resource_id} + + def test_add_route_not_started_with_slash(router) -> None: with pytest.raises(ValueError): handler = make_handler() diff --git a/tests/test_web_urldispatcher.py b/tests/test_web_urldispatcher.py index 7991cfe821e..eca365d2a25 100644 --- a/tests/test_web_urldispatcher.py +++ b/tests/test_web_urldispatcher.py @@ -5,7 +5,7 @@ import socket import sys from stat import S_IFIFO, S_IMODE -from typing import Any, Generator, Optional +from typing import Any, Generator, NoReturn, Optional import pytest import yarl @@ -885,6 +885,22 @@ async def handler(request: web.Request) -> web.Response: assert resp.status == expected_http_resp_status +async def test_decoded_raw_match_regex(aiohttp_client: AiohttpClient) -> None: + """Verify that raw_match only matches decoded url.""" + app = web.Application() + + async def handler(request: web.Request) -> NoReturn: + assert False + + app.router.add_get("/467%2C802%2C24834%2C24952%2C25362%2C40574/hello", handler) + client = await aiohttp_client(app) + + async with client.get( + yarl.URL("/467%2C802%2C24834%2C24952%2C25362%2C40574/hello", encoded=True) + ) as resp: + assert resp.status == 404 # should only match decoded url + + async def test_order_is_preserved(aiohttp_client: AiohttpClient) -> None: """Test route order is preserved.