Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: LSDV-5337: Pre-signed file proxy url clashing with already html encoded values causing errors in signature #4447

Merged
merged 7 commits into from
Jun 29, 2023
Prev Previous commit
Next Next commit
Adding a test to outline resolution and support of the upper limits of file uris in cloud storage
  • Loading branch information
bmartel committed Jun 27, 2023
commit fd7215603b59f55820528a46ae65e9de506e3ab4
61 changes: 61 additions & 0 deletions label_studio/tests/tasks/test_presign_storage_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import base64
from unittest.mock import patch
from django.urls import reverse
from rest_framework import status
Expand Down Expand Up @@ -147,3 +148,63 @@ def mock_task_get(*args, **kwargs):
response.url
== "https://presigned-url.com/czM6Ly9oeXBlcnRleHQtYnVja2V0L2ZpbGUgd2l0aCAvc3BhY2VzIGFuZCcgLyAnIC8gcXVvdGVzLmpwZw=="
)

def test_successful_request_with_long_fileuri(
    self, view, task, project, user, monkeypatch
):
    """Check that a maximum-length ``fileuri`` fits in a URL and resolves.

    The test does two things:

    1. Budget check: with a 1024-character storage path, the base64-encoded
       ``fileuri`` plus the longest presign path and the scheme must leave at
       least 512 characters of a 2000-character URL for the origin (host).
    2. Round trip: the presign view, called with that encoded ``fileuri``,
       answers with a 303 redirect to the URL returned by the mocked
       ``task.resolve_storage_uri``.
    """
    task.resolve_storage_uri.return_value = dict(
        url="https://presigned-url.com/fileuri",
        presign_ttl=3600,
    )
    project.has_permission.return_value = True
    task.project = project

    def mock_task_get(*args, **kwargs):
        # Only task 1 exists for the purposes of this test.
        if kwargs["pk"] == 1:
            return task
        else:
            raise Task.DoesNotExist

    obj = MagicMock()
    obj.get = mock_task_get
    monkeypatch.setattr("tasks.models.Task.objects", obj)

    # Build a storage path of exactly 1024 characters (extension included):
    # thirteen 75-character segments followed by a 49-character tail.
    # 1024 is the upper limit this test targets for a cloud storage path.
    repeated_segment = (
        "is/a/long/path/that/needs/to/be/1024/characters/long/so/that/it/gets/hashed"
    )
    path_tail = "is/long/path/that/needs/to/be/1024/characters.png"
    longest_allowable_cloud_storage_path = repeated_segment * 13 + path_tail
    assert len(longest_allowable_cloud_storage_path) == 1024

    longest_uri = f"azure-blob://{longest_allowable_cloud_storage_path}"

    # The fileuri travels in the query string as URL-safe base64 (not a hash).
    base64_encoded_uri = base64.urlsafe_b64encode(longest_uri.encode()).decode()

    # Determine the absolute upper bounds and make sure the URL still fits.
    # 2000 characters is used as the practical maximum URL length in browsers.
    longest_allowable_url_length = 2000
    # 2**63 - 1; presumably the largest task primary key -- confirm against the model.
    largest_allowable_task_key = 9223372036854775807
    longest_presign_path = f"/tasks/{largest_allowable_task_key}/presign/?fileuri="
    scheme_length = len("https://")
    longest_presign_path_length = len(longest_presign_path)
    longest_allowable_fileuri_hash_length = len(base64_encoded_uri)

    # What is left for the origin is the total budget MINUS every other part.
    # (Adding the path and fileuri lengths would inflate the budget and make
    # the assertion below impossible to fail.)
    remaining_url_origin_length = (
        longest_allowable_url_length
        - scheme_length
        - longest_presign_path_length
        - longest_allowable_fileuri_hash_length
    )

    # The user domain should be the shortest part of the url, but factoring in
    # lengthy subdomains with nested levels in staging and dev environments,
    # 512 characters is a safe allowance.
    assert remaining_url_origin_length >= 512

    # Check this resolves correctly on the server
    request = APIRequestFactory().get(
        reverse("data_import:storage-data-presign", kwargs={"task_id": 1})
        + f"?fileuri={base64_encoded_uri}"
    )

    request.user = user
    force_authenticate(request, user)

    response = view(request, task_id=1)

    # And that the response is correct
    assert response.status_code == status.HTTP_303_SEE_OTHER
    assert response.url == "https://presigned-url.com/fileuri"
Loading