diff --git a/cvat/apps/engine/cloud_provider.py b/cvat/apps/engine/cloud_provider.py index 0992139d9bd..8a7e3bd3a8a 100644 --- a/cvat/apps/engine/cloud_provider.py +++ b/cvat/apps/engine/cloud_provider.py @@ -256,7 +256,7 @@ def list_files_on_one_page( search_prefix = prefix if self.prefix and (len(prefix) < len(self.prefix)): - if '/' in self.prefix[len(prefix):]: + if prefix and '/' in self.prefix[len(prefix):]: next_layer_and_tail = self.prefix[prefix.find('/') + 1:].split( "/", maxsplit=1 ) diff --git a/tests/python/rest_api/test_cloud_storages.py b/tests/python/rest_api/test_cloud_storages.py index 1c99143585b..8c61b0b0369 100644 --- a/tests/python/rest_api/test_cloud_storages.py +++ b/tests/python/rest_api/test_cloud_storages.py @@ -445,6 +445,7 @@ def test_org_user_get_cloud_storage_preview( self._test_cannot_see(username, storage_id) +@pytest.mark.usefixtures("restore_db_per_function") class TestGetCloudStorageContent: USER = "admin1" @@ -477,13 +478,14 @@ def _test_get_cloud_storage_content( @pytest.mark.parametrize("cloud_storage_id", [2]) @pytest.mark.parametrize( - "version, manifest, prefix, page_size, expected_content", + "version, manifest, prefix, default_bucket_prefix, page_size, expected_content", [ ( SUPPORTED_VERSIONS.V1, # [v1] list all bucket content "sub/manifest.jsonl", None, None, + None, ["sub/image_case_65_1.png", "sub/image_case_65_2.png"], ), ( @@ -491,6 +493,7 @@ def _test_get_cloud_storage_content( "sub/manifest.jsonl", None, None, + None, [FileInfo(mime_type="DIR", name="sub", type="DIR")], ), ( @@ -498,6 +501,7 @@ def _test_get_cloud_storage_content( "sub/manifest.jsonl", "sub/image_case_65_1", None, + None, [ FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), ], @@ -507,6 +511,7 @@ def _test_get_cloud_storage_content( "sub/manifest.jsonl", "sub/", None, + None, [ FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), FileInfo(mime_type="image", name="image_case_65_2.png", type="REG"), @@ -517,12 +522,14 @@ def _test_get_cloud_storage_content( None, None, None, + None, [FileInfo(mime_type="DIR", name="sub", type="DIR")], ), ( SUPPORTED_VERSIONS.V2, # [v2] list the second layer (directory "sub") of real bucket content None, "sub/", + None, 2, [ FileInfo(mime_type="unknown", name="demo_manifest.jsonl", type="REG"), @@ -534,6 +541,83 @@ def _test_get_cloud_storage_content( None, "/sub/", # cover case: API is identical to share point API None, + None, + [ + FileInfo(mime_type="unknown", name="demo_manifest.jsonl", type="REG"), + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + FileInfo(mime_type="image", name="image_case_65_2.png", type="REG"), + FileInfo(mime_type="unknown", name="manifest.jsonl", type="REG"), + FileInfo(mime_type="unknown", name="manifest_1.jsonl", type="REG"), + FileInfo(mime_type="unknown", name="manifest_2.jsonl", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list bucket content based on manifest when default bucket prefix is set to directory + "sub/manifest.jsonl", + None, + "sub/", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + FileInfo(mime_type="image", name="image_case_65_2.png", type="REG"), + ], + ), + ( + # [v2] list bucket content based on manifest when default bucket prefix + # is set to template from which the files should start + SUPPORTED_VERSIONS.V2, + "sub/manifest.jsonl", + None, + "sub/image_case_65_1", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list bucket content based on manifest when specified prefix is stricter than default bucket prefix + "sub/manifest.jsonl", + "sub/image_case_65_1", + "sub/image_case", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list bucket content based on manifest when default bucket prefix is stricter than specified prefix + "sub/manifest.jsonl", + "sub/image_case", + "sub/image_case_65_1", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list bucket content based on manifest when default bucket prefix and specified prefix have no intersection + "sub/manifest.jsonl", + "sub/image_case_65_1", + "sub/image_case_65_2", + None, + [], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list bucket content based on manifest when default bucket prefix contains dirs and prefix starts with it + "sub/manifest.jsonl", + "s", + "sub/", + None, + [ + FileInfo(mime_type="DIR", name="sub", type="DIR"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list real bucket content when default bucket prefix is set to directory + None, + None, + "sub/", + None, [ FileInfo(mime_type="unknown", name="demo_manifest.jsonl", type="REG"), FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), @@ -543,6 +627,56 @@ def _test_get_cloud_storage_content( FileInfo(mime_type="unknown", name="manifest_2.jsonl", type="REG"), ], ), + ( + # [v2] list real bucket content when default bucket prefix + # is set to template from which the files should start + SUPPORTED_VERSIONS.V2, + None, + None, + "sub/demo", + None, + [ + FileInfo(mime_type="unknown", name="demo_manifest.jsonl", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list real bucket content when specified prefix is stricter than default bucket prefix + None, + "sub/image_case_65_1", + "sub/image_case", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list real bucket content when default bucket prefix is stricter than specified prefix + None, + "sub/image_case", + "sub/image_case_65_1", + None, + [ + FileInfo(mime_type="image", name="image_case_65_1.png", type="REG"), + ], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list real bucket content when default bucket prefix and specified prefix have no intersection + None, + "sub/image_case_65_1", + "sub/image_case_65_2", + None, + [], + ), + ( + SUPPORTED_VERSIONS.V2, # [v2] list real bucket content when default bucket prefix contains dirs and prefix starts with it + None, + "s", + "sub/", + None, + [ + FileInfo(mime_type="DIR", name="sub", type="DIR"), + ], + ), ], ) def test_get_cloud_storage_content( @@ -551,9 +685,23 @@ def test_get_cloud_storage_content( version: SUPPORTED_VERSIONS, manifest: Optional[str], prefix: Optional[str], + default_bucket_prefix: Optional[str], page_size: Optional[int], expected_content: Optional[Any], + cloud_storages, ): + if default_bucket_prefix: + cloud_storage = cloud_storages[cloud_storage_id] + + with make_api_client(self.USER) as api_client: + (_, response) = api_client.cloudstorages_api.partial_update( + cloud_storage_id, + patched_cloud_storage_write_request={ + "specific_attributes": f'{cloud_storage["specific_attributes"]}&prefix={default_bucket_prefix}' + }, + ) + assert response.status == HTTPStatus.OK + result = self._test_get_cloud_storage_content( cloud_storage_id, version, manifest, prefix=prefix, page_size=page_size ) diff --git a/tests/python/rest_api/test_tasks.py b/tests/python/rest_api/test_tasks.py index e93f76ff43c..f433101f608 100644 --- a/tests/python/rest_api/test_tasks.py +++ b/tests/python/rest_api/test_tasks.py @@ -1620,6 +1620,55 @@ def test_cannot_create_task_with_same_skeleton_sublabels(self): response = get_method(self._USERNAME, "tasks") assert response.status_code == HTTPStatus.OK + @pytest.mark.with_external_services + @pytest.mark.parametrize("cloud_storage_id", [2]) + @pytest.mark.parametrize("use_manifest", [True, False]) + @pytest.mark.parametrize("server_files", [["test/"]]) + @pytest.mark.parametrize( + "default_prefix, expected_task_size", + [ + ( + "test/sub_1/img_0", + 1, + ), + ( + "test/sub_1/", + 3, + ), + ], + ) + @pytest.mark.parametrize("org", [""]) + def test_create_task_with_cloud_storage_directories_and_default_bucket_prefix( + self, + cloud_storage_id: int, + use_manifest: bool, + server_files: List[str], + default_prefix: str, + expected_task_size: int, + org: str, + cloud_storages, + request, + ): + cloud_storage = cloud_storages[cloud_storage_id] + + with make_api_client(self._USERNAME) as api_client: + (_, response) = api_client.cloudstorages_api.partial_update( + cloud_storage_id, + patched_cloud_storage_write_request={ + "specific_attributes": f'{cloud_storage["specific_attributes"]}&prefix={default_prefix}' + }, + ) + assert response.status == HTTPStatus.OK + + task_id, _ = self._create_task_with_cloud_data( + request, cloud_storage, use_manifest, server_files, org=org + ) + + with make_api_client(self._USERNAME) as api_client: + (task, response) = api_client.tasks_api.retrieve(task_id) + assert response.status == HTTPStatus.OK + assert task.size == expected_task_size + @pytest.mark.usefixtures("restore_db_per_function") class TestPatchTaskLabel: diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 2fd2fedd7fa..dc050687b39 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -622,7 +622,7 @@ def emulate_hierarchical_structure( search_prefix = prefix if default_prefix and (len(prefix) < len(default_prefix)): - if '/' in self.prefix[len(prefix):]: + if prefix and '/' in default_prefix[len(prefix):]: next_layer_and_tail = default_prefix[prefix.find('/') + 1:].split( "/", maxsplit=1 )