Skip to content

Commit

Permalink
Fix: Integration test fails due to zero-length files in anvildev (#6581)
Browse files: browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Sep 30, 2024
1 parent d004e23 commit 10757e8
Showing 1 changed file with 24 additions and 7 deletions.
31 changes: 24 additions & 7 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,10 +678,10 @@ def _manifest_execution_ids(self,
execution_ids = {token.execution_id for token in tokens}
return execution_ids

def _get_one_inner_file(self, catalog: CatalogName) -> tuple[JSON, FileInnerEntity]:
    """
    Return one file hit from the given catalog together with the single
    inner file entity listed under that hit's `files` property.

    The outer hit is returned alongside the inner file so that callers can
    also inspect properties of the hit itself, not just the inner file.

    :param catalog: the name of the catalog to get the file from

    :return: a tuple of the outer file hit and its only inner file
    """
    outer_file = self._get_one_outer_file(catalog)
    inner_files: JSONs = outer_file['files']
    # `one` is relied upon to reject hits that don't list exactly one file
    return outer_file, cast(FileInnerEntity, one(inner_files))

@cache
def _get_one_outer_file(self, catalog: CatalogName) -> JSON:
Expand All @@ -702,6 +702,15 @@ def _get_one_outer_file(self, catalog: CatalogName) -> JSON:
self.fail('No files found')
return one(hits)

def _source_spec(self, catalog: CatalogName, entity: JSON) -> TDRSourceSpec:
    """
    Parse the spec of the only source listed under the `sources` property
    of the given entity.

    The name of the property holding the spec differs between HCA and
    AnVIL catalogs, so the catalog determines which one is read.

    :param catalog: the name of the catalog the entity was obtained from

    :param entity: an entity with a `sources` property

    :return: the parsed source spec
    """
    if config.is_hca_enabled(catalog):
        field = 'sourceSpec'
    elif config.is_anvil_enabled(catalog):
        field = 'source_spec'
    else:
        # Neither HCA nor AnVIL is enabled for this catalog
        assert False, catalog
    source = one(entity['sources'])
    return TDRSourceSpec.parse(source[field])

def _file_size_facet(self, catalog: CatalogName) -> str:
if config.is_hca_enabled(catalog):
return 'fileSize'
Expand Down Expand Up @@ -739,7 +748,7 @@ def _project_type(self, catalog: CatalogName) -> EntityType:

def _test_dos_and_drs(self, catalog: CatalogName):
if config.is_dss_enabled(catalog) and config.dss_direct_access:
file = self._get_one_inner_file(catalog)
_, file = self._get_one_inner_file(catalog)
self._test_dos(catalog, file)
self._test_drs(catalog, file)

Expand Down Expand Up @@ -1077,8 +1086,9 @@ def _check_jsonl_manifest(self, _catalog: CatalogName, response: bytes):

def _test_repository_files(self, catalog: CatalogName):
with self.subTest('repository_files', catalog=catalog):
file = self._get_one_inner_file(catalog)
file_uuid, file_version = file['uuid'], file['version']
outer_file, inner_file = self._get_one_inner_file(catalog)
source = self._source_spec(catalog, outer_file)
file_uuid, file_version = inner_file['uuid'], inner_file['version']
endpoint_url = config.service_endpoint
file_url = endpoint_url.set(path=f'/fetch/repository/files/{file_uuid}',
args=dict(catalog=catalog,
Expand All @@ -1101,7 +1111,7 @@ def _test_repository_files(self, catalog: CatalogName):
response = self._get_url_json(GET, furl(response['Location']))
self.assertNotIn('Retry-After', response)
response = self._get_url(GET, furl(response['Location']), stream=True)
self._validate_file_response(response, file)
self._validate_file_response(response, source, inner_file)

def _file_ext(self, file: FileInnerEntity) -> str:
# We believe that the file extension is a more reliable indicator than
Expand All @@ -1123,12 +1133,19 @@ def _validate_file_content(self, content: ReadableFileObject, file: FileInnerEnt

def _validate_file_response(self,
                            response: urllib3.HTTPResponse,
                            source: TDRSourceSpec,
                            file: FileInnerEntity):
    """
    Validate the response to a file download request and close it.

    Note: The response object must have been obtained with stream=True

    :param response: the HTTP response to the file download request

    :param source: the spec of the source the file belongs to, used to
                   recognize the snapshot whose files are known to be
                   empty regardless of what their metadata says

    :param file: the inner file entity to validate the content against
    """
    try:
        if source.name == 'ANVIL_1000G_2019_Dev_20230609_ANV5_202306121732':
            # All files in this snapshot were truncated to zero bytes by the
            # Broad to save costs. The metadata is not a reliable indication
            # of these files' actual size.
            self.assertEqual(response.headers['Content-Length'], '0')
        else:
            self._validate_file_content(response, file)
    finally:
        # Close the response even if one of the checks above fails
        response.close()

Expand Down

0 comments on commit 10757e8

Please sign in to comment.