Skip to content

Commit

Permalink
fixup! Fix: Integration test does not cover file downloads for AnVIL (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
dsotirho-ucsc committed Oct 2, 2024
1 parent 016a7b8 commit 5be3d3a
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions test/integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,15 +924,23 @@ def _assertResponseStatus(self,
)
)

def _uuid_column_name(self, catalog: CatalogName) -> str:
    """
    Return the name of the manifest column that holds the bundle UUIDs for
    the given catalog.

    :param catalog: the name of the catalog the manifest was generated for

    :return: ``'bundle_uuid'`` if ``config.is_hca_enabled`` accepts the
             catalog, ``'bundles.bundle_uuid'`` if
             ``config.is_anvil_enabled`` accepts it

    :raise AssertionError: if neither predicate accepts the catalog
    """
    if config.is_hca_enabled(catalog):
        return 'bundle_uuid'
    elif config.is_anvil_enabled(catalog):
        return 'bundles.bundle_uuid'
    else:
        # Raise explicitly rather than via `assert False`: assert statements
        # are removed under `python -O`, which would let this method fall
        # through and return None despite the `-> str` annotation.
        raise AssertionError(catalog)

def _check_compact_manifest(self, _catalog: CatalogName, response: bytes):
    """
    Validate the body of a compact manifest response as a CSV manifest.

    :param _catalog: the name of the catalog the manifest was requested
                     for; it selects the column expected to hold the
                     bundle UUIDs

    :param response: the raw bytes of the manifest response body
    """
    # The name of the bundle UUID column depends on the catalog, so look it
    # up instead of hard-coding 'bundle_uuid', and run the check only once.
    uuid_column_name = self._uuid_column_name(_catalog)
    self.__check_csv_manifest(BytesIO(response), uuid_column_name)

def _check_terra_bdbag_manifest(self, catalog: CatalogName, response: bytes):
with ZipFile(BytesIO(response)) as zip_fh:
data_path = os.path.join(os.path.dirname(first(zip_fh.namelist())), 'data')
file_path = os.path.join(data_path, 'participants.tsv')
with zip_fh.open(file_path) as file:
rows = self.__check_csv_manifest(file, 'bundle_uuid')
rows = self.__check_csv_manifest(file, self._uuid_column_name(catalog))
for row in rows:
# Terra doesn't allow colons in this column, but they may
# exist in versions indexed by TDR
Expand Down Expand Up @@ -1044,14 +1052,14 @@ def _read_csv_manifest(self, file: IO[bytes]) -> csv.DictReader:

def __check_csv_manifest(self,
file: IO[bytes],
uuid_field_name: str
uuid_column_name: str
) -> list[Mapping[str, str]]:
reader = self._read_csv_manifest(file)
rows = list(reader)
log.info(f'Manifest contains {len(rows)} rows.')
self.assertGreater(len(rows), 0)
self.assertIn(uuid_field_name, reader.fieldnames)
bundle_uuids = rows[0][uuid_field_name].split(ManifestGenerator.padded_joiner)
self.assertIn(uuid_column_name, reader.fieldnames)
bundle_uuids = rows[0][uuid_column_name].split(ManifestGenerator.padded_joiner)
self.assertGreater(len(bundle_uuids), 0)
for bundle_uuid in bundle_uuids:
self.assertEqual(bundle_uuid, str(uuid.UUID(bundle_uuid)))
Expand Down Expand Up @@ -1602,9 +1610,10 @@ def bundle_uuids(hit: JSON) -> set[str]:
def test_compact_manifest(expected_bundles):
manifest = BytesIO(self._get_url_content(PUT, manifest_url))
manifest_rows = self._read_csv_manifest(manifest)
uuid_column_name = self._uuid_column_name(catalog)
all_found_bundles = set()
for row in manifest_rows:
row_bundles = set(row['bundle_uuid'].split(ManifestGenerator.padded_joiner))
row_bundles = set(row[uuid_column_name].split(ManifestGenerator.padded_joiner))
# It's possible for one file to be present in multiple
# bundles (e.g. due to stitching), so each row may include
# additional bundles besides those included in the filters.
Expand Down

0 comments on commit 5be3d3a

Please sign in to comment.