Skip to content

Commit

Permalink
Remove --revision option of download command
Browse files (browse the repository at this point in the history)
  • Loading branch information
juhoinkinen committed Mar 13, 2024
1 parent 2fe5b73 commit d7be137
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 36 deletions.
23 changes: 9 additions & 14 deletions annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,13 +633,6 @@ def run_upload(project_ids_pattern, repo_id, token, commit_message):
help="""Authentication token, obtained from the Hugging Face Hub.
Will default to the stored token.""",
)
@click.option(
"--revision",
help="""
An optional Git revision id which can be a branch name, a tag, or a commit
hash.
""",
)
@click.option(
"--force",
"-f",
Expand All @@ -648,7 +641,7 @@ def run_upload(project_ids_pattern, repo_id, token, commit_message):
help="Replace an existing project/vocabulary/config with the downloaded one",
)
@cli_util.common_options
def run_download(project_ids_pattern, repo_id, token, revision, force):
def run_download(project_ids_pattern, repo_id, token, force):
"""
Download selected projects and their vocabularies from a Hugging Face Hub
repository.
Expand All @@ -657,30 +650,32 @@ def run_download(project_ids_pattern, repo_id, token, revision, force):
configuration files of the projects that match the given
`project_ids_pattern` from the specified Hugging Face Hub repository and
unzips the archives to `data/` directory and places the configuration files
to `projects.d/` directory. An authentication token and revision can
be given with options.
to `projects.d/` directory. An authentication token can be given with
`--token` option.
"""

project_ids = cli_util.get_matching_project_ids_from_hf_hub(
project_ids_pattern, repo_id, token, revision
project_ids_pattern,
repo_id,
token,
)
click.echo(f"Downloading project(s): {', '.join(project_ids)}")

vocab_ids = set()
for project_id in project_ids:
project_zip_cache_path = cli_util.download_from_hf_hub(
f"projects/{project_id}.zip", repo_id, token, revision
f"projects/{project_id}.zip", repo_id, token
)
cli_util.unzip_archive(project_zip_cache_path, force)
config_file_cache_path = cli_util.download_from_hf_hub(
f"{project_id}.cfg", repo_id, token, revision
f"{project_id}.cfg", repo_id, token
)
vocab_ids.add(cli_util.get_vocab_id_from_config(config_file_cache_path))
cli_util.copy_project_config(config_file_cache_path, force)

for vocab_id in vocab_ids:
vocab_zip_cache_path = cli_util.download_from_hf_hub(
f"vocabs/{vocab_id}.zip", repo_id, token, revision
f"vocabs/{vocab_id}.zip", repo_id, token
)
cli_util.unzip_archive(vocab_zip_cache_path, force)

Expand Down
18 changes: 5 additions & 13 deletions annif/cli_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,36 +332,29 @@ def _upload_to_hf_hub(


def get_matching_project_ids_from_hf_hub(
project_ids_pattern: str, repo_id: str, token, revision: str
project_ids_pattern: str, repo_id: str, token
) -> list[str]:
"""Get project IDs of the projects in a Hugging Face Model Hub repository that match
the given pattern."""
all_repo_file_paths = _list_files_in_hf_hub(repo_id, token, revision)
all_repo_file_paths = _list_files_in_hf_hub(repo_id, token)
return [
path.rsplit(".zip")[0].split("projects/")[1]
for path in all_repo_file_paths
if fnmatch(path, f"projects/{project_ids_pattern}.zip")
]


def _list_files_in_hf_hub(repo_id: str, token: str, revision: str) -> list[str]:
def _list_files_in_hf_hub(repo_id: str, token: str) -> list[str]:
from huggingface_hub import list_repo_files
from huggingface_hub.utils import HfHubHTTPError, HFValidationError

try:
return [
repofile
for repofile in list_repo_files(
repo_id=repo_id, token=token, revision=revision
)
]
return [repofile for repofile in list_repo_files(repo_id=repo_id, token=token)]
except (HfHubHTTPError, HFValidationError) as err:
raise OperationFailedException(str(err))


def download_from_hf_hub(
filename: str, repo_id: str, token: str, revision: str
) -> list[str]:
def download_from_hf_hub(filename: str, repo_id: str, token: str) -> list[str]:
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import HfHubHTTPError, HFValidationError

Expand All @@ -370,7 +363,6 @@ def download_from_hf_hub(
repo_id=repo_id,
filename=filename,
token=token,
revision=revision,
)
except (HfHubHTTPError, HFValidationError) as err:
raise OperationFailedException(str(err))
Expand Down
10 changes: 1 addition & 9 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ def test_get_project_config(app_project):
assert "[dummy-en]" in string_result


def hf_hub_download_mock_side_effect(filename, repo_id, token, revision):
def hf_hub_download_mock_side_effect(filename, repo_id, token):
return "tests/huggingface-cache/" + filename # Mocks the downloaded file paths


Expand Down Expand Up @@ -1189,19 +1189,16 @@ def test_download_dummy_fi(
repo_id="mock-repo",
filename="projects/dummy-fi.zip",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="dummy-fi.cfg",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="vocabs/dummy.zip",
token=None,
revision=None,
),
]
dirpath = os.path.join(str(testdatadir), "projects", "dummy-fi")
Expand Down Expand Up @@ -1247,31 +1244,26 @@ def test_download_dummy_fi_and_en(
repo_id="mock-repo",
filename="projects/dummy-fi.zip",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="dummy-fi.cfg",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="projects/dummy-en.zip",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="dummy-en.cfg",
token=None,
revision=None,
),
mock.call(
repo_id="mock-repo",
filename="vocabs/dummy.zip",
token=None,
revision=None,
),
]
dirpath_fi = os.path.join(str(testdatadir), "projects", "dummy-fi")
Expand Down

0 comments on commit d7be137

Please sign in to comment.