From faacdf007b23feba7e6735aff1e8b97a08a17d42 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Fri, 5 Aug 2022 13:14:00 -0400 Subject: [PATCH] Move cache folder to huggingface/hub for consistency with hf_hub (#18492) * Move cache folder to just huggingface * Thank you VsCode for this needless import * Move to hub * Forgot one --- docs/source/en/installation.mdx | 8 ++++---- src/transformers/utils/hub.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx index f20490115842c3..4ff4e04436c74e 100644 --- a/docs/source/en/installation.mdx +++ b/docs/source/en/installation.mdx @@ -139,11 +139,11 @@ conda install -c huggingface transformers ## Cache setup -Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/transformers/`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface\transformers`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: +Pretrained models are downloaded and locally cached at: `~/.cache/huggingface/hub`. This is the default directory given by the shell environment variable `TRANSFORMERS_CACHE`. On Windows, the default directory is given by `C:\Users\username\.cache\huggingface\hub`. You can change the shell environment variables shown below - in order of priority - to specify a different cache directory: -1. Shell environment variable (default): `TRANSFORMERS_CACHE`. -2. Shell environment variable: `HF_HOME` + `transformers/`. -3. Shell environment variable: `XDG_CACHE_HOME` + `/huggingface/transformers`. +1. Shell environment variable (default): `HUGGINGFACE_HUB_CACHE` or `TRANSFORMERS_CACHE`. +2. Shell environment variable: `HF_HOME`. +3. Shell environment variable: `XDG_CACHE_HOME` + `/huggingface`. diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 9e81654cda7e1a..7fa4c0a151ace4 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -81,7 +81,7 @@ def is_offline_mode(): hf_cache_home = os.path.expanduser( os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) ) -default_cache_path = os.path.join(hf_cache_home, "transformers") +default_cache_path = os.path.join(hf_cache_home, "hub") # Onetime move from the old location to the new one if no ENV variable has been set. if ( @@ -102,7 +102,8 @@ def is_offline_mode(): PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path) PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) -TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE) +HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", PYTORCH_TRANSFORMERS_CACHE) +TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", HUGGINGFACE_HUB_CACHE) HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) TRANSFORMERS_DYNAMIC_MODULE_NAME = "transformers_modules" SESSION_ID = uuid4().hex @@ -1475,9 +1476,16 @@ def move_to_new_cache(file, repo, filename, revision, etag, commit_hash): clean_files_for(file) -def move_cache(cache_dir=None, token=None): +def move_cache(cache_dir=None, new_cache_dir=None, token=None): + if new_cache_dir is None: + new_cache_dir = TRANSFORMERS_CACHE if cache_dir is None: - cache_dir = TRANSFORMERS_CACHE + # Migrate from old cache in .cache/huggingface/hub + old_cache = Path(TRANSFORMERS_CACHE).parent / "transformers" + if os.path.isdir(str(old_cache)): + cache_dir = str(old_cache) + else: + cache_dir = new_cache_dir if token is None: token = HfFolder.get_token() cached_files = get_all_cached_files(cache_dir=cache_dir)