From 3696f64a225a03ce0ac2ae8dac8e1a1204e865cb Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Thu, 3 Feb 2022 06:43:30 -0500 Subject: [PATCH 01/17] Add initial BNB integration --- src/transformers/file_utils.py | 3 +++ src/transformers/trainer.py | 8 ++++++ src/transformers/training_args.py | 1 + tests/test_trainer.py | 41 +++++++++++++++++++++++++------ 4 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 36d4a005a8d886..8dae6140a7a5c9 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -475,6 +475,9 @@ def is_py3nvml_available(): def is_apex_available(): return importlib.util.find_spec("apex") is not None +def is_bnb_available(): + return importlib.util.find_spec("bnb") is not None + def is_faiss_available(): return _faiss_available diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index fcc37919f3c6a9..f32b869c2ad2cc 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -897,6 +897,14 @@ def get_optimizer_cls_and_kwargs(args: TrainingArguments) -> Tuple[Any, Any]: optimizer_kwargs.update(adam_kwargs) except ImportError: raise ValueError("Trainer tried to instantiate apex FusedAdam but apex is not installed!") + elif args.optim == OptimizerNames.ADAM_BNB_8BIT: + try: + from bnb.optim import Adam8bit + + optimizer_cls = Adam8bit + optimizer_kwargs.update(adam_kwargs) + except ImportError: + raise ValueError("Trainer tried to instantiate bnb Adam8bit but bnb is not installed!") else: raise ValueError(f"Trainer cannot instantiate unsupported optimizer: {args.optim}") return optimizer_cls, optimizer_kwargs diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 0458110e41e57d..9e5a25f774ce46 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -80,6 +80,7 @@ class OptimizerNames(ExplicitEnum): ADAMW_TORCH_XLA = "adamw_torch_xla" ADAMW_APEX_FUSED = "adamw_apex_fused" ADAFACTOR = "adafactor" + ADAM_BNB_8BIT = "adam_bnb_8bit" @dataclass diff --git a/tests/test_trainer.py b/tests/test_trainer.py index cf275f127e6260..cd40aaba8e9bca 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -38,7 +38,7 @@ is_torch_available, logging, ) -from transformers.file_utils import WEIGHTS_NAME, is_apex_available +from transformers.file_utils import WEIGHTS_NAME, is_apex_available, is_bnb_available from transformers.testing_utils import ( ENDPOINT_STAGING, PASS, @@ -1752,13 +1752,13 @@ def hp_name(trial): }, ), ] - if is_apex_available(): - import apex + if is_bnb_available(): + import bnb optim_test_params.append( ( - OptimizerNames.ADAMW_APEX_FUSED, - apex.optimizers.FusedAdam, + OptimizerNames.ADAM_BNB_8BIT, + bnb.optim.Adam8bit, default_adam_kwargs, ) ) @@ -1787,8 +1787,8 @@ def test_optim_supported(self, name: str, expected_cls, mandatory_kwargs): def test_fused_adam(self): # Pretend that apex is installed and mock apex.optimizers.FusedAdam exists. - # Trainer.get_optimizer_cls_and_kwargs does not use FusedAdam, but only has to return a - # class called, so mocking apex.optimizers.FusedAdam should be fine for testing and allow + # Trainer.get_optimizer_cls_and_kwargs does not use FusedAdam. It only has to return the + # class given, so mocking apex.optimizers.FusedAdam should be fine for testing and allow # the test to run without requiring an apex installation. 
mock = Mock() modules = { @@ -1812,6 +1812,33 @@ def test_fused_adam_no_apex(self): with self.assertRaises(ValueError): Trainer.get_optimizer_cls_and_kwargs(args) + def test_bnb_adam8bit(self): + # Pretend that Bits and Bytes is installed and mock bnb.optim.Adam8bit exists. + # Trainer.get_optimizer_cls_and_kwargs does not use Adam8bit. It only has to return the + # class given, so mocking bnb.optim.Adam8bit should be fine for testing and allow + # the test to run without requiring a bnb installation. + mock = Mock() + modules = { + "bnb": mock, + "bnb.optim": mock.optim, + "bnb.optim.Adam8bit": mock.optim.Adam8bit, + } + with patch.dict("sys.modules", modules): + self.check_optim_and_kwargs( + OptimizerNames.ADAM_BNB_8BIT, + default_adam_kwargs, + mock.optim.Adam8bit, + ) + + def test_bnb_adam8bit_no_bnb(self): + args = TrainingArguments(optim=OptimizerNames.ADAM_BNB_8BIT, output_dir="None") + + # Pretend that bnb does not exist, even if installed. By setting bnb to None, importing + # bnb will fail even if bnb is installed. + with patch.dict("sys.modules", {"bnb.optim": None}): + with self.assertRaises(ValueError): + Trainer.get_optimizer_cls_and_kwargs(args) + @require_torch @require_wandb From ba8790ca4d151666b444cec008cca07e9607ffe9 Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Thu, 3 Feb 2022 06:45:31 -0500 Subject: [PATCH 02/17] fixup! Add initial BNB integration --- tests/test_trainer.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_trainer.py b/tests/test_trainer.py index cd40aaba8e9bca..715e9adeb0ef93 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -1752,6 +1752,16 @@ def hp_name(trial): }, ), ] + if is_apex_available(): + import apex + + optim_test_params.append( + ( + OptimizerNames.ADAMW_APEX_FUSED, + apex.optimizers.FusedAdam, + default_adam_kwargs, + ) + ) if is_bnb_available(): import bnb From 16df6c83cb7b2df5399cb90d015c974323373816 Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Thu, 3 Feb 2022 06:49:25 -0500 Subject: [PATCH 03/17] Add bnb test decorator --- tests/extended/test_trainer_ext.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index 3a65f16580fa33..3469d15230e5b4 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -20,7 +20,7 @@ from unittest.mock import patch from parameterized import parameterized -from transformers.file_utils import is_apex_available +from transformers.file_utils import is_apex_available, is_bnb_available from transformers.integrations import is_fairscale_available from transformers.testing_utils import ( CaptureStderr, @@ -71,6 +71,17 @@ def require_apex(test_case): return test_case +# a candidate for testing_utils +def require_bnb(test_case): + """ + Decorator for bits and bytes (bnb) dependency + """ + if not is_bnb_available(): + return unittest.skip("test requires bnb")(test_case) + else: + return test_case + + @require_torch class TestTrainerExt(TestCasePlus): def run_seq2seq_quick( From 97bd33bbd10744999a164986284009534fda9fb4 Mon Sep 17 00:00:00 2001 From: "Manuel R. 
Ciosici" Date: Thu, 3 Feb 2022 06:53:28 -0500 Subject: [PATCH 04/17] Update Adamw8bit option name --- src/transformers/trainer.py | 2 +- src/transformers/training_args.py | 2 +- tests/test_trainer.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index f32b869c2ad2cc..10f8dee78414a4 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -897,7 +897,7 @@ def get_optimizer_cls_and_kwargs(args: TrainingArguments) -> Tuple[Any, Any]: optimizer_kwargs.update(adam_kwargs) except ImportError: raise ValueError("Trainer tried to instantiate apex FusedAdam but apex is not installed!") - elif args.optim == OptimizerNames.ADAM_BNB_8BIT: + elif args.optim == OptimizerNames.ADAMW_BNB: try: from bnb.optim import Adam8bit diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index 9e5a25f774ce46..cca3190ea2e6eb 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -80,7 +80,7 @@ class OptimizerNames(ExplicitEnum): ADAMW_TORCH_XLA = "adamw_torch_xla" ADAMW_APEX_FUSED = "adamw_apex_fused" ADAFACTOR = "adafactor" - ADAM_BNB_8BIT = "adam_bnb_8bit" + ADAMW_BNB = "adamw_bnb" @dataclass diff --git a/tests/test_trainer.py b/tests/test_trainer.py index 715e9adeb0ef93..13cf12be833db8 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -1767,7 +1767,7 @@ def hp_name(trial): optim_test_params.append( ( - OptimizerNames.ADAM_BNB_8BIT, + OptimizerNames.ADAMW_BNB, bnb.optim.Adam8bit, default_adam_kwargs, ) @@ -1835,13 +1835,13 @@ def test_bnb_adam8bit(self): } with patch.dict("sys.modules", modules): self.check_optim_and_kwargs( - OptimizerNames.ADAM_BNB_8BIT, + OptimizerNames.ADAMW_BNB, default_adam_kwargs, mock.optim.Adam8bit, ) def test_bnb_adam8bit_no_bnb(self): - args = TrainingArguments(optim=OptimizerNames.ADAM_BNB_8BIT, output_dir="None") + args = TrainingArguments(optim=OptimizerNames.ADAMW_BNB, output_dir="None") # Pretend that bnb does not exist, even if installed. By setting bnb to None, importing # bnb will fail even if bnb is installed. From 22abb9c130fc4b061c87eeb3f0de642183b31d01 Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Fri, 11 Feb 2022 06:07:36 -0500 Subject: [PATCH 05/17] Use the full bnb package name --- src/transformers/trainer.py | 2 +- tests/test_trainer.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 10f8dee78414a4..229e39d797d018 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -899,7 +899,7 @@ def get_optimizer_cls_and_kwargs(args: TrainingArguments) -> Tuple[Any, Any]: raise ValueError("Trainer tried to instantiate apex FusedAdam but apex is not installed!") elif args.optim == OptimizerNames.ADAMW_BNB: try: - from bnb.optim import Adam8bit + from bitsandbytes.optim import Adam8bit optimizer_cls = Adam8bit optimizer_kwargs.update(adam_kwargs) diff --git a/tests/test_trainer.py b/tests/test_trainer.py index 13cf12be833db8..16fa649797d8ed 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -1763,7 +1763,7 @@ def hp_name(trial): ) ) if is_bnb_available(): - import bnb + import bitsandbytes as bnb optim_test_params.append( ( @@ -1829,9 +1829,9 @@ def test_bnb_adam8bit(self): # the test to run without requiring a bnb installation. 
mock = Mock() modules = { - "bnb": mock, - "bnb.optim": mock.optim, - "bnb.optim.Adam8bit": mock.optim.Adam8bit, + "bitsandbytes": mock, + "bitsandbytes.optim": mock.optim, + "bitsandbytes.optim.Adam8bit": mock.optim.Adam8bit, } with patch.dict("sys.modules", modules): self.check_optim_and_kwargs( From 226b3ddf01ebf23af7bcc494a7da1986f7c16f1f Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Fri, 11 Feb 2022 07:26:10 -0500 Subject: [PATCH 06/17] Overide bnb for all embedding layers --- src/transformers/trainer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 229e39d797d018..d219a39567037a 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -847,6 +847,17 @@ def create_optimizer(self): ) else: self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) + if optimizer_cls.__name__ == "Adam8bit": + import bitsandbytes + from torch.nn import Embedding + + manager = bitsandbytes.optim.GlobalOptimManager.get_instance() + + for module in self.model.modules(): + if isinstance(module, Embedding): + manager.register_module_override(module, "weight", {"optim_bits": 32}) + logger.info(f"Registering bitsandbytes override for {module}") + if is_sagemaker_mp_enabled(): self.optimizer = smp.DistributedOptimizer(self.optimizer) From 1d03d49d3bf3b8ff0d2495e89b6fb21eefb3f4be Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Fri, 11 Feb 2022 07:27:07 -0500 Subject: [PATCH 07/17] Fix package name --- src/transformers/file_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 8dae6140a7a5c9..b664b90be0c07e 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -476,7 +476,7 @@ def is_apex_available(): return importlib.util.find_spec("apex") is not None def is_bnb_available(): - return importlib.util.find_spec("bnb") is not None + return importlib.util.find_spec("bitsandbytes") is not None def is_faiss_available(): From 22d7112ae0a316e7441f3489dc1c8a0b382195d6 Mon Sep 17 00:00:00 2001 From: "Manuel R. 
Ciosici" Date: Fri, 11 Feb 2022 07:28:51 -0500 Subject: [PATCH 08/17] Formatting --- src/transformers/file_utils.py | 1 + src/transformers/trainer.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index b664b90be0c07e..9d036adf0a8d51 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -475,6 +475,7 @@ def is_py3nvml_available(): def is_apex_available(): return importlib.util.find_spec("apex") is not None + def is_bnb_available(): return importlib.util.find_spec("bitsandbytes") is not None diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index d219a39567037a..bbbd5083ff51d6 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -848,9 +848,10 @@ def create_optimizer(self): else: self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) if optimizer_cls.__name__ == "Adam8bit": - import bitsandbytes from torch.nn import Embedding + import bitsandbytes + manager = bitsandbytes.optim.GlobalOptimManager.get_instance() for module in self.model.modules(): @@ -858,7 +859,6 @@ def create_optimizer(self): manager.register_module_override(module, "weight", {"optim_bits": 32}) logger.info(f"Registering bitsandbytes override for {module}") - if is_sagemaker_mp_enabled(): self.optimizer = smp.DistributedOptimizer(self.optimizer) From b048a34e38b8eebb95b59a2cf10e7e327cc6e62e Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Fri, 11 Feb 2022 10:23:21 -0500 Subject: [PATCH 09/17] Remove unnecessary import --- src/transformers/trainer.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index bbbd5083ff51d6..a81c852e6b8f17 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -848,14 +848,12 @@ def create_optimizer(self): else: self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) if optimizer_cls.__name__ == "Adam8bit": - from torch.nn import Embedding - import bitsandbytes manager = bitsandbytes.optim.GlobalOptimManager.get_instance() for module in self.model.modules(): - if isinstance(module, Embedding): + if isinstance(module, nn.Embedding): manager.register_module_override(module, "weight", {"optim_bits": 32}) logger.info(f"Registering bitsandbytes override for {module}") From 2c04486415f995a1b4f9a2ef3e02785c0e20ea8c Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Wed, 30 Mar 2022 13:53:23 -0700 Subject: [PATCH 10/17] Update src/transformers/trainer.py Co-authored-by: Stas Bekman --- src/transformers/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index a81c852e6b8f17..2e996e076d13db 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -855,7 +855,7 @@ def create_optimizer(self): for module in self.model.modules(): if isinstance(module, nn.Embedding): manager.register_module_override(module, "weight", {"optim_bits": 32}) - logger.info(f"Registering bitsandbytes override for {module}") + logger.debug(f"bitsandbytes: will optimize {module} in fp32") if is_sagemaker_mp_enabled(): self.optimizer = smp.DistributedOptimizer(self.optimizer) From b7a1c0c3372c7ae6b30157b754ac5cc2fb338d70 Mon Sep 17 00:00:00 2001 From: "Manuel R. 
Ciosici" Date: Wed, 30 Mar 2022 13:55:19 -0700 Subject: [PATCH 11/17] Rename AdamwBNB optimizer option --- src/transformers/training_args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py index cca3190ea2e6eb..7ad54d68ae253f 100644 --- a/src/transformers/training_args.py +++ b/src/transformers/training_args.py @@ -80,7 +80,7 @@ class OptimizerNames(ExplicitEnum): ADAMW_TORCH_XLA = "adamw_torch_xla" ADAMW_APEX_FUSED = "adamw_apex_fused" ADAFACTOR = "adafactor" - ADAMW_BNB = "adamw_bnb" + ADAMW_BNB = "adamw_bnb_8bit" @dataclass From 8cb259b512835661acb0caf30a29391eb2435b65 Mon Sep 17 00:00:00 2001 From: "Manuel R. Ciosici" Date: Fri, 8 Apr 2022 14:44:18 -0700 Subject: [PATCH 12/17] Add training test checking that bnb memory utilization is lower --- tests/extended/test_trainer_ext.py | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index 3469d15230e5b4..8687875d1a375c 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -17,6 +17,7 @@ import re import sys import unittest +from typing import Tuple from unittest.mock import patch from parameterized import parameterized @@ -229,6 +230,43 @@ def test_run_seq2seq_slow(self): assert "generated_predictions.txt" in contents assert "predict_results.json" in contents + @slow + @require_bnb + def test_run_seq2seq_bnb_slow(self): + from transformers.training_args import OptimizerNames + + def train_and_return_metrics(optim: str) -> Tuple[int, float]: + from pathlib import Path + + extra_args = ( + "--skip_memory_metrics 0 --optim {optim} --do_eval False --do_predict " + "False --adafactor False --log_level debug" + ) + + output_dir = self.run_trainer( + eval_steps=2, + max_len=128, + model_name=MARIAN_MODEL, + learning_rate=3e-4, + num_train_epochs=1, + distributed=False, + extra_args_str=extra_args.format(optim=optim), + do_eval=False, + do_predict=False, + ) + + # Check metrics + logs = TrainerState.load_from_json(Path(output_dir, "trainer_state.json")).log_history + gpu_peak_memory = logs[0]["train_mem_gpu_peaked_delta"] + loss = logs[0]["train_loss"] + return gpu_peak_memory, loss + + original_gpu_peak_memory, original_loss = train_and_return_metrics(OptimizerNames.ADAMW_TORCH.value) + bnb_gpu_peak_memory, bnb_loss = train_and_return_metrics(OptimizerNames.ADAMW_BNB.value) + + assert original_gpu_peak_memory < bnb_gpu_peak_memory + assert original_loss == bnb_loss + def run_trainer( self, eval_steps: int, From e8bf8d08acc88eebc7d48be6503e8247233f979b Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 14 Apr 2022 19:43:48 -0700 Subject: [PATCH 13/17] fix merge --- src/transformers/file_utils.py | 780 +-------------------------------- 1 file changed, 1 insertion(+), 779 deletions(-) diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py index 774f27306683c7..4b93c496ce9ea0 100644 --- a/src/transformers/file_utils.py +++ b/src/transformers/file_utils.py @@ -16,785 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Utilities for working with the local dataset cache. Parts of this file is adapted from the AllenNLP library at -https://github.com/allenai/allennlp. 
-""" -import copy -import fnmatch -import functools -import importlib.util -import io -import json -import os -import re -import shutil -import subprocess -import sys -import tarfile -import tempfile -import types -from collections import OrderedDict, UserDict -from contextlib import ExitStack, contextmanager -from dataclasses import fields -from enum import Enum -from functools import partial, wraps -from hashlib import sha256 -from itertools import chain -from pathlib import Path -from types import ModuleType -from typing import Any, BinaryIO, ContextManager, Dict, List, Optional, Tuple, Union -from urllib.parse import urlparse -from uuid import uuid4 -from zipfile import ZipFile, is_zipfile - -import numpy as np -from packaging import version - -import requests -from filelock import FileLock -from huggingface_hub import HfFolder, Repository, create_repo, list_repo_files, whoami -from requests.exceptions import HTTPError -from transformers.utils.logging import tqdm -from transformers.utils.versions import importlib_metadata - -from . import __version__ -from .utils import logging - - -logger = logging.get_logger(__name__) # pylint: disable=invalid-name - -ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} -ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"}) - -USE_TF = os.environ.get("USE_TF", "AUTO").upper() -USE_TORCH = os.environ.get("USE_TORCH", "AUTO").upper() -USE_JAX = os.environ.get("USE_FLAX", "AUTO").upper() - -if USE_TORCH in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TF not in ENV_VARS_TRUE_VALUES: - _torch_available = importlib.util.find_spec("torch") is not None - if _torch_available: - try: - _torch_version = importlib_metadata.version("torch") - logger.info(f"PyTorch version {_torch_version} available.") - except importlib_metadata.PackageNotFoundError: - _torch_available = False -else: - logger.info("Disabling PyTorch because USE_TF is set") - _torch_available = False - - -if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VALUES: - _tf_available = importlib.util.find_spec("tensorflow") is not None - if _tf_available: - candidates = ( - "tensorflow", - "tensorflow-cpu", - "tensorflow-gpu", - "tf-nightly", - "tf-nightly-cpu", - "tf-nightly-gpu", - "intel-tensorflow", - "intel-tensorflow-avx512", - "tensorflow-rocm", - "tensorflow-macos", - ) - _tf_version = None - # For the metadata, we have to look for both tensorflow and tensorflow-cpu - for pkg in candidates: - try: - _tf_version = importlib_metadata.version(pkg) - break - except importlib_metadata.PackageNotFoundError: - pass - _tf_available = _tf_version is not None - if _tf_available: - if version.parse(_tf_version) < version.parse("2"): - logger.info(f"TensorFlow found but with version {_tf_version}. 
Transformers requires version 2 minimum.") - _tf_available = False - else: - logger.info(f"TensorFlow version {_tf_version} available.") -else: - logger.info("Disabling Tensorflow because USE_TORCH is set") - _tf_available = False - - -if USE_JAX in ENV_VARS_TRUE_AND_AUTO_VALUES: - _flax_available = importlib.util.find_spec("jax") is not None and importlib.util.find_spec("flax") is not None - if _flax_available: - try: - _jax_version = importlib_metadata.version("jax") - _flax_version = importlib_metadata.version("flax") - logger.info(f"JAX version {_jax_version}, Flax version {_flax_version} available.") - except importlib_metadata.PackageNotFoundError: - _flax_available = False -else: - _flax_available = False - - -_datasets_available = importlib.util.find_spec("datasets") is not None -try: - # Check we're not importing a "datasets" directory somewhere but the actual library by trying to grab the version - # AND checking it has an author field in the metadata that is HuggingFace. - _ = importlib_metadata.version("datasets") - _datasets_metadata = importlib_metadata.metadata("datasets") - if _datasets_metadata.get("author", "") != "HuggingFace Inc.": - _datasets_available = False -except importlib_metadata.PackageNotFoundError: - _datasets_available = False - - -_detectron2_available = importlib.util.find_spec("detectron2") is not None -try: - _detectron2_version = importlib_metadata.version("detectron2") - logger.debug(f"Successfully imported detectron2 version {_detectron2_version}") -except importlib_metadata.PackageNotFoundError: - _detectron2_available = False - - -_faiss_available = importlib.util.find_spec("faiss") is not None -try: - _faiss_version = importlib_metadata.version("faiss") - logger.debug(f"Successfully imported faiss version {_faiss_version}") -except importlib_metadata.PackageNotFoundError: - try: - _faiss_version = importlib_metadata.version("faiss-cpu") - logger.debug(f"Successfully imported faiss version {_faiss_version}") - except importlib_metadata.PackageNotFoundError: - _faiss_available = False - - -coloredlogs = importlib.util.find_spec("coloredlogs") is not None -try: - _coloredlogs_available = importlib_metadata.version("coloredlogs") - logger.debug(f"Successfully imported sympy version {_coloredlogs_available}") -except importlib_metadata.PackageNotFoundError: - _coloredlogs_available = False - - -sympy_available = importlib.util.find_spec("sympy") is not None -try: - _sympy_available = importlib_metadata.version("sympy") - logger.debug(f"Successfully imported sympy version {_sympy_available}") -except importlib_metadata.PackageNotFoundError: - _sympy_available = False - - -_tf2onnx_available = importlib.util.find_spec("tf2onnx") is not None -try: - _tf2onnx_version = importlib_metadata.version("tf2onnx") - logger.debug(f"Successfully imported tf2onnx version {_tf2onnx_version}") -except importlib_metadata.PackageNotFoundError: - _tf2onnx_available = False - -_onnx_available = importlib.util.find_spec("onnxruntime") is not None -try: - _onxx_version = importlib_metadata.version("onnx") - logger.debug(f"Successfully imported onnx version {_onxx_version}") -except importlib_metadata.PackageNotFoundError: - _onnx_available = False - - -_scatter_available = importlib.util.find_spec("torch_scatter") is not None -try: - _scatter_version = importlib_metadata.version("torch_scatter") - logger.debug(f"Successfully imported torch-scatter version {_scatter_version}") -except importlib_metadata.PackageNotFoundError: - _scatter_available = False - - 
-_pytorch_quantization_available = importlib.util.find_spec("pytorch_quantization") is not None -try: - _pytorch_quantization_version = importlib_metadata.version("pytorch_quantization") - logger.debug(f"Successfully imported pytorch-quantization version {_pytorch_quantization_version}") -except importlib_metadata.PackageNotFoundError: - _pytorch_quantization_available = False - - -_soundfile_available = importlib.util.find_spec("soundfile") is not None -try: - _soundfile_version = importlib_metadata.version("soundfile") - logger.debug(f"Successfully imported soundfile version {_soundfile_version}") -except importlib_metadata.PackageNotFoundError: - _soundfile_available = False - - -_tensorflow_probability_available = importlib.util.find_spec("tensorflow_probability") is not None -try: - _tensorflow_probability_version = importlib_metadata.version("tensorflow_probability") - logger.debug(f"Successfully imported tensorflow-probability version {_tensorflow_probability_version}") -except importlib_metadata.PackageNotFoundError: - _tensorflow_probability_available = False - - -_timm_available = importlib.util.find_spec("timm") is not None -try: - _timm_version = importlib_metadata.version("timm") - logger.debug(f"Successfully imported timm version {_timm_version}") -except importlib_metadata.PackageNotFoundError: - _timm_available = False - - -_torchaudio_available = importlib.util.find_spec("torchaudio") is not None -try: - _torchaudio_version = importlib_metadata.version("torchaudio") - logger.debug(f"Successfully imported torchaudio version {_torchaudio_version}") -except importlib_metadata.PackageNotFoundError: - _torchaudio_available = False - - -_phonemizer_available = importlib.util.find_spec("phonemizer") is not None -try: - _phonemizer_version = importlib_metadata.version("phonemizer") - logger.debug(f"Successfully imported phonemizer version {_phonemizer_version}") -except importlib_metadata.PackageNotFoundError: - _phonemizer_available = False - - -_pyctcdecode_available = importlib.util.find_spec("pyctcdecode") is not None -try: - _pyctcdecode_version = importlib_metadata.version("pyctcdecode") - logger.debug(f"Successfully imported pyctcdecode version {_pyctcdecode_version}") -except importlib_metadata.PackageNotFoundError: - _pyctcdecode_available = False - - -_librosa_available = importlib.util.find_spec("librosa") is not None -try: - _librosa_version = importlib_metadata.version("librosa") - logger.debug(f"Successfully imported librosa version {_librosa_version}") -except importlib_metadata.PackageNotFoundError: - _librosa_available = False - - -torch_cache_home = os.getenv("TORCH_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "torch")) -old_default_cache_path = os.path.join(torch_cache_home, "transformers") -# New default cache, shared with the Datasets library -hf_cache_home = os.path.expanduser( - os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "~/.cache"), "huggingface")) -) -default_cache_path = os.path.join(hf_cache_home, "transformers") - -# Onetime move from the old location to the new one if no ENV variable has been set. -if ( - os.path.isdir(old_default_cache_path) - and not os.path.isdir(default_cache_path) - and "PYTORCH_PRETRAINED_BERT_CACHE" not in os.environ - and "PYTORCH_TRANSFORMERS_CACHE" not in os.environ - and "TRANSFORMERS_CACHE" not in os.environ -): - logger.warning( - "In Transformers v4.0.0, the default path to cache downloaded models changed from " - "'~/.cache/torch/transformers' to '~/.cache/huggingface/transformers'. 
Since you don't seem to have overridden " - "and '~/.cache/torch/transformers' is a directory that exists, we're moving it to " - "'~/.cache/huggingface/transformers' to avoid redownloading models you have already in the cache. You should " - "only see this message once." - ) - shutil.move(old_default_cache_path, default_cache_path) - -PYTORCH_PRETRAINED_BERT_CACHE = os.getenv("PYTORCH_PRETRAINED_BERT_CACHE", default_cache_path) -PYTORCH_TRANSFORMERS_CACHE = os.getenv("PYTORCH_TRANSFORMERS_CACHE", PYTORCH_PRETRAINED_BERT_CACHE) -TRANSFORMERS_CACHE = os.getenv("TRANSFORMERS_CACHE", PYTORCH_TRANSFORMERS_CACHE) -HF_MODULES_CACHE = os.getenv("HF_MODULES_CACHE", os.path.join(hf_cache_home, "modules")) -TRANSFORMERS_DYNAMIC_MODULE_NAME = "transformers_modules" -SESSION_ID = uuid4().hex -DISABLE_TELEMETRY = os.getenv("DISABLE_TELEMETRY", False) in ENV_VARS_TRUE_VALUES - -WEIGHTS_NAME = "pytorch_model.bin" -TF2_WEIGHTS_NAME = "tf_model.h5" -TF_WEIGHTS_NAME = "model.ckpt" -FLAX_WEIGHTS_NAME = "flax_model.msgpack" -CONFIG_NAME = "config.json" -FEATURE_EXTRACTOR_NAME = "preprocessor_config.json" -MODEL_CARD_NAME = "modelcard.json" - -SENTENCEPIECE_UNDERLINE = "▁" -SPIECE_UNDERLINE = SENTENCEPIECE_UNDERLINE # Kept for backward compatibility - -MULTIPLE_CHOICE_DUMMY_INPUTS = [ - [[0, 1, 0, 1], [1, 0, 0, 1]] -] * 2 # Needs to have 0s and 1s only since XLM uses it for langs too. -DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]] -DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]] - -S3_BUCKET_PREFIX = "https://s3.amazonaws.com/models.huggingface.co/bert" -CLOUDFRONT_DISTRIB_PREFIX = "https://cdn.huggingface.co" - -_staging_mode = os.environ.get("HUGGINGFACE_CO_STAGING", "NO").upper() in ENV_VARS_TRUE_VALUES -_default_endpoint = "https://moon-staging.huggingface.co" if _staging_mode else "https://huggingface.co" - -HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", _default_endpoint) -HUGGINGFACE_CO_PREFIX = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/{model_id}/resolve/{revision}/{filename}" - -# This is the version of torch required to run torch.fx features and torch.onnx with dictionary inputs. -TORCH_FX_REQUIRED_VERSION = version.parse("1.9") -TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION = version.parse("1.8") - -_is_offline_mode = True if os.environ.get("TRANSFORMERS_OFFLINE", "0").upper() in ENV_VARS_TRUE_VALUES else False - - -def is_offline_mode(): - return _is_offline_mode - - -def is_torch_available(): - return _torch_available - - -def is_pyctcdecode_available(): - return _pyctcdecode_available - - -def is_librosa_available(): - return _librosa_available - - -def is_torch_cuda_available(): - if is_torch_available(): - import torch - - return torch.cuda.is_available() - else: - return False - - -def is_torch_bf16_available(): - if not is_torch_available(): - return False - - import torch - - # since currently no utility function is available we build our own. - # some bits come from https://github.com/pytorch/pytorch/blob/2289a12f21c54da93bf5d696e3f9aea83dd9c10d/torch/testing/_internal/common_cuda.py#L51 - # with additional check for torch version - # to succeed: - # 1. the hardware needs to support bf16 (arch >= Ampere) - # 2. torch >= 1.10 (1.9 should be enough for AMP API has changed in 1.10, so using 1.10 as minimal) - # 3. CUDA >= 11 - # 4. 
torch.autocast exists - # XXX: one problem here is that it may give invalid results on mixed gpus setup, so it's - # really only correct for the 0th gpu (or currently set default device if different from 0) - - if not torch.cuda.is_available() or torch.version.cuda is None: - return False - if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8: - return False - if int(torch.version.cuda.split(".")[0]) < 11: - return False - if version.parse(torch.__version__) < version.parse("1.10"): - return False - if not hasattr(torch, "autocast"): - return False - - return True - - -def is_torch_tf32_available(): - if not is_torch_available(): - return False - - import torch - - if not torch.cuda.is_available() or torch.version.cuda is None: - return False - if torch.cuda.get_device_properties(torch.cuda.current_device()).major < 8: - return False - if int(torch.version.cuda.split(".")[0]) < 11: - return False - if version.parse(torch.__version__) < version.parse("1.7"): - return False - - return True - - -_torch_fx_available = _torch_onnx_dict_inputs_support_available = False -if _torch_available: - torch_version = version.parse(importlib_metadata.version("torch")) - _torch_fx_available = (torch_version.major, torch_version.minor) == ( - TORCH_FX_REQUIRED_VERSION.major, - TORCH_FX_REQUIRED_VERSION.minor, - ) - - _torch_onnx_dict_inputs_support_available = torch_version >= TORCH_ONNX_DICT_INPUTS_MINIMUM_VERSION - - -def is_torch_fx_available(): - return _torch_fx_available - - -def is_torch_onnx_dict_inputs_support_available(): - return _torch_onnx_dict_inputs_support_available - - -def is_tf_available(): - return _tf_available - - -def is_coloredlogs_available(): - return _coloredlogs_available - - -def is_tf2onnx_available(): - return _tf2onnx_available - - -def is_onnx_available(): - return _onnx_available - - -def is_flax_available(): - return _flax_available - - -def is_torch_tpu_available(): - if not _torch_available: - return False - # This test is probably enough, but just in case, we unpack a bit. 
- if importlib.util.find_spec("torch_xla") is None: - return False - if importlib.util.find_spec("torch_xla.core") is None: - return False - return importlib.util.find_spec("torch_xla.core.xla_model") is not None - - -def is_datasets_available(): - return _datasets_available - - -def is_detectron2_available(): - return _detectron2_available - - -def is_rjieba_available(): - return importlib.util.find_spec("rjieba") is not None - - -def is_psutil_available(): - return importlib.util.find_spec("psutil") is not None - - -def is_py3nvml_available(): - return importlib.util.find_spec("py3nvml") is not None - - -def is_apex_available(): - return importlib.util.find_spec("apex") is not None - - -def is_bnb_available(): - return importlib.util.find_spec("bitsandbytes") is not None - - -def is_faiss_available(): - return _faiss_available - - -def is_scipy_available(): - return importlib.util.find_spec("scipy") is not None - - -def is_sklearn_available(): - if importlib.util.find_spec("sklearn") is None: - return False - return is_scipy_available() and importlib.util.find_spec("sklearn.metrics") - - -def is_sentencepiece_available(): - return importlib.util.find_spec("sentencepiece") is not None - - -def is_protobuf_available(): - if importlib.util.find_spec("google") is None: - return False - return importlib.util.find_spec("google.protobuf") is not None - - -def is_tokenizers_available(): - return importlib.util.find_spec("tokenizers") is not None - - -def is_vision_available(): - return importlib.util.find_spec("PIL") is not None - - -def is_pytesseract_available(): - return importlib.util.find_spec("pytesseract") is not None - - -def is_spacy_available(): - return importlib.util.find_spec("spacy") is not None - - -def is_ftfy_available(): - return importlib.util.find_spec("ftfy") is not None - - -def is_in_notebook(): - try: - # Test adapted from tqdm.autonotebook: https://github.com/tqdm/tqdm/blob/master/tqdm/autonotebook.py - get_ipython = sys.modules["IPython"].get_ipython - if "IPKernelApp" not in get_ipython().config: - raise ImportError("console") - if "VSCODE_PID" in os.environ: - raise ImportError("vscode") - - return importlib.util.find_spec("IPython") is not None - except (AttributeError, ImportError, KeyError): - return False - - -def is_scatter_available(): - return _scatter_available - - -def is_pytorch_quantization_available(): - return _pytorch_quantization_available - - -def is_tensorflow_probability_available(): - return _tensorflow_probability_available - - -def is_pandas_available(): - return importlib.util.find_spec("pandas") is not None - - -def is_sagemaker_dp_enabled(): - # Get the sagemaker specific env variable. - sagemaker_params = os.getenv("SM_FRAMEWORK_PARAMS", "{}") - try: - # Parse it and check the field "sagemaker_distributed_dataparallel_enabled". - sagemaker_params = json.loads(sagemaker_params) - if not sagemaker_params.get("sagemaker_distributed_dataparallel_enabled", False): - return False - except json.JSONDecodeError: - return False - # Lastly, check if the `smdistributed` module is present. - return importlib.util.find_spec("smdistributed") is not None - - -def is_sagemaker_mp_enabled(): - # Get the sagemaker specific mp parameters from smp_options variable. - smp_options = os.getenv("SM_HP_MP_PARAMETERS", "{}") - try: - # Parse it and check the field "partitions" is included, it is required for model parallel. 
- smp_options = json.loads(smp_options) - if "partitions" not in smp_options: - return False - except json.JSONDecodeError: - return False - - # Get the sagemaker specific framework parameters from mpi_options variable. - mpi_options = os.getenv("SM_FRAMEWORK_PARAMS", "{}") - try: - # Parse it and check the field "sagemaker_distributed_dataparallel_enabled". - mpi_options = json.loads(mpi_options) - if not mpi_options.get("sagemaker_mpi_enabled", False): - return False - except json.JSONDecodeError: - return False - # Lastly, check if the `smdistributed` module is present. - return importlib.util.find_spec("smdistributed") is not None - - -def is_training_run_on_sagemaker(): - return "SAGEMAKER_JOB_NAME" in os.environ - - -def is_soundfile_availble(): - return _soundfile_available - - -def is_timm_available(): - return _timm_available - - -def is_torchaudio_available(): - return _torchaudio_available - - -def is_speech_available(): - # For now this depends on torchaudio but the exact dependency might evolve in the future. - return _torchaudio_available - - -def is_phonemizer_available(): - return _phonemizer_available - - -def torch_only_method(fn): - def wrapper(*args, **kwargs): - if not _torch_available: - raise ImportError( - "You need to install pytorch to use this method or class, " - "or activate it with environment variables USE_TORCH=1 and USE_TF=0." - ) - else: - return fn(*args, **kwargs) - - return wrapper - - -# docstyle-ignore -DATASETS_IMPORT_ERROR = """ -{0} requires the 🤗 Datasets library but it was not found in your environment. You can install it with: -``` -pip install datasets -``` -In a notebook or a colab, you can install it by executing a cell with -``` -!pip install datasets -``` -then restarting your kernel. - -Note that if you have a local folder named `datasets` or a local python file named `datasets.py` in your current -working directory, python may try to import this instead of the 🤗 Datasets library. You should rename this folder or -that python file if that's the case. -""" - - -# docstyle-ignore -TOKENIZERS_IMPORT_ERROR = """ -{0} requires the 🤗 Tokenizers library but it was not found in your environment. You can install it with: -``` -pip install tokenizers -``` -In a notebook or a colab, you can install it by executing a cell with -``` -!pip install tokenizers -``` -""" - - -# docstyle-ignore -SENTENCEPIECE_IMPORT_ERROR = """ -{0} requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the -installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones -that match your environment. -""" - - -# docstyle-ignore -PROTOBUF_IMPORT_ERROR = """ -{0} requires the protobuf library but it was not found in your environment. Checkout the instructions on the -installation page of its repo: https://github.com/protocolbuffers/protobuf/tree/master/python#installation and follow the ones -that match your environment. -""" - - -# docstyle-ignore -FAISS_IMPORT_ERROR = """ -{0} requires the faiss library but it was not found in your environment. Checkout the instructions on the -installation page of its repo: https://github.com/facebookresearch/faiss/blob/master/INSTALL.md and follow the ones -that match your environment. -""" - - -# docstyle-ignore -PYTORCH_IMPORT_ERROR = """ -{0} requires the PyTorch library but it was not found in your environment. 
Checkout the instructions on the -installation page: https://pytorch.org/get-started/locally/ and follow the ones that match your environment. -""" - - -# docstyle-ignore -SKLEARN_IMPORT_ERROR = """ -{0} requires the scikit-learn library but it was not found in your environment. You can install it with: -``` -pip install -U scikit-learn -``` -In a notebook or a colab, you can install it by executing a cell with -``` -!pip install -U scikit-learn -``` -""" - - -# docstyle-ignore -TENSORFLOW_IMPORT_ERROR = """ -{0} requires the TensorFlow library but it was not found in your environment. Checkout the instructions on the -installation page: https://www.tensorflow.org/install and follow the ones that match your environment. -""" - - -# docstyle-ignore -DETECTRON2_IMPORT_ERROR = """ -{0} requires the detectron2 library but it was not found in your environment. Checkout the instructions on the -installation page: https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md and follow the ones -that match your environment. -""" - - -# docstyle-ignore -FLAX_IMPORT_ERROR = """ -{0} requires the FLAX library but it was not found in your environment. Checkout the instructions on the -installation page: https://github.com/google/flax and follow the ones that match your environment. -""" - - -# docstyle-ignore -SCATTER_IMPORT_ERROR = """ -{0} requires the torch-scatter library but it was not found in your environment. You can install it with pip as -explained here: https://github.com/rusty1s/pytorch_scatter. -""" - -# docstyle-ignore -PYTORCH_QUANTIZATION_IMPORT_ERROR = """ -{0} requires the pytorch-quantization library but it was not found in your environment. You can install it with pip: -`pip install pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com` -""" - -# docstyle-ignore -TENSORFLOW_PROBABILITY_IMPORT_ERROR = """ -{0} requires the tensorflow_probability library but it was not found in your environment. You can install it with pip as -explained here: https://github.com/tensorflow/probability. -""" - - -# docstyle-ignore -PANDAS_IMPORT_ERROR = """ -{0} requires the pandas library but it was not found in your environment. You can install it with pip as -explained here: https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html. -""" - - -# docstyle-ignore -PHONEMIZER_IMPORT_ERROR = """ -{0} requires the phonemizer library but it was not found in your environment. You can install it with pip: -`pip install phonemizer` -""" - - -# docstyle-ignore -SCIPY_IMPORT_ERROR = """ -{0} requires the scipy library but it was not found in your environment. You can install it with pip: -`pip install scipy` -""" - - -# docstyle-ignore -SPEECH_IMPORT_ERROR = """ -{0} requires the torchaudio library but it was not found in your environment. You can install it with pip: -`pip install torchaudio` -""" - -# docstyle-ignore -TIMM_IMPORT_ERROR = """ -{0} requires the timm library but it was not found in your environment. You can install it with pip: -`pip install timm` -""" - -# docstyle-ignore -VISION_IMPORT_ERROR = """ -{0} requires the PIL library but it was not found in your environment. You can install it with pip: -`pip install pillow` -""" - - -# docstyle-ignore -PYTESSERACT_IMPORT_ERROR = """ -{0} requires the PyTesseract library but it was not found in your environment. 
You can install it with pip: -`pip install pytesseract` -""" +File utilities: utilities related to download and cache models This module should not be update anymore and is only left for backward compatibility. """ From 37044b534a5a75e9d8599c98795117b3bb067f69 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 14 Apr 2022 20:25:59 -0700 Subject: [PATCH 14/17] fix merge; fix + extend new test --- src/transformers/utils/__init__.py | 1 + src/transformers/utils/import_utils.py | 4 ++ tests/extended/test_trainer_ext.py | 74 +++++++++++++++++++++----- tests/trainer/test_trainer.py | 5 +- 4 files changed, 69 insertions(+), 15 deletions(-) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 45364fb8fd335f..9ca406b6c70ae9 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -85,6 +85,7 @@ DummyObject, _LazyModule, is_apex_available, + is_bnb_available, is_coloredlogs_available, is_datasets_available, is_detectron2_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 6207d0df7ceaa6..55bc63be52ab2a 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -400,6 +400,10 @@ def is_apex_available(): return importlib.util.find_spec("apex") is not None +def is_bnb_available(): + return importlib.util.find_spec("bitsandbytes") is not None + + def is_faiss_available(): return _faiss_available diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index 46303fec6c6847..e72a202783f12d 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -21,7 +21,7 @@ from unittest.mock import patch from parameterized import parameterized -from transformers.file_utils import is_apex_available, is_bnb_available +from transformers import AutoModel from transformers.integrations import is_fairscale_available from transformers.testing_utils import ( CaptureStderr, @@ -38,7 +38,7 @@ ) from transformers.trainer_callback import TrainerState from transformers.trainer_utils import set_seed -from transformers.utils import is_apex_available +from transformers.utils import is_apex_available, is_bnb_available bindir = os.path.abspath(os.path.dirname(__file__)) @@ -206,7 +206,7 @@ def test_trainer_log_level_replica(self, experiment_id): self.assertEqual(n_matches, data["n_matches"]) @slow - def test_run_seq2seq_slow(self): + def test_run_seq2seq(self): output_dir = self.run_trainer( eval_steps=2, max_len=128, @@ -233,14 +233,14 @@ def test_run_seq2seq_slow(self): @slow @require_bnb - def test_run_seq2seq_bnb_slow(self): + def test_run_seq2seq_bnb(self): from transformers.training_args import OptimizerNames def train_and_return_metrics(optim: str) -> Tuple[int, float]: from pathlib import Path extra_args = ( - "--skip_memory_metrics 0 --optim {optim} --do_eval False --do_predict " + f"--skip_memory_metrics 0 --optim {optim} --do_eval False --do_predict " "False --adafactor False --log_level debug" ) @@ -250,23 +250,69 @@ def train_and_return_metrics(optim: str) -> Tuple[int, float]: model_name=MARIAN_MODEL, learning_rate=3e-4, num_train_epochs=1, - distributed=False, - extra_args_str=extra_args.format(optim=optim), + distributed=True, # force run in a new process + extra_args_str=extra_args, do_eval=False, do_predict=False, ) # Check metrics logs = TrainerState.load_from_json(Path(output_dir, "trainer_state.json")).log_history - gpu_peak_memory = logs[0]["train_mem_gpu_peaked_delta"] + 
gpu_peak_mem = logs[0]["train_mem_gpu_peaked_delta"] + gpu_alloc_mem = logs[0]["train_mem_gpu_alloc_delta"] + loss = logs[0]["train_loss"] - return gpu_peak_memory, loss + return gpu_peak_mem, gpu_alloc_mem, loss + + gpu_peak_mem_orig, gpu_alloc_mem_orig, loss_orig = train_and_return_metrics(OptimizerNames.ADAMW_TORCH.value) + gpu_peak_mem_bnb, gpu_alloc_mem_bnb, loss_bnb = train_and_return_metrics(OptimizerNames.ADAMW_BNB.value) + + gpu_peak_mem_diff_bytes = gpu_peak_mem_orig - gpu_peak_mem_bnb + gpu_peak_mem_diff_percent = gpu_peak_mem_diff_bytes / gpu_peak_mem_bnb + + gpu_total_mem_orig = gpu_peak_mem_orig + gpu_alloc_mem_orig + gpu_total_mem_bnb = gpu_peak_mem_bnb + gpu_alloc_mem_bnb + + gpu_total_mem_diff_bytes = gpu_total_mem_orig - gpu_total_mem_bnb + gpu_total_mem_diff_percent = gpu_total_mem_diff_bytes / gpu_total_mem_bnb + + # leave this for now if CI gets very different results + # print(f"{gpu_alloc_mem_orig=:010d} {gpu_peak_mem_orig=:010d} {gpu_alloc_mem_orig+gpu_peak_mem_orig=:010d}" ) + # print(f" {gpu_alloc_mem_bnb=:010d} {gpu_peak_mem_bnb=:010d} {gpu_alloc_mem_bnb+gpu_peak_mem_bnb=:010d}") + # print(f"{gpu_peak_mem_diff_bytes=}, {gpu_peak_mem_diff_percent=}") + # print(f"{gpu_total_mem_orig=}, {gpu_total_mem_bnb=}") + # print(f"{gpu_total_mem_diff_bytes=}, {gpu_total_mem_diff_percent=}") - original_gpu_peak_memory, original_loss = train_and_return_metrics(OptimizerNames.ADAMW_TORCH.value) - bnb_gpu_peak_memory, bnb_loss = train_and_return_metrics(OptimizerNames.ADAMW_BNB.value) + self.assertGreater( + gpu_peak_mem_diff_percent, + 10, # basically a huge difference - got ~30x on my desktop + "should use very little peak gpu memory with BNB, compared to without it" + f"but got gpu_peak_mem_orig={gpu_peak_mem_orig} and gpu_peak_mem_bnb={gpu_peak_mem_bnb}", + ) + + self.assertGreater( + gpu_total_mem_diff_percent, + 0.20, # could easily be 0.50, but let's stay on the safe side + "Using BNB should use less total GPU memory than without it" + f"but got gpu_total_mem_orig={gpu_total_mem_orig} and gpu_total_mem_bnb={gpu_total_mem_bnb}", + ) - assert original_gpu_peak_memory < bnb_gpu_peak_memory - assert original_loss == bnb_loss + self.assertEqual( + loss_orig, loss_bnb, "loss should be the same, but got loss_orig={loss_orig}, loss_bnb={loss_bnb}" + ) + + # Additionally let's test that the absolute gpu memory difference is larger or about the + # same as the expected saving coming from BNB (6 bytes per param) + model = AutoModel.from_pretrained(MARIAN_MODEL) + total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) + bnb_saved_bytes = total_numel * 6 # 324MB + print(f"{bnb_saved_bytes=}") + + self.assertGreater( + gpu_total_mem_diff_bytes, + bnb_saved_bytes * 0.8, # add a safety margin, if it saved slightly less + f"BNB should have saved about {bnb_saved_bytes} bytes, but the saved bytes were {gpu_total_mem_diff_bytes}", + ) def run_trainer( self, @@ -350,6 +396,8 @@ def run_trainer( {self.examples_dir_str}/pytorch/translation/run_translation.py """.split() cmd = [sys.executable] + distributed_args + args + # keep for quick debug + # print(" ".join([f"\nPYTHONPATH={self.src_dir_str}"] +cmd)); die execute_subprocess_async(cmd, env=self.get_env()) else: testargs = ["run_translation.py"] + args diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 141587f78b562a..9f45d51d745f39 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -39,7 +39,6 @@ is_torch_available, logging, ) -from 
transformers.file_utils import WEIGHTS_NAME, is_apex_available, is_bnb_available from transformers.testing_utils import ( ENDPOINT_STAGING, PASS, @@ -66,7 +65,7 @@ ) from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR from transformers.training_args import OptimizerNames -from transformers.utils import WEIGHTS_NAME, is_apex_available +from transformers.utils import WEIGHTS_NAME, is_apex_available, is_bnb_available from transformers.utils.hp_naming import TrialShortNamer @@ -1871,6 +1870,7 @@ def hp_name(trial): }, ), ] + if is_apex_available(): import apex @@ -1881,6 +1881,7 @@ def hp_name(trial): default_adam_kwargs, ) ) + if is_bnb_available(): import bitsandbytes as bnb From 2e36019a38254b8c1410b5f47db507765976a086 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 14 Apr 2022 20:29:22 -0700 Subject: [PATCH 15/17] cleanup --- tests/extended/test_trainer_ext.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index e72a202783f12d..ba7168e2cd5891 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -306,7 +306,6 @@ def train_and_return_metrics(optim: str) -> Tuple[int, float]: model = AutoModel.from_pretrained(MARIAN_MODEL) total_numel = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) bnb_saved_bytes = total_numel * 6 # 324MB - print(f"{bnb_saved_bytes=}") self.assertGreater( gpu_total_mem_diff_bytes, From dbdaf25cf75270ef4c6b77f51488fbac0b7e59b7 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Fri, 15 Apr 2022 08:16:07 -0700 Subject: [PATCH 16/17] expand bnb --- src/transformers/utils/__init__.py | 2 +- src/transformers/utils/import_utils.py | 2 +- tests/extended/test_trainer_ext.py | 8 ++++---- tests/trainer/test_trainer.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 9ca406b6c70ae9..6101a924f969a0 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -85,7 +85,7 @@ DummyObject, _LazyModule, is_apex_available, - is_bnb_available, + is_bitsandbytes_available, is_coloredlogs_available, is_datasets_available, is_detectron2_available, diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py index 55bc63be52ab2a..505ba94e0b193c 100644 --- a/src/transformers/utils/import_utils.py +++ b/src/transformers/utils/import_utils.py @@ -400,7 +400,7 @@ def is_apex_available(): return importlib.util.find_spec("apex") is not None -def is_bnb_available(): +def is_bitsandbytes_available(): return importlib.util.find_spec("bitsandbytes") is not None diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index ba7168e2cd5891..d3d112adb00e52 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -38,7 +38,7 @@ ) from transformers.trainer_callback import TrainerState from transformers.trainer_utils import set_seed -from transformers.utils import is_apex_available, is_bnb_available +from transformers.utils import is_apex_available, is_bitsandbytes_available bindir = os.path.abspath(os.path.dirname(__file__)) @@ -74,11 +74,11 @@ def require_apex(test_case): # a candidate for testing_utils -def require_bnb(test_case): +def require_bitsandbytes(test_case): """ Decorator for bits and bytes (bnb) dependency """ - if not is_bnb_available(): + if not is_bitsandbytes_available(): return unittest.skip("test requires bnb")(test_case) 
else: return test_case @@ -232,7 +232,7 @@ def test_run_seq2seq(self): assert "predict_results.json" in contents @slow - @require_bnb + @require_bitsandbytes def test_run_seq2seq_bnb(self): from transformers.training_args import OptimizerNames diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 9f45d51d745f39..5fee6d8e3f7be4 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -65,7 +65,7 @@ ) from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR from transformers.training_args import OptimizerNames -from transformers.utils import WEIGHTS_NAME, is_apex_available, is_bnb_available +from transformers.utils import WEIGHTS_NAME, is_apex_available, is_bitsandbytes_available from transformers.utils.hp_naming import TrialShortNamer @@ -1882,7 +1882,7 @@ def hp_name(trial): ) ) - if is_bnb_available(): + if is_bitsandbytes_available(): import bitsandbytes as bnb optim_test_params.append( From ce2c550497b690e4038498817aa1fce011edfd7c Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Tue, 19 Apr 2022 10:46:06 -0700 Subject: [PATCH 17/17] move all require_* candidates to testing_utils.py --- src/transformers/testing_utils.py | 40 +++++++++++++++++++++++++++++- tests/extended/test_trainer_ext.py | 38 +++------------------------- 2 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index b60c7942097a14..36f56d2eeb29c6 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -31,8 +31,16 @@ from transformers import logging as transformers_logging from .deepspeed import is_deepspeed_available -from .integrations import is_optuna_available, is_ray_available, is_sigopt_available, is_wandb_available +from .integrations import ( + is_fairscale_available, + is_optuna_available, + is_ray_available, + is_sigopt_available, + is_wandb_available, +) from .utils import ( + is_apex_available, + is_bitsandbytes_available, is_detectron2_available, is_faiss_available, is_flax_available, @@ -638,6 +646,36 @@ def require_deepspeed(test_case): return test_case +def require_fairscale(test_case): + """ + Decorator marking a test that requires fairscale + """ + if not is_fairscale_available(): + return unittest.skip("test requires fairscale")(test_case) + else: + return test_case + + +def require_apex(test_case): + """ + Decorator marking a test that requires apex + """ + if not is_apex_available(): + return unittest.skip("test requires apex")(test_case) + else: + return test_case + + +def require_bitsandbytes(test_case): + """ + Decorator for bits and bytes (bnb) dependency + """ + if not is_bitsandbytes_available(): + return unittest.skip("test requires bnb")(test_case) + else: + return test_case + + def require_phonemizer(test_case): """ Decorator marking a test that requires phonemizer diff --git a/tests/extended/test_trainer_ext.py b/tests/extended/test_trainer_ext.py index d3d112adb00e52..af8c5d4dd785de 100644 --- a/tests/extended/test_trainer_ext.py +++ b/tests/extended/test_trainer_ext.py @@ -22,7 +22,6 @@ from parameterized import parameterized from transformers import AutoModel -from transformers.integrations import is_fairscale_available from transformers.testing_utils import ( CaptureStderr, ExtendSysPath, @@ -30,6 +29,9 @@ execute_subprocess_async, get_gpu_count, get_torch_dist_unique_port, + require_apex, + require_bitsandbytes, + require_fairscale, require_torch, require_torch_gpu, require_torch_multi_gpu, @@ -38,7 +40,6 @@ ) from 
transformers.trainer_callback import TrainerState from transformers.trainer_utils import set_seed -from transformers.utils import is_apex_available, is_bitsandbytes_available bindir = os.path.abspath(os.path.dirname(__file__)) @@ -51,39 +52,6 @@ MBART_TINY = "sshleifer/tiny-mbart" -# a candidate for testing_utils -def require_fairscale(test_case): - """ - Decorator marking a test that requires fairscale - """ - if not is_fairscale_available(): - return unittest.skip("test requires fairscale")(test_case) - else: - return test_case - - -# a candidate for testing_utils -def require_apex(test_case): - """ - Decorator marking a test that requires apex - """ - if not is_apex_available(): - return unittest.skip("test requires apex")(test_case) - else: - return test_case - - -# a candidate for testing_utils -def require_bitsandbytes(test_case): - """ - Decorator for bits and bytes (bnb) dependency - """ - if not is_bitsandbytes_available(): - return unittest.skip("test requires bnb")(test_case) - else: - return test_case - - @require_torch class TestTrainerExt(TestCasePlus): def run_seq2seq_quick(
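
A minimal usage sketch of the feature this patch series adds, assuming the patches above are applied and the bitsandbytes package is installed; the output_dir value is arbitrary and model/dataset setup is omitted. The imports, the OptimizerNames.ADAMW_BNB member, and Trainer.get_optimizer_cls_and_kwargs all come from the patches themselves.

    from transformers import Trainer, TrainingArguments
    from transformers.training_args import OptimizerNames

    # OptimizerNames.ADAMW_BNB ends up with the string value "adamw_bnb_8bit"
    # (patch 11), so the same optimizer can also be selected on the command
    # line with --optim adamw_bnb_8bit.
    args = TrainingArguments(output_dir="out", optim=OptimizerNames.ADAMW_BNB)

    # Resolves to bitsandbytes.optim.Adam8bit plus the default Adam kwargs when
    # bitsandbytes is installed, and raises ValueError otherwise (patches 01 and 05).
    optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(args)
    print(optimizer_cls, optimizer_kwargs)

When Trainer.create_optimizer() later instantiates this class, it additionally registers 32-bit overrides for nn.Embedding weights through bitsandbytes' GlobalOptimManager, as introduced in patches 06 through 10.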