
bitsandbytes - Linear8bitLt integration into transformers models #17901

Merged on Aug 10, 2022 (91 commits)
Changes from 4 commits
Commits (91)
ed1dd12
first commit
younesbelkada Jun 24, 2022
b9d0da6
correct replace function
younesbelkada Jun 24, 2022
dd9a464
add final changes
younesbelkada Jun 27, 2022
35e1534
clean up a bit
younesbelkada Jun 27, 2022
d01822b
add bitsandbytes dependencies
younesbelkada Jul 7, 2022
839c9cd
working version
younesbelkada Jul 12, 2022
93a5ac6
Merge branch 'main' of https://github.com/huggingface/transformers in…
younesbelkada Jul 12, 2022
42a6845
small fix
younesbelkada Jul 12, 2022
97f64f8
small fix
younesbelkada Jul 12, 2022
a1fe7fc
fix import issues
younesbelkada Jul 12, 2022
05739e3
Apply suggestions from code review
younesbelkada Jul 12, 2022
7816ef9
refactor a bit
younesbelkada Jul 12, 2022
1155549
Merge branch 'integration-8bit' of https://github.com/younesbelkada/t…
younesbelkada Jul 12, 2022
b222b9a
reformat docstring
younesbelkada Jul 12, 2022
32f48cd
Update src/transformers/__init__.py
younesbelkada Jul 12, 2022
e116e21
revert bad formatting
younesbelkada Jul 12, 2022
39c46a0
change to bitsandbytes
younesbelkada Jul 12, 2022
b92c25c
refactor a bit
younesbelkada Jul 12, 2022
3779f5d
more refactoring
younesbelkada Jul 13, 2022
b41c250
small hack to make it work
younesbelkada Jul 19, 2022
311dcbf
Merge branch 'main' into integration-8bit
younesbelkada Jul 20, 2022
c91a58e
Update src/transformers/modeling_utils.py
younesbelkada Jul 20, 2022
db16cf8
Update src/transformers/modeling_utils.py
younesbelkada Jul 20, 2022
be6ce29
revmoe the small hack
younesbelkada Jul 21, 2022
848d64d
Merge branch 'integration-8bit' of https://github.com/younesbelkada/t…
younesbelkada Jul 21, 2022
15a81e0
modify utils file
younesbelkada Jul 21, 2022
514758d
make style + refactor a bit
younesbelkada Jul 22, 2022
a09e055
create correctly device map
younesbelkada Jul 25, 2022
387aa1e
add correct dtype for device map creation
younesbelkada Jul 25, 2022
7199292
Apply suggestions from code review
younesbelkada Jul 27, 2022
a4c19c1
apply suggestions
younesbelkada Jul 27, 2022
a5cd157
add docstring
younesbelkada Jul 27, 2022
9bd326b
add docstring
younesbelkada Jul 27, 2022
6e4fee6
- added more documentation
younesbelkada Jul 28, 2022
9e121b7
few modifs
younesbelkada Jul 28, 2022
a2ac688
added colab link
younesbelkada Jul 28, 2022
ac370b9
add test architecture + docstring a bit
younesbelkada Jul 28, 2022
27e9486
refactor a bit testing class
younesbelkada Jul 28, 2022
a0db982
make style + refactor a bit
younesbelkada Jul 28, 2022
21bd590
enhance checks
younesbelkada Jul 29, 2022
f1fcf77
Merge branch 'integration-8bit' of https://github.com/younesbelkada/t…
younesbelkada Jul 29, 2022
9b81c67
clean up a bit
younesbelkada Jul 29, 2022
659f427
male style
younesbelkada Jul 29, 2022
147683e
add more details on doc
younesbelkada Jul 29, 2022
c2e1918
add more tests
younesbelkada Aug 1, 2022
f6eb945
replace by "or"
younesbelkada Aug 1, 2022
ceff43e
refactor a bit testing code + add readme
younesbelkada Aug 1, 2022
55cec55
make style
younesbelkada Aug 1, 2022
67bf4fb
fix import issue
younesbelkada Aug 1, 2022
56e9147
Update src/transformers/modeling_utils.py
younesbelkada Aug 1, 2022
5a03a86
add few comments
younesbelkada Aug 1, 2022
7abb914
add more doctring + make style
younesbelkada Aug 1, 2022
961e57e
more docstring
younesbelkada Aug 1, 2022
1326a42
raise error when loaded in 8bit
younesbelkada Aug 1, 2022
9a0051b
make style
younesbelkada Aug 1, 2022
59f9a5a
add warning if loaded on CPU
younesbelkada Aug 2, 2022
418690b
add small sanity check
younesbelkada Aug 2, 2022
6d93424
fix small comment
younesbelkada Aug 2, 2022
d428d8d
add bitsandbytes on dockerfile
younesbelkada Aug 2, 2022
0324f4b
Improve documentation
younesbelkada Aug 2, 2022
af229c4
Merge branch 'main' into integration-8bit
younesbelkada Aug 2, 2022
70ad8cb
add few comments
younesbelkada Aug 3, 2022
1eedb90
slow tests pass on the VM but not on the CI VM
younesbelkada Aug 3, 2022
163ef77
Merge branch 'integration-8bit' of https://github.com/younesbelkada/t…
younesbelkada Aug 3, 2022
8100d03
Fix merge conflict
younesbelkada Aug 3, 2022
c9589f6
make style
younesbelkada Aug 3, 2022
eb9a26d
another test should pass on a multi gpu setup
younesbelkada Aug 4, 2022
838e2a9
fix bad import in testing file
younesbelkada Aug 4, 2022
c4a1e9b
Fix slow tests
younesbelkada Aug 4, 2022
31fce94
odify dockerfile
younesbelkada Aug 4, 2022
3e4a2a4
Update docs/source/en/main_classes/model.mdx
younesbelkada Aug 5, 2022
fdf37b3
Update Dockerfile
younesbelkada Aug 5, 2022
53e0b2e
Update model.mdx
younesbelkada Aug 7, 2022
91364c4
Update Dockerfile
younesbelkada Aug 8, 2022
5ea8976
Apply suggestions from code review
younesbelkada Aug 8, 2022
8b72d08
few modifications
younesbelkada Aug 8, 2022
32a4863
Merge branch 'main' into integration-8bit
younesbelkada Aug 8, 2022
c6c139f
change test value
younesbelkada Aug 8, 2022
3d3224f
Merge remote-tracking branch 'upstream/main' into integration-8bit
younesbelkada Aug 9, 2022
0d5bc2b
modify installation guidelines
younesbelkada Aug 9, 2022
bc8f332
Apply suggestions from code review
younesbelkada Aug 9, 2022
5adcadc
Apply suggestions from code review
younesbelkada Aug 9, 2022
bb00e7a
Apply suggestions from code review
younesbelkada Aug 9, 2022
630b4f7
replace `n`by `name`
younesbelkada Aug 9, 2022
a925641
merge `load_in_8bit` and `low_cpu_mem_usage`
younesbelkada Aug 9, 2022
279b8c4
first try - keep the lm head in full precision
younesbelkada Aug 9, 2022
e49a2ea
better check
younesbelkada Aug 9, 2022
5718d78
added more tests
younesbelkada Aug 9, 2022
a40667a
Update src/transformers/utils/bitsandbytes.py
younesbelkada Aug 9, 2022
61faa28
Merge branch 'integration-8bit' of https://github.com/younesbelkada/t…
younesbelkada Aug 9, 2022
36fbbd2
improve documentation
younesbelkada Aug 10, 2022
26 changes: 26 additions & 0 deletions src/transformers/modeling_utils.py
@@ -118,6 +118,21 @@ def forward(self, input):
        return input


def replace_8bit_linear(model):
    # Lazy import so transformers does not hard-depend on bitsandbytes.
    import bitsandbytes as bnb

    # Recursively walk the module tree and swap every nn.Linear for a bnb.nn.Linear8bitLt
    # of the same shape. The new layers are created under init_empty_weights(), so their
    # parameters live on the meta device until the checkpoint is loaded.
    for n, module in model.named_children():
        if len(list(module.children())) > 0:
            replace_8bit_linear(module)

        if isinstance(module, nn.Linear):
            with init_empty_weights():
                model._modules[n] = bnb.nn.Linear8bitLt(
                    module.in_features, module.out_features, module.bias is not None, has_fp16_weights=True
                )
    return model


def get_parameter_device(parameter: Union[nn.Module, GenerationMixin, "ModuleUtilsMixin"]):
    try:
        return next(parameter.parameters()).device
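Note (illustration, not part of the diff): a minimal sketch of what a single swap in `replace_8bit_linear` above produces, assuming bitsandbytes and accelerate are installed; it simply mirrors the body of the loop for one layer.

import torch.nn as nn
import bitsandbytes as bnb
from accelerate import init_empty_weights  # same helper modeling_utils relies on

lin = nn.Linear(768, 3072)

# Same call as in the loop above: identical shape and bias flag, weights created on the
# meta device (no memory allocated yet), has_fp16_weights=True as in this revision of the PR.
with init_empty_weights():
    int8_lin = bnb.nn.Linear8bitLt(
        lin.in_features, lin.out_features, lin.bias is not None, has_fp16_weights=True
    )

print(type(int8_lin).__name__)  # Linear8bitLt
print(int8_lin.weight.device)   # meta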
@@ -1776,6 +1791,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
        max_memory = kwargs.pop("max_memory", None)
        offload_folder = kwargs.pop("offload_folder", None)
        offload_state_dict = kwargs.pop("offload_state_dict", False)
        # New kwarg introduced by this PR: swap nn.Linear layers for bitsandbytes Linear8bitLt.
        load_in_8bit = kwargs.pop("load_in_8bit", False)

        if device_map is not None:
            if low_cpu_mem_usage is None:
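Note (illustration, not part of the diff): a hedged sketch of how the new kwarg is meant to be used once this PR lands; the checkpoint name is illustrative, and bitsandbytes, accelerate and a CUDA GPU are assumed.

from transformers import AutoModelForCausalLM

# `load_in_8bit=True` activates the 8-bit path added in this diff; `device_map="auto"`
# lets accelerate place the quantized weights on the available GPU(s).
model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-1b7",  # illustrative checkpoint
    device_map="auto",
    load_in_8bit=True,
)
# Memory use is roughly halved compared to loading the same checkpoint in fp16.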
@@ -2061,12 +2077,20 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P

logger.info("Detected DeepSpeed ZeRO-3: activating zero.init() for this model")
init_contexts = [deepspeed.zero.Init(config_dict_or_path=deepspeed_config())] + init_contexts
elif load_in_8bit:
younesbelkada marked this conversation as resolved.
Show resolved Hide resolved

init_contexts = [init_empty_weights()] # Force enable init empty weights

logger.info("Detected 8-bit loading: activating 8-bit loading for this model")
elif low_cpu_mem_usage:
init_contexts.append(init_empty_weights())

with ContextManagers(init_contexts):
model = cls(config, *model_args, **model_kwargs)

if load_in_8bit:
model = replace_8bit_linear(model)

if device_map == "auto":
if model._no_split_modules is None:
raise ValueError(f"{model.__class__.__name__} does not support `device_map='auto'` yet.")
@@ -2126,6 +2150,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                offload_folder=offload_folder,
                offload_state_dict=offload_state_dict,
                dtype=torch_dtype,
                load_in_8bit=load_in_8bit,
            )

        # make sure token embedding weights are still tied if needed
@@ -2165,6 +2190,7 @@ def _load_pretrained_model(
        offload_folder=None,
        offload_state_dict=False,
        dtype=None,
        load_in_8bit=False,
    ):
        if device_map is not None and "disk" in device_map.values():
            if offload_folder is None: