Commit

update readme
hiyouga committed Aug 1, 2023
1 parent 8443504 commit 858e9f2
Showing 4 changed files with 28 additions and 18 deletions.
4 changes: 3 additions & 1 deletion README.md
````diff
@@ -130,9 +130,11 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
 ### All-in-one Web UI
 
 ```bash
-python src/train_web.py
+CUDA_VISIBLE_DEVICES=0 python src/train_web.py
 ```
 
+Currently the web UI only supports training on **a single GPU**.
+
 ### Fine-tuning with a Single GPU
 
 ```bash
````
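The added `CUDA_VISIBLE_DEVICES` prefix pins the web UI to one GPU. A hedged usage example (the device index is arbitrary; any single visible GPU works):

```bash
# Launch the web UI on the second GPU instead of the first.
CUDA_VISIBLE_DEVICES=1 python src/train_web.py
```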
4 changes: 3 additions & 1 deletion README_zh.md
````diff
@@ -136,9 +136,11 @@ pip install https://github.com/jllllll/bitsandbytes-windows-webui/releases/downl
 ### One-click fine-tuning and testing in the browser
 
 ```bash
-python src/train_web.py
+CUDA_VISIBLE_DEVICES=0 python src/train_web.py
 ```
 
+Currently the web UI only supports **single-GPU training**.
+
 ### Fine-tuning with a single GPU
 
 ```bash
````
14 changes: 3 additions & 11 deletions src/glmtuner/chat/stream_chat.py
```diff
@@ -3,8 +3,7 @@
 from threading import Thread
 from transformers import TextIteratorStreamer
 
-from glmtuner.extras.misc import get_logits_processor
-from glmtuner.extras.misc import auto_configure_device_map
+from glmtuner.extras.misc import dispatch_model, get_logits_processor
 from glmtuner.hparams import ModelArguments, DataArguments, FinetuningArguments, GeneratingArguments
 from glmtuner.tuner import load_model_and_tokenizer
@@ -19,15 +18,8 @@ def __init__(
         generating_args: GeneratingArguments
     ) -> None:
         self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
-
-        if torch.cuda.device_count() > 1:
-            from accelerate import dispatch_model
-            device_map = auto_configure_device_map(torch.cuda.device_count(), use_v2=(self.tokenizer.eos_token_id==2))
-            self.model = dispatch_model(self.model, device_map)
-        else:
-            self.model = self.model.cuda()
-
-        self.source_prefix = data_args.source_prefix or ""
+        self.model = dispatch_model(self.model, use_v2=(self.tokenizer.eos_token_id==2))
+        self.source_prefix = data_args.source_prefix
         self.generating_args = generating_args
 
     def get_prompt(
```
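Read together, the two hunks replace the inline device-placement branch with the new helper. A sketch of the constructor after this commit, reconstructed only from the lines above (parameter type hints omitted):

```python
# Reconstructed from the diff above, not copied verbatim from the repository.
def __init__(self, model_args, data_args, finetuning_args, generating_args) -> None:
    self.model, self.tokenizer = load_model_and_tokenizer(model_args, finetuning_args)
    # dispatch_model now hides the single- vs. multi-GPU branching;
    # eos_token_id == 2 is the heuristic this codebase uses to detect ChatGLM2.
    self.model = dispatch_model(self.model, use_v2=(self.tokenizer.eos_token_id == 2))
    self.source_prefix = data_args.source_prefix
    self.generating_args = generating_args
```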
24 changes: 19 additions & 5 deletions src/glmtuner/extras/misc.py
```diff
@@ -93,6 +93,15 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     return model
 
 
+def torch_gc() -> None:
+    r"""
+    Collects GPU memory.
+    """
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.ipc_collect()
+
+
 def auto_configure_device_map(num_gpus: int, use_v2: bool) -> Dict[str, int]:
     r"""
     Configures device map for ChatGLM.
@@ -135,10 +144,15 @@ def auto_configure_device_map(num_gpus: int, use_v2: bool) -> Dict[str, int]:
     return device_map
 
 
-def torch_gc() -> None:
+def dispatch_model(model: PreTrainedModel, use_v2: bool) -> PreTrainedModel:
     r"""
-    Collects GPU memory.
+    Dispatches a pre-trained model to GPUs with balanced memory.
     """
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+    if torch.cuda.device_count() > 1:
+        from accelerate import dispatch_model
+
+        device_map = auto_configure_device_map(torch.cuda.device_count(), use_v2=use_v2)
+        model.tie_weights()
+        return dispatch_model(model, device_map)
+    else:
+        return model.cuda()
```
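Taken together, `torch_gc` and the new `dispatch_model` give callers a one-line device-placement helper plus explicit CUDA cache cleanup. A minimal usage sketch, assuming the glmtuner imports resolve as shown in this commit and with `model_args`/`finetuning_args` standing in for parsed argument objects:

```python
from glmtuner.tuner import load_model_and_tokenizer
from glmtuner.extras.misc import dispatch_model, torch_gc

# model_args and finetuning_args are assumed to be parsed ModelArguments /
# FinetuningArguments objects, as in stream_chat.py above.
model, tokenizer = load_model_and_tokenizer(model_args, finetuning_args)

# Spreads the model across all visible GPUs via accelerate when more than
# one is available; otherwise moves everything to cuda:0.
model = dispatch_model(model, use_v2=(tokenizer.eos_token_id == 2))

# ... run generation ...

torch_gc()  # release cached CUDA memory between requests
```

The `model.tie_weights()` call before accelerate's `dispatch_model` presumably re-ties shared parameters (such as the input embeddings and `lm_head`) so that tied weights land on the same device, a common precaution when dispatching with a device map.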
