Removing eos_token when doing inference. #351

Merged · 6 commits · Jan 30, 2024
Changes from 1 commit
fix template encode
Jintao-Huang committed Jan 30, 2024
commit e2a542290423419e708fa6cf61d1aded133c2e72
4 changes: 2 additions & 2 deletions README.md
@@ -164,7 +164,7 @@ from swift.llm import (
     infer_main, sft_main, app_ui_main, merge_lora_main
 )
 
-model_type = ModelType.qwen_1_8b_chat
+model_type = ModelType.qwen_1_8b
 sft_args = SftArguments(
     model_type=model_type,
     train_dataset_sample=2000,
@@ -178,7 +178,7 @@ torch.cuda.empty_cache()
 infer_args = InferArguments(
     ckpt_dir=best_model_checkpoint,
     load_dataset_config=True,
-    show_dataset_sample=10)
+    val_dataset_sample=10)
 # merge_lora_main(infer_args)
 result = infer_main(infer_args)
 torch.cuda.empty_cache()
4 changes: 2 additions & 2 deletions README_CN.md
@@ -164,7 +164,7 @@ from swift.llm import (
     infer_main, sft_main, app_ui_main, merge_lora_main
 )
 
-model_type = ModelType.qwen_1_8b_chat
+model_type = ModelType.qwen_1_8b
 sft_args = SftArguments(
     model_type=model_type,
     train_dataset_sample=2000,
@@ -178,7 +178,7 @@ torch.cuda.empty_cache()
 infer_args = InferArguments(
     ckpt_dir=best_model_checkpoint,
     load_dataset_config=True,
-    show_dataset_sample=10)
+    val_dataset_sample=10)
 # merge_lora_main(infer_args)
 result = infer_main(infer_args)
 torch.cuda.empty_cache()
2 changes: 1 addition & 1 deletion docs/source/LLM/LLM微调文档.md
@@ -64,7 +64,7 @@ torch.cuda.empty_cache()
 infer_args = InferArguments(
     ckpt_dir=best_model_checkpoint,
     load_dataset_config=True,
-    show_dataset_sample=10)
+    val_dataset_sample=10)
 # merge_lora_main(infer_args)
 result = infer_main(infer_args)
 torch.cuda.empty_cache()
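Across the English README, the Chinese README, and the fine-tuning doc, the same two example changes are applied: the quick-start switches to `ModelType.qwen_1_8b`, and `InferArguments` now takes `val_dataset_sample` in place of the older `show_dataset_sample`. A minimal sketch of the updated inference snippet, assuming a fine-tuned checkpoint already exists (the `ckpt_dir` value below is a placeholder):

```python
from swift.llm import InferArguments, infer_main

# val_dataset_sample limits how many validation samples are run at inference;
# it replaces the show_dataset_sample argument removed in this commit.
infer_args = InferArguments(
    ckpt_dir='output/qwen-1_8b/vx-xxx/checkpoint-xxx',  # placeholder checkpoint path
    load_dataset_config=True,
    val_dataset_sample=10)
result = infer_main(infer_args)
```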
37 changes: 18 additions & 19 deletions swift/llm/utils/template.py
@@ -86,16 +86,8 @@ def __call__(self, input_ids: Tensor, scores: Tensor) -> bool:
             if isinstance(stop_word, str):
                 if stop_word in text:
                     return True
-            elif isinstance(stop_word, list) and len(stop_word) > 0:
-                res = []
-                for sw in stop_word:
-                    if isinstance(sw, str):
-                        token = getattr(tokenizer, sw)
-                        assert token is not None
-                    else:
-                        token = sw
-                    res.append(token)
-                if input_ids[0].tolist()[-len(res):] == res:
+            else:
+                if input_ids[0].tolist()[-len(stop_word):] == stop_word:
                     return True
         return False
 
@@ -148,6 +140,22 @@ def _init_template(self,
         self.max_length = max_length
         self.truncation_strategy = truncation_strategy
         self.model = kwargs.get('model', None)
+        # e.g. [['eos_token_id']] -> [[2]]
+        for key in ['prefix', 'prompt', 'chat_sep', 'suffix']:
+            value = getattr(self, key)
+            if value is None:
+                continue
+            res_value = []
+            for v in value:
+                if isinstance(v, list):
+                    res_v = []
+                    for sub_v in v:
+                        if isinstance(sub_v, str):
+                            sub_v = getattr(tokenizer, sub_v)
+                        res_v.append(sub_v)
+                    v = res_v
+                res_value.append(v)
+            setattr(self, key, res_value)
 
     def encode(
             self, example: Dict[str,
@@ -253,15 +261,6 @@ def _encode_context_list(
                     return_attention_mask=False,
                     add_special_tokens=False,
                     **curr_tokenizer_kwargs)['input_ids']
-            else:
-                token_list = []
-                for c in context:
-                    if isinstance(c, str):
-                        token = getattr(tokenizer, c)
-                        assert token is not None
-                    else:
-                        token = c
-                    token_list.append(token)
             input_ids += token_list
             if i in compute_loss_idx:
                 labels += token_list
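The template changes move special-token resolution to initialization time: in `_init_template`, any string entry such as `'eos_token_id'` inside `prefix`, `prompt`, `chat_sep`, or `suffix` is replaced by the tokenizer's integer id, so downstream code (the stop-word check and `_encode_context_list`) can treat list entries as plain token ids. A simplified, self-contained sketch of that idea; the helper names below are illustrative, not part of the swift API:

```python
from typing import List, Union

Fragment = Union[str, List[Union[str, int]]]

def resolve_fragments(fragments: List[Fragment], tokenizer) -> List[Fragment]:
    """Replace token attribute names with ids, e.g. [['eos_token_id']] -> [[2]]."""
    resolved: List[Fragment] = []
    for fragment in fragments:
        if isinstance(fragment, list):
            fragment = [
                getattr(tokenizer, item) if isinstance(item, str) else item
                for item in fragment
            ]
        resolved.append(fragment)
    return resolved

def tail_matches(generated_ids: List[int], stop_word: List[int]) -> bool:
    """With ids resolved up front, the stop check is a plain suffix comparison."""
    return generated_ids[-len(stop_word):] == stop_word
```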
4 changes: 4 additions & 0 deletions swift/llm/utils/utils.py
@@ -441,6 +441,8 @@ def inference_stream(model: PreTrainedModel,
         stream_config.eos_token_id = tokenizer.eos_token_id
     if tokenizer.pad_token_id is not None:
         stream_config.pad_token_id = tokenizer.pad_token_id
+    if tokenizer.bos_token_id is not None:
+        stream_config.bos_token_id = tokenizer.bos_token_id
     if stream_config.max_new_tokens is not None:
         stream_config.max_length = 20  # fix max_length, max_new_tokens warning
     stream_config.do_sample = True  # avoid is_greedy_gen_mode = True
@@ -568,6 +570,8 @@ def inference(model: PreTrainedModel,
         generation_config.eos_token_id = tokenizer.eos_token_id
     if tokenizer.pad_token_id is not None:
         generation_config.pad_token_id = tokenizer.pad_token_id
+    if tokenizer.bos_token_id is not None:
+        generation_config.bos_token_id = tokenizer.bos_token_id
     if generation_config.max_new_tokens is not None:
         generation_config.max_length = 20  # fix max_length, max_new_tokens warning
     if template.suffix[-1] not in stop_words:
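Both inference helpers now copy `bos_token_id` from the tokenizer into the generation config, alongside the existing `eos_token_id` and `pad_token_id` handling. A sketch of the pattern against `transformers.GenerationConfig`; the helper name is illustrative, not part of the swift API:

```python
from transformers import GenerationConfig, PreTrainedTokenizerBase

def sync_special_token_ids(generation_config: GenerationConfig,
                           tokenizer: PreTrainedTokenizerBase) -> None:
    """Mirror the tokenizer's special token ids onto the generation config
    whenever the tokenizer defines them."""
    if tokenizer.eos_token_id is not None:
        generation_config.eos_token_id = tokenizer.eos_token_id
    if tokenizer.pad_token_id is not None:
        generation_config.pad_token_id = tokenizer.pad_token_id
    if tokenizer.bos_token_id is not None:
        generation_config.bos_token_id = tokenizer.bos_token_id
```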