From df58536d9f44eccafa0a9e93bbc9ca2c3bc55f58 Mon Sep 17 00:00:00 2001
From: Jintao
Date: Fri, 31 May 2024 17:37:45 +0800
Subject: [PATCH] fix bugs (#1038)

---
 ...\240\351\200\237\344\270\216\351\203\250\347\275\262.md"  | 4 ++--
 .../LLM/VLLM-inference-acceleration-and-deployment.md        | 4 ++--
 examples/pytorch/llm/custom.py                               | 6 +++---
 swift/llm/deploy.py                                          | 2 +-
 swift/llm/utils/argument.py                                  | 2 ++
 swift/llm/utils/model.py                                     | 2 ++
 6 files changed, 12 insertions(+), 8 deletions(-)

diff --git "a/docs/source/LLM/VLLM\346\216\250\347\220\206\345\212\240\351\200\237\344\270\216\351\203\250\347\275\262.md" "b/docs/source/LLM/VLLM\346\216\250\347\220\206\345\212\240\351\200\237\344\270\216\351\203\250\347\275\262.md"
index 4eef04151..9e27dfb34 100644
--- "a/docs/source/LLM/VLLM\346\216\250\347\220\206\345\212\240\351\200\237\344\270\216\351\203\250\347\275\262.md"
+++ "b/docs/source/LLM/VLLM\346\216\250\347\220\206\345\212\240\351\200\237\344\270\216\351\203\250\347\275\262.md"
@@ -18,7 +18,7 @@ pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
 pip install 'ms-swift[llm]' -U
 
 # vllm与cuda版本有对应关系,请按照`https://docs.vllm.ai/en/latest/getting_started/installation.html`选择版本
-pip install vllm -U
+pip install vllm
 pip install openai -U
 
 # 环境对齐 (通常不需要运行. 如果你运行错误, 可以跑下面的代码, 仓库使用最新环境测试)
@@ -484,7 +484,7 @@ CUDA_VISIBLE_DEVICES=0 swift deploy --ckpt_dir 'xxx/vx-xxx/checkpoint-xxx-merged
 
 客户端示例代码同原始模型.
 
-### 多LoRA部署
+## 多LoRA部署
 
 目前pt方式部署模型已经支持`peft>=0.10.0`进行多LoRA部署,具体方法为:
 
diff --git a/docs/source_en/LLM/VLLM-inference-acceleration-and-deployment.md b/docs/source_en/LLM/VLLM-inference-acceleration-and-deployment.md
index 997f14dec..70fe273bb 100644
--- a/docs/source_en/LLM/VLLM-inference-acceleration-and-deployment.md
+++ b/docs/source_en/LLM/VLLM-inference-acceleration-and-deployment.md
@@ -15,7 +15,7 @@ pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
 pip install 'ms-swift[llm]' -U
 
 # vllm version corresponds to cuda version, please select version according to `https://docs.vllm.ai/en/latest/getting_started/installation.html`
-pip install vllm -U
+pip install vllm
 pip install openai -U
 
 # Environment alignment (usually not needed. If you get errors, you can run the code below, the repo uses the latest environment for testing)
@@ -481,7 +481,7 @@ CUDA_VISIBLE_DEVICES=0 swift deploy --ckpt_dir 'xxx/vx-xxx/checkpoint-xxx-merged
 
 The example code for the client side is the same as the original models.
 
-### Multiple LoRA Deployments
+## Multiple LoRA Deployments
 
 The current model deployment method now supports multiple LoRA deployments with `peft>=0.10.0`.
 The specific steps are:
diff --git a/examples/pytorch/llm/custom.py b/examples/pytorch/llm/custom.py
index d63650a7f..8e24cbcf9 100644
--- a/examples/pytorch/llm/custom.py
+++ b/examples/pytorch/llm/custom.py
@@ -28,11 +28,11 @@ class CustomDatasetName:
     stsb_en = 'stsb-en'
 
 
-@register_model(CustomModelType.tigerbot_7b, 'TigerResearch/tigerbot-7b-base-v3', LoRATM.llama,
+@register_model(CustomModelType.tigerbot_7b, 'TigerResearch/tigerbot-7b-base-v3', LoRATM.llama2,
                 TemplateType.default_generation)
-@register_model(CustomModelType.tigerbot_13b, 'TigerResearch/tigerbot-13b-base-v2', LoRATM.llama,
+@register_model(CustomModelType.tigerbot_13b, 'TigerResearch/tigerbot-13b-base-v2', LoRATM.llama2,
                 TemplateType.default_generation)
-@register_model(CustomModelType.tigerbot_13b_chat, 'TigerResearch/tigerbot-13b-chat-v4', LoRATM.llama,
+@register_model(CustomModelType.tigerbot_13b_chat, 'TigerResearch/tigerbot-13b-chat-v4', LoRATM.llama2,
                 CustomTemplateType.tigerbot)
 def get_tigerbot_model_tokenizer(model_dir: str,
                                  torch_dtype: Dtype,
diff --git a/swift/llm/deploy.py b/swift/llm/deploy.py
index f95e9209f..562bb49ea 100644
--- a/swift/llm/deploy.py
+++ b/swift/llm/deploy.py
@@ -42,7 +42,7 @@ async def get_available_models():
     if _args.lora_request_list is not None:
         model_list += [lora_request.lora_name for lora_request in _args.lora_request_list]
     data = [
-        Model(id=model_id, is_chat=not is_generation_template(model_id), owned_by=_args.owned_by)
+        Model(id=model_id, is_chat=not is_generation_template(_args.template_type), owned_by=_args.owned_by)
         for model_id in model_list
     ]
     return ModelList(data=data)
diff --git a/swift/llm/utils/argument.py b/swift/llm/utils/argument.py
index b898e23fe..2cce5f31c 100644
--- a/swift/llm/utils/argument.py
+++ b/swift/llm/utils/argument.py
@@ -220,6 +220,8 @@ def handle_compatibility(self: Union['SftArguments', 'InferArguments']) -> None:
         _dataset = getattr(self, key)
         if isinstance(_dataset, str):
             _dataset = [_dataset]
+        elif _dataset is None:
+            _dataset = []
         if len(_dataset) == 1 and ',' in _dataset[0]:
             _dataset = _dataset[0].split(',')
         for i, d in enumerate(_dataset):
diff --git a/swift/llm/utils/model.py b/swift/llm/utils/model.py
index 2283a6f61..0bf61c86f 100644
--- a/swift/llm/utils/model.py
+++ b/swift/llm/utils/model.py
@@ -435,6 +435,8 @@ class LoRATM(NamedTuple):
         'kv_b_proj',
         'o_proj',
     ]
+    # compat
+    llama2 = llama
 
 
 GetModelTokenizerFunction = Callable[..., Tuple[Optional[PreTrainedModel], PreTrainedTokenizerBase]]
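
Note on the swift/llm/deploy.py hunk: is_generation_template() classifies a template-type string, not a model id, so the old call site passed the wrong value and the /models endpoint could report is_chat incorrectly for any served model. A minimal standalone sketch of the intent, assuming the helper simply looks for a 'generation' marker in the template-type string (the one-line body below is an illustration, not the library's actual implementation):

    def is_generation_template(template_type: str) -> bool:
        # Assumption: generation-style templates carry 'generation' in their
        # name (e.g. 'default-generation'); chat templates do not.
        return 'generation' in template_type

    # Pre-fix behavior: the model id was passed, so even a base model served
    # with a generation template was reported as a chat model.
    assert is_generation_template('tigerbot-7b-base-v3') is False  # -> is_chat=True (wrong)
    # Post-fix behavior: the configured template type is passed instead.
    assert is_generation_template('default-generation') is True   # -> is_chat=False (correct)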
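
Note on the swift/llm/utils/argument.py hunk: handle_compatibility() normalizes the dataset argument before a length check, and the added elif branch keeps a None value from reaching len(). A self-contained sketch of that normalization, using a hypothetical helper name:

    from typing import List, Union

    def normalize_dataset(dataset: Union[str, List[str], None]) -> List[str]:
        # str -> one-element list; None -> empty list (the patched branch).
        if isinstance(dataset, str):
            dataset = [dataset]
        elif dataset is None:
            dataset = []  # without this, len(dataset) below raises TypeError
        # A single comma-joined entry is split into its parts.
        if len(dataset) == 1 and ',' in dataset[0]:
            dataset = dataset[0].split(',')
        return dataset

    assert normalize_dataset(None) == []
    assert normalize_dataset('alpaca-zh,alpaca-en') == ['alpaca-zh', 'alpaca-en']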