support qwen2-vl gptq awq (modelscope#1884)
Jintao-Huang committed Sep 2, 2024
1 parent 9f3f65d commit a748cca
Showing 8 changed files with 120 additions and 9 deletions.
10 changes: 8 additions & 2 deletions docs/source/LLM/支持的模型和数据集.md
@@ -369,8 +369,14 @@
|qwen-audio-chat|[qwen/Qwen-Audio-Chat](https://modelscope.cn/models/qwen/Qwen-Audio-Chat/summary)|^(transformer.h)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen-audio|✔|✘|✘|✘||audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)|
|qwen2-audio-7b|[qwen/Qwen2-Audio-7B](https://modelscope.cn/models/qwen/Qwen2-Audio-7B/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-audio-generation|✔|✘|✘|✘|librosa, transformers>=4.45.0.dev0|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)|
|qwen2-audio-7b-instruct|[qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-audio|✔|✘|✘|✘|librosa, transformers>=4.45.0.dev0|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
|qwen2-vl-2b-instruct|[qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0|vision|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
|qwen2-vl-7b-instruct|[qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0|vision|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)|
|qwen2-vl-2b-instruct|[qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils|vision|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
|qwen2-vl-2b-instruct-gptq-int4|[qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|
|qwen2-vl-2b-instruct-gptq-int8|[qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|
|qwen2-vl-2b-instruct-awq|[qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)|
|qwen2-vl-7b-instruct|[qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils|vision|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)|
|qwen2-vl-7b-instruct-gptq-int4|[qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|
|qwen2-vl-7b-instruct-gptq-int8|[qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
8 changes: 8 additions & 0 deletions docs/source/Multi-Modal/qwen2-vl最佳实践.md
@@ -174,6 +174,14 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 NPROC_PER_NODE=4 swift sft \
--freeze_vit true \
--deepspeed default-zero2 \
--dataset latex-ocr-print#20000

# Lower GPU memory consumption: QLoRA
# GPU Memory: 10GB
SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
--model_type qwen2-vl-7b-instruct-gptq-int4 \
--model_id_or_path qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4 \
--sft_type lora \
--dataset latex-ocr-print#20000
```

Example of running inference on the validation set with the fine-tuned model (trained for only 200 steps):
10 changes: 8 additions & 2 deletions docs/source_en/LLM/Supported-models-datasets.md
@@ -369,8 +369,14 @@ The table below introduces all models supported by SWIFT:
|qwen-audio-chat|[qwen/Qwen-Audio-Chat](https://modelscope.cn/models/qwen/Qwen-Audio-Chat/summary)|^(transformer.h)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen-audio|✔|✘|✘|✘||audio|[Qwen/Qwen-Audio-Chat](https://huggingface.co/Qwen/Qwen-Audio-Chat)|
|qwen2-audio-7b|[qwen/Qwen2-Audio-7B](https://modelscope.cn/models/qwen/Qwen2-Audio-7B/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-audio-generation|✔|✘|✘|✘|librosa, transformers>=4.45.0.dev0|audio|[Qwen/Qwen2-Audio-7B](https://huggingface.co/Qwen/Qwen2-Audio-7B)|
|qwen2-audio-7b-instruct|[qwen/Qwen2-Audio-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-Audio-7B-Instruct/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-audio|✔|✘|✘|✘|librosa, transformers>=4.45.0.dev0|audio|[Qwen/Qwen2-Audio-7B-Instruct](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct)|
|qwen2-vl-2b-instruct|[qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0|vision|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
|qwen2-vl-7b-instruct|[qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0|vision|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)|
|qwen2-vl-2b-instruct|[qwen/Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils|vision|[Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct)|
|qwen2-vl-2b-instruct-gptq-int4|[qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4)|
|qwen2-vl-2b-instruct-gptq-int8|[qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8)|
|qwen2-vl-2b-instruct-awq|[qwen/Qwen2-VL-2B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-2B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-AWQ)|
|qwen2-vl-7b-instruct|[qwen/Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils|vision|[Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct)|
|qwen2-vl-7b-instruct-gptq-int4|[qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4)|
|qwen2-vl-7b-instruct-gptq-int8|[qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, auto_gptq>=0.5|vision|[Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8)|
|qwen2-vl-7b-instruct-awq|[qwen/Qwen2-VL-7B-Instruct-AWQ](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct-AWQ/summary)|^(model)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|qwen2-vl|✔|✘|✘|✘|pyav, transformers>=4.45.0.dev0, qwen_vl_utils, autoawq|vision|[Qwen/Qwen2-VL-7B-Instruct-AWQ](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct-AWQ)|
|glm4v-9b-chat|[ZhipuAI/glm-4v-9b](https://modelscope.cn/models/ZhipuAI/glm-4v-9b/summary)|^(transformer.encoder)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|glm4v|✘|✘|✘|✘|transformers>=4.42|vision|[THUDM/glm-4v-9b](https://huggingface.co/THUDM/glm-4v-9b)|
|idefics3-8b-llama3|[AI-ModelScope/Idefics3-8B-Llama3](https://modelscope.cn/models/AI-ModelScope/Idefics3-8B-Llama3/summary)|^(model.text_model\|model.connector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|idefics3|✔|✘|✘|✘|transformers>=4.45.0.dev0|vision|[HuggingFaceM4/Idefics3-8B-Llama3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3)|
|llava1_5-7b-instruct|[swift/llava-1.5-7b-hf](https://modelscope.cn/models/swift/llava-1.5-7b-hf/summary)|^(language_model\|multi_modal_projector)(?!.\*(lm_head\|output\|emb\|wte\|shared)).\*|llava1_5|✔|✔|✔|✘|transformers>=4.36|vision|[llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf)|
8 changes: 8 additions & 0 deletions docs/source_en/Multi-Modal/qwen2-vl-best-practice.md
@@ -178,6 +178,14 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 NPROC_PER_NODE=4 swift sft \
--freeze_vit true \
--deepspeed default-zero2 \
--dataset latex-ocr-print#20000

# Lower GPU Memory Consumption: QLoRA
# GPU Memory: 10GB
SIZE_FACTOR=8 MAX_PIXELS=602112 CUDA_VISIBLE_DEVICES=0 swift sft \
--model_type qwen2-vl-7b-instruct-gptq-int4 \
--model_id_or_path qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4 \
--sft_type lora \
--dataset latex-ocr-print#20000
```

To use a custom dataset, simply specify it as follows:
4 changes: 2 additions & 2 deletions scripts/utils/run_model_info.py
@@ -1,6 +1,6 @@
from typing import Any, List

from swift.llm import MODEL_MAPPING, ModelType
from swift.llm import MODEL_MAPPING, ModelType, get_default_lora_target_modules


def get_model_info_table():
@@ -19,7 +19,7 @@ def get_model_info_table():
for model_name in model_name_list:
model_info = MODEL_MAPPING[model_name]
model_id = model_info['model_id_or_path']
lora_target_modules = model_info['lora_target_modules']
lora_target_modules = get_default_lora_target_modules(model_name)
if isinstance(lora_target_modules, list):
lora_target_modules = ', '.join(lora_target_modules)
else:
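For reference, a minimal usage sketch of the helper this script now calls (the helper itself lives in `swift/llm/utils/model.py`, updated below; the example model type and the regex shown in the comment come from the supported-models table above):

```python
# Minimal sketch, assuming swift.llm exports get_default_lora_target_modules
# as it is imported in run_model_info.py above.
from swift.llm import get_default_lora_target_modules

lora_target_modules = get_default_lora_target_modules('qwen2-vl-7b-instruct-gptq-int4')
# For multimodal models the default is a regex string, e.g.
# '^(model)(?!.*(lm_head|output|emb|wte|shared)).*', rather than a list of
# module names, so it is only joined when it is a list.
if isinstance(lora_target_modules, list):
    lora_target_modules = ', '.join(lora_target_modules)
print(lora_target_modules)
```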
80 changes: 79 additions & 1 deletion swift/llm/utils/model.py
@@ -151,7 +151,13 @@ class ModelType:
qwen2_audio_7b = 'qwen2-audio-7b'
qwen2_audio_7b_instruct = 'qwen2-audio-7b-instruct'
qwen2_vl_2b_instruct = 'qwen2-vl-2b-instruct'
qwen2_vl_2b_instruct_gptq_int4 = 'qwen2-vl-2b-instruct-gptq-int4'
qwen2_vl_2b_instruct_gptq_int8 = 'qwen2-vl-2b-instruct-gptq-int8'
qwen2_vl_2b_instruct_awq = 'qwen2-vl-2b-instruct-awq'
qwen2_vl_7b_instruct = 'qwen2-vl-7b-instruct'
qwen2_vl_7b_instruct_gptq_int4 = 'qwen2-vl-7b-instruct-gptq-int4'
qwen2_vl_7b_instruct_gptq_int8 = 'qwen2-vl-7b-instruct-gptq-int8'
qwen2_vl_7b_instruct_awq = 'qwen2-vl-7b-instruct-awq'
# chatglm
chatglm2_6b = 'chatglm2-6b'
chatglm2_6b_32k = 'chatglm2-6b-32k'
@@ -3466,6 +3472,42 @@ def get_model_tokenizer_qwen2_audio(model_dir: str,
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils'],
tags=['multi-modal', 'vision'],
hf_model_id='Qwen/Qwen2-VL-7B-Instruct')
@register_model(
ModelType.qwen2_vl_7b_instruct_gptq_int4,
'qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'auto_gptq>=0.5'],
tags=['multi-modal', 'vision'],
function_kwargs={'gptq_bits': 4},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4')
@register_model(
ModelType.qwen2_vl_7b_instruct_gptq_int8,
'qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'auto_gptq>=0.5'],
tags=['multi-modal', 'vision'],
function_kwargs={'gptq_bits': 8},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int8')
@register_model(
ModelType.qwen2_vl_7b_instruct_awq,
'qwen/Qwen2-VL-7B-Instruct-AWQ',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'autoawq'],
tags=['multi-modal', 'vision'],
function_kwargs={'is_awq': True},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-7B-Instruct-AWQ')
@register_model(
ModelType.qwen2_vl_2b_instruct,
'qwen/Qwen2-VL-2B-Instruct',
@@ -3476,6 +3518,42 @@ def get_model_tokenizer_qwen2_audio(model_dir: str,
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils'],
tags=['multi-modal', 'vision'],
hf_model_id='Qwen/Qwen2-VL-2B-Instruct')
@register_model(
ModelType.qwen2_vl_2b_instruct_gptq_int4,
'qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'auto_gptq>=0.5'],
tags=['multi-modal', 'vision'],
function_kwargs={'gptq_bits': 4},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4')
@register_model(
ModelType.qwen2_vl_2b_instruct_gptq_int8,
'qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'auto_gptq>=0.5'],
tags=['multi-modal', 'vision'],
function_kwargs={'gptq_bits': 8},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int8')
@register_model(
ModelType.qwen2_vl_2b_instruct_awq,
'qwen/Qwen2-VL-2B-Instruct-AWQ',
LoRATM.qwen2_vl,
TemplateType.qwen2_vl,
support_flash_attn=True,
placeholder_tokens=['<|image_pad|>', '<|video_pad|>'],
requires=['pyav', 'transformers>=4.45.0.dev0', 'qwen_vl_utils', 'autoawq'],
tags=['multi-modal', 'vision'],
function_kwargs={'is_awq': True},
torch_dtype=torch.float16,
hf_model_id='Qwen/Qwen2-VL-2B-Instruct-AWQ')
def get_model_tokenizer_qwen2_vl(model_dir: str,
torch_dtype: Dtype,
model_kwargs: Dict[str, Any],
@@ -6529,7 +6607,7 @@ def get_default_template_type(model_type: str) -> Optional[str]:
return MODEL_MAPPING[model_type].get('template')


def get_default_lora_target_modules(model_type: str) -> Optional[List[str]]:
def get_default_lora_target_modules(model_type: str) -> Union[List[str], str, None]:
res = MODEL_MAPPING[model_type].get('lora_target_modules')
if isinstance(res, str):
res = get_regex_for_mm_default_lora(res)
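A minimal loading sketch for one of the newly registered quantized variants; it assumes the standard `swift.llm` loading path (`get_model_tokenizer`) applies to these entries as it does to other registered models, and `device_map='auto'` is an illustrative choice:

```python
# Sketch only: the ModelType constant and torch.float16 come from the
# registrations above; get_model_tokenizer and the device_map kwarg are the
# usual swift.llm loading path and are assumed to work unchanged here.
import torch
from swift.llm import ModelType, get_model_tokenizer

model, tokenizer = get_model_tokenizer(
    ModelType.qwen2_vl_7b_instruct_gptq_int4,
    torch.float16,
    model_kwargs={'device_map': 'auto'})
print(type(model).__name__)
```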
2 changes: 2 additions & 0 deletions swift/ui/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .app import webui_main
7 changes: 5 additions & 2 deletions swift/ui/llm_train/lora.py
@@ -2,7 +2,7 @@

import gradio as gr

from swift.llm import MODEL_MAPPING
from swift.llm import MODEL_MAPPING, get_default_lora_target_modules
from swift.ui.base import BaseUI


@@ -103,7 +103,10 @@ def do_build_ui(cls, base_tab: Type['BaseUI']):

def update_lora(choice):
if choice is not None:
return ' '.join(MODEL_MAPPING[choice]['lora_target_modules'])
target_modules = get_default_lora_target_modules(choice)
if isinstance(target_modules, list):
target_modules = 'ALL' # llm
return target_modules
return None

base_tab.element('model_type').change(
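A small behavior sketch of the conversion performed in `update_lora` above, under the assumptions visible in this hunk: list-typed defaults (plain LLMs) collapse to the literal `'ALL'`, while a multimodal regex string passes through unchanged.

```python
# Sketch of the branch logic in update_lora; the 'ALL' convention for plain
# LLMs is inferred from the inline '# llm' comment and is an assumption.
from typing import List, Optional, Union

def normalize_target_modules(target_modules: Union[List[str], str, None]) -> Optional[str]:
    if isinstance(target_modules, list):
        return 'ALL'  # plain LLM defaults are lists; the UI shows the catch-all
    return target_modules  # multimodal regex string (or None) is shown as-is
```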
