diff --git a/awq/models/__init__.py b/awq/models/__init__.py
index 1ef85816..bffbaf39 100644
--- a/awq/models/__init__.py
+++ b/awq/models/__init__.py
@@ -4,4 +4,5 @@
 from .falcon import FalconAWQForCausalLM
 from .bloom import BloomAWQForCausalLM
 from .gptj import GPTJAWQForCausalLM
-from .gpt_bigcode import GptBigCodeAWQForCausalLM
\ No newline at end of file
+from .gpt_bigcode import GptBigCodeAWQForCausalLM
+from .qwen import QwenAWQForCausalLM
\ No newline at end of file
diff --git a/awq/models/auto.py b/awq/models/auto.py
index 91a60b7e..0985d817 100644
--- a/awq/models/auto.py
+++ b/awq/models/auto.py
@@ -12,7 +12,8 @@
     "falcon": FalconAWQForCausalLM,
     "bloom": BloomAWQForCausalLM,
     "gptj": GPTJAWQForCausalLM,
-    "gpt_bigcode": GptBigCodeAWQForCausalLM
+    "gpt_bigcode": GptBigCodeAWQForCausalLM,
+    "qwen": QwenAWQForCausalLM
 }
 
 def check_and_get_model_type(model_dir, trust_remote_code=True):
diff --git a/awq/models/base.py b/awq/models/base.py
index 03b3eb1e..80381c69 100644
--- a/awq/models/base.py
+++ b/awq/models/base.py
@@ -160,7 +160,8 @@ def from_quantized(self, model_path, model_type, model_filename='',
         load_checkpoint_in_model(
             model,
             checkpoint=model_weights_path,
-            device_map=device_map
+            device_map=device_map,
+            dtype=torch_dtype
         )
 
         # Dispath to devices
diff --git a/awq/models/qwen.py b/awq/models/qwen.py
new file mode 100644
index 00000000..c30f8eda
--- /dev/null
+++ b/awq/models/qwen.py
@@ -0,0 +1,50 @@
+from .base import BaseAWQForCausalLM
+
+class QwenAWQForCausalLM(BaseAWQForCausalLM):
+    layer_type = "QWenBlock"
+    max_new_tokens_key = "seq_length"
+
+    @staticmethod
+    def get_model_layers(model):
+        return model.transformer.h
+
+    @staticmethod
+    def get_act_for_scaling(module):
+        return dict(
+            is_scalable=False
+        )
+
+    @staticmethod
+    def move_embed(model, device: str):
+        model.transformer.wte = model.transformer.wte.to(device)
+        model.transformer.rotary_emb = model.transformer.rotary_emb.to(device)
+
+    @staticmethod
+    def get_layers_for_scaling(module, input_feat, module_kwargs):
+        layers = []
+
+        # attention
+        layers.append(dict(
+            prev_op=module.ln_1,
+            layers=[module.attn.c_attn, module.attn.c_proj],
+            inp=input_feat['attn.c_attn'],
+            module2inspect=module.attn,
+            kwargs=module_kwargs
+        ))
+
+        # mlp
+        layers.append(dict(
+            prev_op=module.ln_2,
+            layers=[module.mlp.w1, module.mlp.w2],
+            inp=input_feat['mlp.w1'],
+            module2inspect=module.mlp
+        ))
+
+        # linear 2
+        # layers.append(dict(
+        #     prev_op=module.mlp.w2,
+        #     layers=[module.mlp.c_proj],
+        #     inp=input_feat['mlp.c_proj']
+        # ))
+
+        return layers
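
The diff above registers Qwen under the "qwen" model type, so the new QwenAWQForCausalLM is picked up by check_and_get_model_type and the AutoAWQ entry point. A minimal usage sketch follows, assuming the AutoAWQForCausalLM quantization API from this repository; the checkpoint name, output path, and quant_config values are illustrative assumptions, not part of this change.

# Quantize a Qwen checkpoint via the newly registered "qwen" model type.
# Paths and quant_config values are assumptions for illustration only.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "Qwen/Qwen-7B-Chat"   # hypothetical source checkpoint
quant_path = "qwen-7b-chat-awq"    # hypothetical output directory
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4}

# trust_remote_code is required because Qwen ships custom modeling code.
model = AutoAWQForCausalLM.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# AWQ calibration and scaling use QwenAWQForCausalLM.get_layers_for_scaling().
model.quantize(tokenizer, quant_config=quant_config)

model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)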