Skip to content

Commit

Permalink
Add numina math model (modelscope#1421)
Browse files Browse the repository at this point in the history
  • Loading branch information
tastelikefeet committed Jul 17, 2024
1 parent 8b5ae89 commit df7020b
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 176 deletions.
198 changes: 100 additions & 98 deletions README.md

Large diffs are not rendered by default.

157 changes: 79 additions & 78 deletions README_CN.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/source/LLM/支持的模型和数据集.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@
|deepseek-math-7b|[deepseek-ai/deepseek-math-7b-base](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-base/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||math|[deepseek-ai/deepseek-math-7b-base](https://huggingface.co/deepseek-ai/deepseek-math-7b-base)|
|deepseek-math-7b-instruct|[deepseek-ai/deepseek-math-7b-instruct](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-instruct/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||math|[deepseek-ai/deepseek-math-7b-instruct](https://huggingface.co/deepseek-ai/deepseek-math-7b-instruct)|
|deepseek-math-7b-chat|[deepseek-ai/deepseek-math-7b-rl](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-rl/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||math|[deepseek-ai/deepseek-math-7b-rl](https://huggingface.co/deepseek-ai/deepseek-math-7b-rl)|
|numina-math-7b|[AI-ModelScope/NuminaMath-7B-TIR](https://modelscope.cn/models/AI-ModelScope/NuminaMath-7B-TIR/summary)|q_proj, k_proj, v_proj|numina-math|✔|✔||math|[AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR)|
|deepseek-v2-chat|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|deepseek2|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|deepseek-v2-lite|[deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|default-generation|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite)|
|deepseek-v2-lite-chat|[deepseek-ai/DeepSeek-V2-Lite-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite-Chat/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|deepseek2|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Lite-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat)|
Expand Down
1 change: 1 addition & 0 deletions docs/source_en/LLM/Supported-models-datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ The table below introduces all models supported by SWIFT:
|deepseek-math-7b|[deepseek-ai/deepseek-math-7b-base](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-base/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔||math|[deepseek-ai/deepseek-math-7b-base](https://huggingface.co/deepseek-ai/deepseek-math-7b-base)|
|deepseek-math-7b-instruct|[deepseek-ai/deepseek-math-7b-instruct](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-instruct/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||math|[deepseek-ai/deepseek-math-7b-instruct](https://huggingface.co/deepseek-ai/deepseek-math-7b-instruct)|
|deepseek-math-7b-chat|[deepseek-ai/deepseek-math-7b-rl](https://modelscope.cn/models/deepseek-ai/deepseek-math-7b-rl/summary)|q_proj, k_proj, v_proj|deepseek|✔|✔||math|[deepseek-ai/deepseek-math-7b-rl](https://huggingface.co/deepseek-ai/deepseek-math-7b-rl)|
|numina-math-7b|[AI-ModelScope/NuminaMath-7B-TIR](https://modelscope.cn/models/AI-ModelScope/NuminaMath-7B-TIR/summary)|q_proj, k_proj, v_proj|numina-math|✔|✔||math|[AI-MO/NuminaMath-7B-TIR](https://huggingface.co/AI-MO/NuminaMath-7B-TIR)|
|deepseek-v2-chat|[deepseek-ai/DeepSeek-V2-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Chat/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|deepseek2|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat)|
|deepseek-v2-lite|[deepseek-ai/DeepSeek-V2-Lite](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|default-generation|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Lite](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite)|
|deepseek-v2-lite-chat|[deepseek-ai/DeepSeek-V2-Lite-Chat](https://modelscope.cn/models/deepseek-ai/DeepSeek-V2-Lite-Chat/summary)|q_a_proj, q_b_proj, kv_a_proj_with_mqa, kv_b_proj, o_proj|deepseek2|✔|✔|transformers>=4.39.3|-|[deepseek-ai/DeepSeek-V2-Lite-Chat](https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat)|
Expand Down
11 changes: 11 additions & 0 deletions swift/llm/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ class ModelType:
deepseek_math_7b = 'deepseek-math-7b'
deepseek_math_7b_instruct = 'deepseek-math-7b-instruct'
deepseek_math_7b_chat = 'deepseek-math-7b-chat'
# numina-math
numina_math_7b = 'numina-math-7b'
# deepseek-vl
deepseek_vl_1_3b_chat = 'deepseek-vl-1_3b-chat'
deepseek_vl_7b_chat = 'deepseek-vl-7b-chat'
Expand Down Expand Up @@ -1855,6 +1857,15 @@ def _output_device_map_hook(module, input, output):
support_vllm=True,
tags=['math'],
hf_model_id='deepseek-ai/deepseek-math-7b-instruct')
@register_model(
ModelType.numina_math_7b,
'AI-ModelScope/NuminaMath-7B-TIR',
LoRATM.llama,
TemplateType.numina_math,
support_flash_attn=True,
support_vllm=True,
tags=['math'],
hf_model_id='AI-MO/NuminaMath-7B-TIR')
@register_model(
ModelType.deepseek_math_7b_chat,
'deepseek-ai/deepseek-math-7b-rl',
Expand Down
5 changes: 5 additions & 0 deletions swift/llm/utils/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class TemplateType:
zephyr = 'zephyr'
sus = 'sus'
deepseek = 'deepseek'
numina_math = 'numina-math'
deepseek_coder = 'deepseek-coder'
deepseek_vl = 'deepseek-vl'
deepseek2 = 'deepseek2'
Expand Down Expand Up @@ -1128,6 +1129,10 @@ def data_collator(self, batch: List[Dict[str, Any]], padding_to: Optional[int] =
TemplateType.deepseek,
Template([['bos_token_id']], ['User: {{QUERY}}\n\nAssistant:'], [['eos_token_id']], [['eos_token_id']], None,
[['bos_token_id'], '{{SYSTEM}}\n\n']))
register_template(
TemplateType.numina_math,
Template([['bos_token_id']], ['### Problem: {{QUERY}}\n### Solution: '], ['\n'], [['eos_token_id']], None,
[['bos_token_id'], '{{SYSTEM}}']))
register_template(
TemplateType.deepseek2,
Template([[100000]], ['User: {{QUERY}}\n\nAssistant:'], [[100001]], [[100001]], None, [[100000], '{{SYSTEM}}\n\n']))
Expand Down

0 comments on commit df7020b

Please sign in to comment.