Support Mistral-small-inst-2409 (#2077)
* add transformers in gitignore

* fix a typo bug in text-caps

* support mistral-small-2409

* add .run to gitignore

* add the doc of mistral-small-2409

* remove transformers in gitignore (to PR this branch)

* update doc
DaozeZhang authored Sep 20, 2024
1 parent 74532d3 commit bea9867
Showing 5 changed files with 15 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
@@ -109,6 +109,7 @@ venv.bak/

.vscode
.idea
.run

# custom
*.pkl
1 change: 1 addition & 0 deletions docs/source/Instruction/支持的模型和数据集.md
@@ -334,6 +334,7 @@
|mistral-nemo-base-2407|[AI-ModelScope/Mistral-Nemo-Base-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Base-2407/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Base-2407](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)|
|mistral-nemo-instruct-2407|[AI-ModelScope/Mistral-Nemo-Instruct-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)|
|mistral-large-instruct-2407|[LLM-Research/Mistral-Large-Instruct-2407](https://modelscope.cn/models/LLM-Research/Mistral-Large-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407)|
|mistral-small-instruct-2409|[AI-ModelScope/Mistral-Small-Instruct-2409](https://modelscope.cn/models/AI-ModelScope/Mistral-Small-Instruct-2409/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Small-Instruct-2409](https://huggingface.co/mistralai/Mistral-Small-Instruct-2409)|
|mixtral-moe-7b|[AI-ModelScope/Mixtral-8x7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)|
|mixtral-moe-7b-instruct|[AI-ModelScope/Mixtral-8x7B-Instruct-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary)|q_proj, k_proj, v_proj|llama|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)|
|mixtral-moe-7b-aqlm-2bit-1x16|[AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf/summary)|q_proj, k_proj, v_proj|default-generation|✔|✘|✘|✘|transformers>=4.38, aqlm, torch>=2.2.0|moe|[ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://huggingface.co/ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf)|
1 change: 1 addition & 0 deletions docs/source_en/Instruction/Supported-models-datasets.md
@@ -334,6 +334,7 @@ The table below introduces all models supported by SWIFT:
|mistral-nemo-base-2407|[AI-ModelScope/Mistral-Nemo-Base-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Base-2407/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Base-2407](https://huggingface.co/mistralai/Mistral-Nemo-Base-2407)|
|mistral-nemo-instruct-2407|[AI-ModelScope/Mistral-Nemo-Instruct-2407](https://modelscope.cn/models/AI-ModelScope/Mistral-Nemo-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Nemo-Instruct-2407](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407)|
|mistral-large-instruct-2407|[LLM-Research/Mistral-Large-Instruct-2407](https://modelscope.cn/models/LLM-Research/Mistral-Large-Instruct-2407/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407)|
|mistral-small-instruct-2409|[AI-ModelScope/Mistral-Small-Instruct-2409](https://modelscope.cn/models/AI-ModelScope/Mistral-Small-Instruct-2409/summary)|q_proj, k_proj, v_proj|mistral-nemo|✔|✔|✘|✘|transformers>=4.43|-|[mistralai/Mistral-Small-Instruct-2409](https://huggingface.co/mistralai/Mistral-Small-Instruct-2409)|
|mixtral-moe-7b|[AI-ModelScope/Mixtral-8x7B-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-v0.1/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1)|
|mixtral-moe-7b-instruct|[AI-ModelScope/Mixtral-8x7B-Instruct-v0.1](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7B-Instruct-v0.1/summary)|q_proj, k_proj, v_proj|llama|✔|✔|✘|✘|transformers>=4.36|moe|[mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)|
|mixtral-moe-7b-aqlm-2bit-1x16|[AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://modelscope.cn/models/AI-ModelScope/Mixtral-8x7b-AQLM-2Bit-1x16-hf/summary)|q_proj, k_proj, v_proj|default-generation|✔|✘|✘|✘|transformers>=4.38, aqlm, torch>=2.2.0|moe|[ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf](https://huggingface.co/ISTA-DASLab/Mixtral-8x7b-AQLM-2Bit-1x16-hf)|
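The new documentation entry reuses the mistral-nemo template and requires transformers>=4.43. Below is a minimal usage sketch (not part of this commit) for trying the newly supported model type through SWIFT's Python entry points; `InferArguments` and `infer_main` are assumed to be exported from `swift.llm` as in other SWIFT examples, so adjust the import if your version differs.

```python
# Hedged sketch: run inference with the newly supported model type.
# InferArguments/infer_main are assumed importable from swift.llm; the weights
# are expected to resolve to AI-ModelScope/Mistral-Small-Instruct-2409.
from swift.llm import InferArguments, infer_main

if __name__ == '__main__':
    args = InferArguments(model_type='mistral-small-instruct-2409')
    infer_main(args)
```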
2 changes: 1 addition & 1 deletion swift/llm/utils/dataset.py
@@ -1435,7 +1435,7 @@ def preprocess(row):
'swift/TextCaps', [],
preprocess_func=preprocess_text_caps,
get_function=get_dataset_from_repo,
split=['train', 'val'],
split=['train', 'validation'],
hf_dataset_id='HuggingFaceM4/TextCaps',
huge_dataset=True,
tags=['multi-modal', 'en', 'caption', 'quality'])
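The typo fix matters because the Hugging Face copy of TextCaps names its evaluation split `validation`, not `val`. A quick check (not part of this commit), assuming the `datasets` library is installed:

```python
# Hedged sketch: confirm the split names exposed by the HF dataset without
# downloading the data itself.
from datasets import get_dataset_split_names

splits = get_dataset_split_names('HuggingFaceM4/TextCaps')
print(splits)  # expected to contain 'train' and 'validation'; 'val' is not a split
```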
11 changes: 11 additions & 0 deletions swift/llm/utils/model.py
@@ -484,6 +484,7 @@ class ModelType:
mistral_nemo_base_2407 = 'mistral-nemo-base-2407'
mistral_nemo_instruct_2407 = 'mistral-nemo-instruct-2407'
mistral_large_instruct_2407 = 'mistral-large-instruct-2407'
mistral_small_instruct_2409 = 'mistral-small-instruct-2409'
mixtral_moe_7b = 'mixtral-moe-7b'
mixtral_moe_7b_instruct = 'mixtral-moe-7b-instruct'
mixtral_moe_7b_aqlm_2bit_1x16 = 'mixtral-moe-7b-aqlm-2bit-1x16' # aqlm
@@ -2623,6 +2624,16 @@ def get_model_tokenizer_glm4v(model_dir: str,
support_flash_attn=True,
support_vllm=True,
hf_model_id='mistralai/Mistral-Large-Instruct-2407')
@register_model(
ModelType.mistral_small_instruct_2409,
'AI-ModelScope/Mistral-Small-Instruct-2409',
LoRATM.llama,
TemplateType.mistral_nemo,
requires=['transformers>=4.43'],
ignore_file_pattern=['^consolidated'],
support_flash_attn=True,
support_vllm=True,
hf_model_id='mistralai/Mistral-Small-Instruct-2409')
@register_model(
ModelType.mistral_nemo_instruct_2407,
'AI-ModelScope/Mistral-Nemo-Instruct-2407',
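Once registered, the model type resolves by name through SWIFT's loader helpers; the `ignore_file_pattern=['^consolidated']` argument presumably skips the consolidated Mistral-format checkpoint so only the sharded HF weights are fetched. A minimal sketch (not part of this commit), assuming `get_model_tokenizer` and `ModelType` are exported from `swift.llm` as for the other Mistral entries:

```python
# Hedged sketch: resolve the new registration without loading the full weights.
# load_model=False is assumed to prepare only the tokenizer and config.
from swift.llm import ModelType, get_model_tokenizer

model, tokenizer = get_model_tokenizer(
    ModelType.mistral_small_instruct_2409, load_model=False)
print(tokenizer)
```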
