diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 68c269f84..3d19bf2cd 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -99,6 +99,7 @@ |[Qwen/Qwen2-Math-1.5B](https://modelscope.cn/models/Qwen/Qwen2-Math-1.5B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-1.5B](https://huggingface.co/Qwen/Qwen2-Math-1.5B)| |[Qwen/Qwen2-Math-7B](https://modelscope.cn/models/Qwen/Qwen2-Math-7B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-7B](https://huggingface.co/Qwen/Qwen2-Math-7B)| |[Qwen/Qwen2-Math-72B](https://modelscope.cn/models/Qwen/Qwen2-Math-72B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-72B](https://huggingface.co/Qwen/Qwen2-Math-72B)| +|[PowerInfer/SmallThinker-3B-Preview](https://modelscope.cn/models/PowerInfer/SmallThinker-3B-Preview)|qwen2|qwen|transformers>=4.37|-|[PowerInfer/SmallThinker-3B-Preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview)| |[Qwen/Qwen2.5-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)| |[Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)| |[Qwen/Qwen2.5-3B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-3B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct)| @@ -664,7 +665,7 @@ |[AI-ModelScope/GuanacoDataset](https://modelscope.cn/datasets/AI-ModelScope/GuanacoDataset)|default|31563|250.3±70.6, min=95, max=987|chat, zh|[JosephusCheung/GuanacoDataset](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)| |[AI-ModelScope/LLaVA-Instruct-150K](https://modelscope.cn/datasets/AI-ModelScope/LLaVA-Instruct-150K)|default|623302|630.7±143.0, min=301, max=1166|chat, multi-modal, vision|-| |[AI-ModelScope/LLaVA-Pretrain](https://modelscope.cn/datasets/AI-ModelScope/LLaVA-Pretrain)|default|huge dataset|-|chat, multi-modal, quality|[liuhaotian/LLaVA-Pretrain](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)| -|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
synthetic_handwrite|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| +|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
human_handwrite<br>human_handwrite_print<br>synthetic_handwrite
small|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| |[AI-ModelScope/LongAlpaca-12k](https://modelscope.cn/datasets/AI-ModelScope/LongAlpaca-12k)|default|11998|9941.8±3417.1, min=4695, max=25826|long-sequence, QA|[Yukang/LongAlpaca-12k](https://huggingface.co/datasets/Yukang/LongAlpaca-12k)| |[AI-ModelScope/M3IT](https://modelscope.cn/datasets/AI-ModelScope/M3IT)|coco
vqa-v2<br>shapes<br>shapes-rephrased<br>coco-goi-rephrased<br>snli-ve<br>snli-ve-rephrased<br>okvqa<br>a-okvqa<br>viquae<br>textcap<br>docvqa<br>science-qa<br>imagenet<br>imagenet-open-ended<br>imagenet-rephrased<br>coco-goi<br>clevr<br>clevr-rephrased<br>nlvr<br>coco-itm<br>coco-itm-rephrased<br>vsr<br>vsr-rephrased<br>mocheg<br>mocheg-rephrased<br>coco-text<br>fm-iqa<br>activitynet-qa<br>msrvtt<br>ss<br>coco-cn<br>refcoco<br>refcoco-rephrased<br>multi30k<br>image-paragraph-captioning<br>visual-dialog<br>visual-dialog-rephrased<br>iqa<br>vcr<br>visual-mrc<br>ivqa<br>msrvtt-qa<br>msvd-qa<br>gqa<br>text-vqa<br>ocr-vqa<br>st-vqa
flickr8k-cn|huge dataset|-|chat, multi-modal, vision|-| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese)|default|200000|448.4±223.5, min=87, max=4098|chat, sft, 🔥, zh|[Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese)| @@ -688,7 +689,7 @@ |[AI-ModelScope/captcha-images](https://modelscope.cn/datasets/AI-ModelScope/captcha-images)|default|8000|47.0±0.0, min=47, max=47|chat, multi-modal, vision|-| |[AI-ModelScope/databricks-dolly-15k](https://modelscope.cn/datasets/AI-ModelScope/databricks-dolly-15k)|default|15011|199.0±268.8, min=26, max=5987|multi-task, en, quality|[databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)| |[AI-ModelScope/deepctrl-sft-data](https://modelscope.cn/datasets/AI-ModelScope/deepctrl-sft-data)|default
en|huge dataset|-|chat, general, sft, multi-round|-| -|[AI-ModelScope/egoschema](https://modelscope.cn/datasets/AI-ModelScope/egoschema)|Subset|101|191.6±80.7, min=96, max=435|chat, multi-modal, video|[lmms-lab/egoschema](https://huggingface.co/datasets/lmms-lab/egoschema)| +|[AI-ModelScope/egoschema](https://modelscope.cn/datasets/AI-ModelScope/egoschema)|default
cls|101|191.6±80.7, min=96, max=435|chat, multi-modal, video|[lmms-lab/egoschema](https://huggingface.co/datasets/lmms-lab/egoschema)| |[AI-ModelScope/firefly-train-1.1M](https://modelscope.cn/datasets/AI-ModelScope/firefly-train-1.1M)|default|1649399|204.3±365.3, min=28, max=9306|chat, general|[YeungNLP/firefly-train-1.1M](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)| |[AI-ModelScope/generated_chat_0.4M](https://modelscope.cn/datasets/AI-ModelScope/generated_chat_0.4M)|default|396004|272.7±51.1, min=78, max=579|chat, character-dialogue|[BelleGroup/generated_chat_0.4M](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)| |[AI-ModelScope/guanaco_belle_merge_v1.0](https://modelscope.cn/datasets/AI-ModelScope/guanaco_belle_merge_v1.0)|default|693987|133.8±93.5, min=30, max=1872|QA, zh|[Chinese-Vicuna/guanaco_belle_merge_v1.0](https://huggingface.co/datasets/Chinese-Vicuna/guanaco_belle_merge_v1.0)| @@ -718,11 +719,13 @@ |[AI-ModelScope/webnovel_cn](https://modelscope.cn/datasets/AI-ModelScope/webnovel_cn)|default|50000|1455.2±12489.4, min=524, max=490480|chat, novel|[zxbsmk/webnovel_cn](https://huggingface.co/datasets/zxbsmk/webnovel_cn)| |[AI-ModelScope/wikipedia-cn-20230720-filtered](https://modelscope.cn/datasets/AI-ModelScope/wikipedia-cn-20230720-filtered)|default|huge dataset|-|pretrain, quality|[pleisto/wikipedia-cn-20230720-filtered](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)| |[AI-ModelScope/zhihu_rlhf_3k](https://modelscope.cn/datasets/AI-ModelScope/zhihu_rlhf_3k)|default|3460|594.5±365.9, min=31, max=1716|rlhf, dpo, zh|[liyucheng/zhihu_rlhf_3k](https://huggingface.co/datasets/liyucheng/zhihu_rlhf_3k)| -|[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| +|[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default
cls|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| |-|default|huge dataset|-|pretrain, quality|[HuggingFaceFW/fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb)| |-|auto_math_text
khanacademy<br>openstax<br>stanford<br>stories<br>web_samples_v1<br>web_samples_v2
wikihow|huge dataset|-|multi-domain, en, qa|[HuggingFaceTB/cosmopedia](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)| |[OmniData/Zhihu-KOL](https://modelscope.cn/datasets/OmniData/Zhihu-KOL)|default|huge dataset|-|zhihu, qa|[wangrui6/Zhihu-KOL](https://huggingface.co/datasets/wangrui6/Zhihu-KOL)| |[OmniData/Zhihu-KOL-More-Than-100-Upvotes](https://modelscope.cn/datasets/OmniData/Zhihu-KOL-More-Than-100-Upvotes)|default|271261|1003.4±1826.1, min=28, max=52541|zhihu, qa|[bzb2023/Zhihu-KOL-More-Than-100-Upvotes](https://huggingface.co/datasets/bzb2023/Zhihu-KOL-More-Than-100-Upvotes)| +|[PowerInfer/LONGCOT-Refine-500K](https://modelscope.cn/datasets/PowerInfer/LONGCOT-Refine-500K)|default|521921|296.5±158.4, min=39, max=4634|chat, sft, 🔥, cot|[PowerInfer/LONGCOT-Refine-500K](https://huggingface.co/datasets/PowerInfer/LONGCOT-Refine-500K)| +|[PowerInfer/QWQ-LONGCOT-500K](https://modelscope.cn/datasets/PowerInfer/QWQ-LONGCOT-500K)|default|498082|310.7±303.1, min=35, max=22941|chat, sft, 🔥, cot|[PowerInfer/QWQ-LONGCOT-500K](https://huggingface.co/datasets/PowerInfer/QWQ-LONGCOT-500K)| |[TIGER-Lab/MATH-plus](https://modelscope.cn/datasets/TIGER-Lab/MATH-plus)|train|893929|301.4±196.7, min=50, max=1162|qa, math, en, quality|[TIGER-Lab/MATH-plus](https://huggingface.co/datasets/TIGER-Lab/MATH-plus)| |[Tongyi-DataEngine/SA1B-Dense-Caption](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Dense-Caption)|default|huge dataset|-|zh, multi-modal, vqa|-| |[Tongyi-DataEngine/SA1B-Paired-Captions-Images](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Paired-Captions-Images)|default|7736284|106.4±18.5, min=48, max=193|zh, multi-modal, vqa|-| @@ -737,7 +740,7 @@ |[damo/nlp_polylm_multialpaca_sft](https://modelscope.cn/datasets/damo/nlp_polylm_multialpaca_sft)|ar
de<br>es<br>fr<br>id<br>ja<br>ko<br>pt<br>ru<br>th
vi|131867|101.6±42.5, min=30, max=1029|chat, general, multilingual|-| |[damo/zh_cls_fudan-news](https://modelscope.cn/datasets/damo/zh_cls_fudan-news)|default|4959|3234.4±2547.5, min=91, max=19548|chat, classification|-| |[damo/zh_ner-JAVE](https://modelscope.cn/datasets/damo/zh_ner-JAVE)|default|1266|118.3±45.5, min=44, max=223|chat, ner|-| -|[hjh0119/shareAI-Llama3-DPO-zh-en-emoji](https://modelscope.cn/datasets/hjh0119/shareAI-Llama3-DPO-zh-en-emoji)|zh
en|2449|334.0±162.8, min=36, max=1801|rlhf, dpo|-| +|[hjh0119/shareAI-Llama3-DPO-zh-en-emoji](https://modelscope.cn/datasets/hjh0119/shareAI-Llama3-DPO-zh-en-emoji)|default|2449|334.0±162.8, min=36, max=1801|rlhf, dpo|-| |[huangjintao/AgentInstruct_copy](https://modelscope.cn/datasets/huangjintao/AgentInstruct_copy)|alfworld
db<br>kg<br>mind2web<br>os
webshop|1866|1144.3±635.5, min=206, max=6412|chat, agent, multi-round|-| |[iic/100PoisonMpts](https://modelscope.cn/datasets/iic/100PoisonMpts)|default|906|150.6±80.8, min=39, max=656|poison-management, zh|-| |[iic/MSAgent-MultiRole](https://modelscope.cn/datasets/iic/MSAgent-MultiRole)|default|543|413.0±79.7, min=70, max=936|chat, agent, multi-round, role-play, multi-agent|-| @@ -753,14 +756,14 @@ |[modelscope/clue](https://modelscope.cn/datasets/modelscope/clue)|cmnli|391783|81.6±16.0, min=54, max=157|text-generation, classification|[clue](https://huggingface.co/datasets/clue)| |[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)|train
validation|454617|389.6±68.4, min=70, max=587|chat, multi-modal, vision, 🔥|-| |[shenweizhou/alpha-umi-toolbench-processed-v2](https://modelscope.cn/datasets/shenweizhou/alpha-umi-toolbench-processed-v2)|backbone
caller<br>planner
summarizer|huge dataset|-|chat, agent, 🔥|-| -|[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
medicine|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| +|[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
finance_cls<br>medicine
medicine_cls|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| |[simpleai/HC3-Chinese](https://modelscope.cn/datasets/simpleai/HC3-Chinese)|baike
baike_cls<br>open_qa<br>open_qa_cls<br>nlpcc_dbqa<br>nlpcc_dbqa_cls<br>finance<br>finance_cls<br>medicine<br>medicine_cls<br>law<br>law_cls<br>psychology
psychology_cls|39781|179.9±70.2, min=90, max=1070|text-generation, classification, 🔥|[Hello-SimpleAI/HC3-Chinese](https://huggingface.co/datasets/Hello-SimpleAI/HC3-Chinese)| |[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets)|train
validation
test|141600|40.8±3.3, min=33, max=53|chat, multi-modal, audio|-| |[swift/A-OKVQA](https://modelscope.cn/datasets/swift/A-OKVQA)|default|18201|43.5±7.9, min=27, max=94|multi-modal, en, vqa, quality|[HuggingFaceM4/A-OKVQA](https://huggingface.co/datasets/HuggingFaceM4/A-OKVQA)| |[swift/ChartQA](https://modelscope.cn/datasets/swift/ChartQA)|default|28299|36.8±6.5, min=26, max=74|en, vqa, quality|[HuggingFaceM4/ChartQA](https://huggingface.co/datasets/HuggingFaceM4/ChartQA)| |[swift/GRIT](https://modelscope.cn/datasets/swift/GRIT)|caption
grounding
vqa|huge dataset|-|multi-modal, en, caption-grounding, vqa, quality|[zzliang/GRIT](https://huggingface.co/datasets/zzliang/GRIT)| |[swift/GenQA](https://modelscope.cn/datasets/swift/GenQA)|default|huge dataset|-|qa, quality, multi-task|[tomg-group-umd/GenQA](https://huggingface.co/datasets/tomg-group-umd/GenQA)| -|[swift/Infinity-Instruct](https://modelscope.cn/datasets/swift/Infinity-Instruct)|default|huge dataset|-|qa, quality, multi-task|[BAAI/Infinity-Instruct](https://huggingface.co/datasets/BAAI/Infinity-Instruct)| +|[swift/Infinity-Instruct](https://modelscope.cn/datasets/swift/Infinity-Instruct)|3M
7M<br>0625<br>Gen
7M_domains|huge dataset|-|qa, quality, multi-task|[BAAI/Infinity-Instruct](https://huggingface.co/datasets/BAAI/Infinity-Instruct)| |[swift/Mantis-Instruct](https://modelscope.cn/datasets/swift/Mantis-Instruct)|birds-to-words
chartqa<br>coinstruct<br>contrastive_caption<br>docvqa<br>dreamsim<br>dvqa<br>iconqa<br>imagecode<br>llava_665k_multi<br>lrv_multi<br>multi_vqa<br>nextqa<br>nlvr2<br>spot-the-diff<br>star
visual_story_telling|988115|619.9±156.6, min=243, max=1926|chat, multi-modal, vision|-| |[swift/MideficsDataset](https://modelscope.cn/datasets/swift/MideficsDataset)|default|3800|201.3±70.2, min=60, max=454|medical, en, vqa|[WinterSchool/MideficsDataset](https://huggingface.co/datasets/WinterSchool/MideficsDataset)| |[swift/Multimodal-Mind2Web](https://modelscope.cn/datasets/swift/Multimodal-Mind2Web)|default|1009|293855.4±331149.5, min=11301, max=3577519|agent, multi-modal|[osunlp/Multimodal-Mind2Web](https://huggingface.co/datasets/osunlp/Multimodal-Mind2Web)| diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md index 8eb33dc54..1b946760b 100644 --- a/docs/source_en/Instruction/Supported-models-and-datasets.md +++ b/docs/source_en/Instruction/Supported-models-and-datasets.md @@ -99,6 +99,7 @@ The table below introduces the models integrated with ms-swift: |[Qwen/Qwen2-Math-1.5B](https://modelscope.cn/models/Qwen/Qwen2-Math-1.5B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-1.5B](https://huggingface.co/Qwen/Qwen2-Math-1.5B)| |[Qwen/Qwen2-Math-7B](https://modelscope.cn/models/Qwen/Qwen2-Math-7B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-7B](https://huggingface.co/Qwen/Qwen2-Math-7B)| |[Qwen/Qwen2-Math-72B](https://modelscope.cn/models/Qwen/Qwen2-Math-72B)|qwen2|qwen|transformers>=4.37|math|[Qwen/Qwen2-Math-72B](https://huggingface.co/Qwen/Qwen2-Math-72B)| +|[PowerInfer/SmallThinker-3B-Preview](https://modelscope.cn/models/PowerInfer/SmallThinker-3B-Preview)|qwen2|qwen|transformers>=4.37|-|[PowerInfer/SmallThinker-3B-Preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview)| |[Qwen/Qwen2.5-0.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-0.5B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)| |[Qwen/Qwen2.5-1.5B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-1.5B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)| |[Qwen/Qwen2.5-3B-Instruct](https://modelscope.cn/models/Qwen/Qwen2.5-3B-Instruct)|qwen2_5|qwen2_5|transformers>=4.37|-|[Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct)| @@ -665,7 +666,7 @@ The table below introduces information about the datasets integrated with ms-swi |[AI-ModelScope/GuanacoDataset](https://modelscope.cn/datasets/AI-ModelScope/GuanacoDataset)|default|31563|250.3±70.6, min=95, max=987|chat, zh|[JosephusCheung/GuanacoDataset](https://huggingface.co/datasets/JosephusCheung/GuanacoDataset)| |[AI-ModelScope/LLaVA-Instruct-150K](https://modelscope.cn/datasets/AI-ModelScope/LLaVA-Instruct-150K)|default|623302|630.7±143.0, min=301, max=1166|chat, multi-modal, vision|-| |[AI-ModelScope/LLaVA-Pretrain](https://modelscope.cn/datasets/AI-ModelScope/LLaVA-Pretrain)|default|huge dataset|-|chat, multi-modal, quality|[liuhaotian/LLaVA-Pretrain](https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain)| -|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
synthetic_handwrite|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| +|[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
human_handwrite<br>human_handwrite_print<br>synthetic_handwrite
small|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| |[AI-ModelScope/LongAlpaca-12k](https://modelscope.cn/datasets/AI-ModelScope/LongAlpaca-12k)|default|11998|9941.8±3417.1, min=4695, max=25826|long-sequence, QA|[Yukang/LongAlpaca-12k](https://huggingface.co/datasets/Yukang/LongAlpaca-12k)| |[AI-ModelScope/M3IT](https://modelscope.cn/datasets/AI-ModelScope/M3IT)|coco
vqa-v2<br>shapes<br>shapes-rephrased<br>coco-goi-rephrased<br>snli-ve<br>snli-ve-rephrased<br>okvqa<br>a-okvqa<br>viquae<br>textcap<br>docvqa<br>science-qa<br>imagenet<br>imagenet-open-ended<br>imagenet-rephrased<br>coco-goi<br>clevr<br>clevr-rephrased<br>nlvr<br>coco-itm<br>coco-itm-rephrased<br>vsr<br>vsr-rephrased<br>mocheg<br>mocheg-rephrased<br>coco-text<br>fm-iqa<br>activitynet-qa<br>msrvtt<br>ss<br>coco-cn<br>refcoco<br>refcoco-rephrased<br>multi30k<br>image-paragraph-captioning<br>visual-dialog<br>visual-dialog-rephrased<br>iqa<br>vcr<br>visual-mrc<br>ivqa<br>msrvtt-qa<br>msvd-qa<br>gqa<br>text-vqa<br>ocr-vqa<br>st-vqa
flickr8k-cn|huge dataset|-|chat, multi-modal, vision|-| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese)|default|200000|448.4±223.5, min=87, max=4098|chat, sft, 🔥, zh|[Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese)| @@ -689,7 +690,7 @@ The table below introduces information about the datasets integrated with ms-swi |[AI-ModelScope/captcha-images](https://modelscope.cn/datasets/AI-ModelScope/captcha-images)|default|8000|47.0±0.0, min=47, max=47|chat, multi-modal, vision|-| |[AI-ModelScope/databricks-dolly-15k](https://modelscope.cn/datasets/AI-ModelScope/databricks-dolly-15k)|default|15011|199.0±268.8, min=26, max=5987|multi-task, en, quality|[databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)| |[AI-ModelScope/deepctrl-sft-data](https://modelscope.cn/datasets/AI-ModelScope/deepctrl-sft-data)|default
en|huge dataset|-|chat, general, sft, multi-round|-| -|[AI-ModelScope/egoschema](https://modelscope.cn/datasets/AI-ModelScope/egoschema)|Subset|101|191.6±80.7, min=96, max=435|chat, multi-modal, video|[lmms-lab/egoschema](https://huggingface.co/datasets/lmms-lab/egoschema)| +|[AI-ModelScope/egoschema](https://modelscope.cn/datasets/AI-ModelScope/egoschema)|default
cls|101|191.6±80.7, min=96, max=435|chat, multi-modal, video|[lmms-lab/egoschema](https://huggingface.co/datasets/lmms-lab/egoschema)| |[AI-ModelScope/firefly-train-1.1M](https://modelscope.cn/datasets/AI-ModelScope/firefly-train-1.1M)|default|1649399|204.3±365.3, min=28, max=9306|chat, general|[YeungNLP/firefly-train-1.1M](https://huggingface.co/datasets/YeungNLP/firefly-train-1.1M)| |[AI-ModelScope/generated_chat_0.4M](https://modelscope.cn/datasets/AI-ModelScope/generated_chat_0.4M)|default|396004|272.7±51.1, min=78, max=579|chat, character-dialogue|[BelleGroup/generated_chat_0.4M](https://huggingface.co/datasets/BelleGroup/generated_chat_0.4M)| |[AI-ModelScope/guanaco_belle_merge_v1.0](https://modelscope.cn/datasets/AI-ModelScope/guanaco_belle_merge_v1.0)|default|693987|133.8±93.5, min=30, max=1872|QA, zh|[Chinese-Vicuna/guanaco_belle_merge_v1.0](https://huggingface.co/datasets/Chinese-Vicuna/guanaco_belle_merge_v1.0)| @@ -719,11 +720,13 @@ The table below introduces information about the datasets integrated with ms-swi |[AI-ModelScope/webnovel_cn](https://modelscope.cn/datasets/AI-ModelScope/webnovel_cn)|default|50000|1455.2±12489.4, min=524, max=490480|chat, novel|[zxbsmk/webnovel_cn](https://huggingface.co/datasets/zxbsmk/webnovel_cn)| |[AI-ModelScope/wikipedia-cn-20230720-filtered](https://modelscope.cn/datasets/AI-ModelScope/wikipedia-cn-20230720-filtered)|default|huge dataset|-|pretrain, quality|[pleisto/wikipedia-cn-20230720-filtered](https://huggingface.co/datasets/pleisto/wikipedia-cn-20230720-filtered)| |[AI-ModelScope/zhihu_rlhf_3k](https://modelscope.cn/datasets/AI-ModelScope/zhihu_rlhf_3k)|default|3460|594.5±365.9, min=31, max=1716|rlhf, dpo, zh|[liyucheng/zhihu_rlhf_3k](https://huggingface.co/datasets/liyucheng/zhihu_rlhf_3k)| -|[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| +|[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default
cls|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| |-|default|huge dataset|-|pretrain, quality|[HuggingFaceFW/fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb)| |-|auto_math_text
khanacademy<br>openstax<br>stanford<br>stories<br>web_samples_v1<br>web_samples_v2
wikihow|huge dataset|-|multi-domain, en, qa|[HuggingFaceTB/cosmopedia](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)| |[OmniData/Zhihu-KOL](https://modelscope.cn/datasets/OmniData/Zhihu-KOL)|default|huge dataset|-|zhihu, qa|[wangrui6/Zhihu-KOL](https://huggingface.co/datasets/wangrui6/Zhihu-KOL)| |[OmniData/Zhihu-KOL-More-Than-100-Upvotes](https://modelscope.cn/datasets/OmniData/Zhihu-KOL-More-Than-100-Upvotes)|default|271261|1003.4±1826.1, min=28, max=52541|zhihu, qa|[bzb2023/Zhihu-KOL-More-Than-100-Upvotes](https://huggingface.co/datasets/bzb2023/Zhihu-KOL-More-Than-100-Upvotes)| +|[PowerInfer/LONGCOT-Refine-500K](https://modelscope.cn/datasets/PowerInfer/LONGCOT-Refine-500K)|default|521921|296.5±158.4, min=39, max=4634|chat, sft, 🔥, cot|[PowerInfer/LONGCOT-Refine-500K](https://huggingface.co/datasets/PowerInfer/LONGCOT-Refine-500K)| +|[PowerInfer/QWQ-LONGCOT-500K](https://modelscope.cn/datasets/PowerInfer/QWQ-LONGCOT-500K)|default|498082|310.7±303.1, min=35, max=22941|chat, sft, 🔥, cot|[PowerInfer/QWQ-LONGCOT-500K](https://huggingface.co/datasets/PowerInfer/QWQ-LONGCOT-500K)| |[TIGER-Lab/MATH-plus](https://modelscope.cn/datasets/TIGER-Lab/MATH-plus)|train|893929|301.4±196.7, min=50, max=1162|qa, math, en, quality|[TIGER-Lab/MATH-plus](https://huggingface.co/datasets/TIGER-Lab/MATH-plus)| |[Tongyi-DataEngine/SA1B-Dense-Caption](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Dense-Caption)|default|huge dataset|-|zh, multi-modal, vqa|-| |[Tongyi-DataEngine/SA1B-Paired-Captions-Images](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Paired-Captions-Images)|default|7736284|106.4±18.5, min=48, max=193|zh, multi-modal, vqa|-| @@ -738,7 +741,7 @@ The table below introduces information about the datasets integrated with ms-swi |[damo/nlp_polylm_multialpaca_sft](https://modelscope.cn/datasets/damo/nlp_polylm_multialpaca_sft)|ar
de<br>es<br>fr<br>id<br>ja<br>ko<br>pt<br>ru<br>th
vi|131867|101.6±42.5, min=30, max=1029|chat, general, multilingual|-| |[damo/zh_cls_fudan-news](https://modelscope.cn/datasets/damo/zh_cls_fudan-news)|default|4959|3234.4±2547.5, min=91, max=19548|chat, classification|-| |[damo/zh_ner-JAVE](https://modelscope.cn/datasets/damo/zh_ner-JAVE)|default|1266|118.3±45.5, min=44, max=223|chat, ner|-| -|[hjh0119/shareAI-Llama3-DPO-zh-en-emoji](https://modelscope.cn/datasets/hjh0119/shareAI-Llama3-DPO-zh-en-emoji)|zh
en|2449|334.0±162.8, min=36, max=1801|rlhf, dpo|-| +|[hjh0119/shareAI-Llama3-DPO-zh-en-emoji](https://modelscope.cn/datasets/hjh0119/shareAI-Llama3-DPO-zh-en-emoji)|default|2449|334.0±162.8, min=36, max=1801|rlhf, dpo|-| |[huangjintao/AgentInstruct_copy](https://modelscope.cn/datasets/huangjintao/AgentInstruct_copy)|alfworld
db<br>kg<br>mind2web<br>os
webshop|1866|1144.3±635.5, min=206, max=6412|chat, agent, multi-round|-| |[iic/100PoisonMpts](https://modelscope.cn/datasets/iic/100PoisonMpts)|default|906|150.6±80.8, min=39, max=656|poison-management, zh|-| |[iic/MSAgent-MultiRole](https://modelscope.cn/datasets/iic/MSAgent-MultiRole)|default|543|413.0±79.7, min=70, max=936|chat, agent, multi-round, role-play, multi-agent|-| @@ -754,14 +757,14 @@ The table below introduces information about the datasets integrated with ms-swi |[modelscope/clue](https://modelscope.cn/datasets/modelscope/clue)|cmnli|391783|81.6±16.0, min=54, max=157|text-generation, classification|[clue](https://huggingface.co/datasets/clue)| |[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)|train
validation|454617|389.6±68.4, min=70, max=587|chat, multi-modal, vision, 🔥|-| |[shenweizhou/alpha-umi-toolbench-processed-v2](https://modelscope.cn/datasets/shenweizhou/alpha-umi-toolbench-processed-v2)|backbone
caller<br>planner
summarizer|huge dataset|-|chat, agent, 🔥|-| -|[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
medicine|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| +|[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
finance_cls<br>medicine
medicine_cls|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| |[simpleai/HC3-Chinese](https://modelscope.cn/datasets/simpleai/HC3-Chinese)|baike
baike_cls<br>open_qa<br>open_qa_cls<br>nlpcc_dbqa<br>nlpcc_dbqa_cls<br>finance<br>finance_cls<br>medicine<br>medicine_cls<br>law<br>law_cls<br>psychology
psychology_cls|39781|179.9±70.2, min=90, max=1070|text-generation, classification, 🔥|[Hello-SimpleAI/HC3-Chinese](https://huggingface.co/datasets/Hello-SimpleAI/HC3-Chinese)| |[speech_asr/speech_asr_aishell1_trainsets](https://modelscope.cn/datasets/speech_asr/speech_asr_aishell1_trainsets)|train
validation
test|141600|40.8±3.3, min=33, max=53|chat, multi-modal, audio|-| |[swift/A-OKVQA](https://modelscope.cn/datasets/swift/A-OKVQA)|default|18201|43.5±7.9, min=27, max=94|multi-modal, en, vqa, quality|[HuggingFaceM4/A-OKVQA](https://huggingface.co/datasets/HuggingFaceM4/A-OKVQA)| |[swift/ChartQA](https://modelscope.cn/datasets/swift/ChartQA)|default|28299|36.8±6.5, min=26, max=74|en, vqa, quality|[HuggingFaceM4/ChartQA](https://huggingface.co/datasets/HuggingFaceM4/ChartQA)| |[swift/GRIT](https://modelscope.cn/datasets/swift/GRIT)|caption
grounding
vqa|huge dataset|-|multi-modal, en, caption-grounding, vqa, quality|[zzliang/GRIT](https://huggingface.co/datasets/zzliang/GRIT)| |[swift/GenQA](https://modelscope.cn/datasets/swift/GenQA)|default|huge dataset|-|qa, quality, multi-task|[tomg-group-umd/GenQA](https://huggingface.co/datasets/tomg-group-umd/GenQA)| -|[swift/Infinity-Instruct](https://modelscope.cn/datasets/swift/Infinity-Instruct)|default|huge dataset|-|qa, quality, multi-task|[BAAI/Infinity-Instruct](https://huggingface.co/datasets/BAAI/Infinity-Instruct)| +|[swift/Infinity-Instruct](https://modelscope.cn/datasets/swift/Infinity-Instruct)|3M
7M<br>0625<br>Gen
7M_domains|huge dataset|-|qa, quality, multi-task|[BAAI/Infinity-Instruct](https://huggingface.co/datasets/BAAI/Infinity-Instruct)| |[swift/Mantis-Instruct](https://modelscope.cn/datasets/swift/Mantis-Instruct)|birds-to-words
chartqa<br>coinstruct<br>contrastive_caption<br>docvqa<br>dreamsim<br>dvqa<br>iconqa<br>imagecode<br>llava_665k_multi<br>lrv_multi<br>multi_vqa<br>nextqa<br>nlvr2<br>spot-the-diff<br>star
visual_story_telling|988115|619.9±156.6, min=243, max=1926|chat, multi-modal, vision|-| |[swift/MideficsDataset](https://modelscope.cn/datasets/swift/MideficsDataset)|default|3800|201.3±70.2, min=60, max=454|medical, en, vqa|[WinterSchool/MideficsDataset](https://huggingface.co/datasets/WinterSchool/MideficsDataset)| |[swift/Multimodal-Mind2Web](https://modelscope.cn/datasets/swift/Multimodal-Mind2Web)|default|1009|293855.4±331149.5, min=11301, max=3577519|agent, multi-modal|[osunlp/Multimodal-Mind2Web](https://huggingface.co/datasets/osunlp/Multimodal-Mind2Web)| diff --git a/scripts/utils/run_dataset_info.py b/scripts/utils/run_dataset_info.py index d3f331471..76878125f 100644 --- a/scripts/utils/run_dataset_info.py +++ b/scripts/utils/run_dataset_info.py @@ -59,8 +59,7 @@ def run_dataset(key, template, cache_mapping): dataset_size, stat_str = cache_mapping[dataset_id] else: num_proc = 4 - dataset, _ = load_dataset( - f'{dataset_id}:all', strict=False, num_proc=num_proc, use_hf=use_hf, download_mode='force_redownload') + dataset, _ = load_dataset(f'{dataset_id}:all', strict=False, num_proc=num_proc, use_hf=use_hf) dataset_size = len(dataset) random_state = np.random.RandomState(42) idx_list = random_state.choice(dataset_size, size=min(dataset_size, 100000), replace=False) diff --git a/swift/llm/argument/base_args/base_args.py b/swift/llm/argument/base_args/base_args.py index 22340beef..a39bac4ad 100644 --- a/swift/llm/argument/base_args/base_args.py +++ b/swift/llm/argument/base_args/base_args.py @@ -235,10 +235,9 @@ def _init_device(self): else: torch.cuda.set_device(self.local_rank) - def get_template(self, processor: 'Processor', template_type=None) -> 'Template': + def get_template(self, processor: 'Processor') -> 'Template': template_kwargs = self.get_template_kwargs() - template_type = template_type or self.template - template = get_template(template_type, processor, **template_kwargs) + template = get_template(self.template, processor, **template_kwargs) logger.info(f'default_system: {template.template_meta.default_system}') return template diff --git a/swift/llm/dataset/__init__.py b/swift/llm/dataset/__init__.py index 651aa01b9..9c344dbdd 100644 --- a/swift/llm/dataset/__init__.py +++ b/swift/llm/dataset/__init__.py @@ -1,7 +1,12 @@ # Copyright (c) Alibaba, Inc. and its affiliates. +import os +import tempfile + +import datasets.config import datasets.fingerprint from datasets import disable_caching +from modelscope.hub.utils.utils import get_cache_dir from swift.utils.torch_utils import _find_local_mac from . 
import dataset @@ -13,19 +18,34 @@ from .utils import (ConstantLengthDataset, EncodePreprocessor, GetLengthPreprocessor, LazyLLMDataset, PackingPreprocessor, sample_dataset) +_update_fingerprint = datasets.fingerprint.update_fingerprint +_get_temporary_cache_files_directory = datasets.fingerprint.get_temporary_cache_files_directory + def _update_fingerprint_mac(*args, **kwargs): # Prevent different nodes use the same location in unique shared disk mac = _find_local_mac().replace(':', '') - fp = datasets.fingerprint._update_fingerprint(*args, **kwargs) + fp = _update_fingerprint(*args, **kwargs) fp += '-' + mac if len(fp) > 64: fp = fp[:64] return fp -datasets.fingerprint._update_fingerprint = datasets.fingerprint.update_fingerprint +def _new_get_temporary_cache_files_directory(*args, **kwargs): + global DATASET_TEMP_DIR + if DATASET_TEMP_DIR is None: + tmp_dir = os.path.join(get_cache_dir(), 'tmp') + os.makedirs(tmp_dir, exist_ok=True) + DATASET_TEMP_DIR = tempfile.TemporaryDirectory(prefix=datasets.config.TEMP_CACHE_DIR_PREFIX, dir=tmp_dir) + + return DATASET_TEMP_DIR.name + + datasets.fingerprint.update_fingerprint = _update_fingerprint_mac datasets.arrow_dataset.update_fingerprint = _update_fingerprint_mac +datasets.fingerprint.get_temporary_cache_files_directory = _new_get_temporary_cache_files_directory +datasets.arrow_dataset.get_temporary_cache_files_directory = _new_get_temporary_cache_files_directory +DATASET_TEMP_DIR = None register_dataset_info() disable_caching() diff --git a/swift/llm/dataset/data/dataset_info.json b/swift/llm/dataset/data/dataset_info.json index ab95cb5a3..6f5a826e3 100644 --- a/swift/llm/dataset/data/dataset_info.json +++ b/swift/llm/dataset/data/dataset_info.json @@ -589,5 +589,15 @@ "ms_dataset_id": "AI-ModelScope/Magpie-Qwen2-Pro-200K-English", "tags": ["chat", "sft", "🔥", "en"], "hf_dataset_id": "Magpie-Align/Magpie-Qwen2-Pro-200K-English" + }, + { + "ms_dataset_id": "PowerInfer/QWQ-LONGCOT-500K", + "tags": ["chat", "sft", "🔥", "cot"], + "hf_dataset_id": "PowerInfer/QWQ-LONGCOT-500K" + }, + { + "ms_dataset_id": "PowerInfer/LONGCOT-Refine-500K", + "tags": ["chat", "sft", "🔥", "cot"], + "hf_dataset_id": "PowerInfer/LONGCOT-Refine-500K" } ] diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index 48ac03993..6a463fd88 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -1,7 +1,5 @@ # Copyright (c) Alibaba, Inc. and its affiliates. 
import ast -import os -import tempfile from collections import Counter from contextlib import contextmanager from typing import Any, Callable, Dict, List, Optional, Union @@ -10,9 +8,7 @@ from datasets import Dataset as HfDataset from datasets import Image from datasets import IterableDataset as HfIterableDataset -from datasets import Value, is_caching_enabled -from datasets.arrow_dataset import generate_random_fingerprint -from modelscope.hub.utils.utils import get_cache_dir +from datasets import Value from swift.llm import history_to_messages from swift.utils import get_logger @@ -260,13 +256,8 @@ def __call__( dataset = self.prepare_dataset(dataset) dataset = self._cast_pil_image(dataset) map_kwargs = {} - if not is_caching_enabled() and isinstance(dataset, HfDataset): - tmp_dir = os.path.join(get_cache_dir(), 'tmp') - os.makedirs(tmp_dir, exist_ok=True) - tmp_dir = tempfile.TemporaryDirectory(dir=tmp_dir) - - cache_file_name = os.path.join(tmp_dir.name, 'cache-' + generate_random_fingerprint() + '.arrow') - map_kwargs.update({'num_proc': num_proc, 'cache_file_name': cache_file_name}) + if isinstance(dataset, HfDataset): + map_kwargs['num_proc'] = num_proc with self._patch_arrow_writer(): try: dataset_mapped = dataset.map( diff --git a/swift/llm/model/model/qwen.py b/swift/llm/model/model/qwen.py index 1e28734a2..8c0eafd58 100644 --- a/swift/llm/model/model/qwen.py +++ b/swift/llm/model/model/qwen.py @@ -336,6 +336,7 @@ def _get_cast_dtype(self) -> torch.dtype: Model('Qwen/Qwen2-Math-72B', 'Qwen/Qwen2-Math-72B'), ], tags=['math']), + ModelGroup([Model('PowerInfer/SmallThinker-3B-Preview', 'PowerInfer/SmallThinker-3B-Preview')]) ], TemplateType.qwen, get_model_tokenizer_with_flash_attn, diff --git a/swift/llm/utils.py b/swift/llm/utils.py index a71b2c19f..4e734559d 100644 --- a/swift/llm/utils.py +++ b/swift/llm/utils.py @@ -25,8 +25,6 @@ Message = Dict[str, Union[str, List[Dict[str, Any]]]] Messages = List[Message] -os.environ['TOKENIZERS_PARALLELISM'] = 'true' - class ProcessorMixin: diff --git a/tests/llm/test_run.py b/tests/llm/test_run.py index f356feaed..783f2ae0d 100644 --- a/tests/llm/test_run.py +++ b/tests/llm/test_run.py @@ -1,6 +1,6 @@ if __name__ == '__main__': import os - os.environ['CUDA_VISIBLE_DEVICES'] = '2' + os.environ['CUDA_VISIBLE_DEVICES'] = '0,1' os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' import os @@ -236,7 +236,7 @@ def test_rlhf(self): torch.cuda.empty_cache() # llm rlhf # - rlhf_types = ['dpo', 'orpo', 'simpo', 'kto', 'cpo'] # , 'rm', 'ppo' + rlhf_types = ['dpo', 'orpo', 'simpo', 'kto', 'cpo', 'rm', 'ppo'] for rlhf_type in rlhf_types: dataset = ('AI-ModelScope/hh_rlhf_cn:harmless_base_cn#100' if rlhf_type != 'kto' else 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#100') @@ -289,7 +289,7 @@ def test_loss_matching(self): bool_var = use_swift_lora torch.cuda.empty_cache() output = sft_main([ - '--model', 'Qwen/Qwen-7B-Chat', '--eval_steps', '5', '--dataset', + '--model', 'Qwen/Qwen-7B-Chat', '--save_steps', '5', '--dataset', 'AI-ModelScope/leetcode-solutions-python#200', '--output_dir', output_dir, '--gradient_checkpointing', 'true', '--max_new_tokens', '100', '--attn_impl', 'flash_attn', '--target_modules', 'all-linear', '--seed', '0', '--lora_bias', 'all', '--modules_to_save', 'lm_head', '--use_swift_lora',