diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json index 77dda1a84d..70b17daa61 100644 --- a/xinference/model/llm/llm_family.json +++ b/xinference/model/llm/llm_family.json @@ -7244,5 +7244,365 @@ "model_revision": "00e59e64f47d3c78e4cfbdd345888479797e8109" } ] + }, + { + "version": 1, + "context_length": 131072, + "model_name": "qwen2.5-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat", + "tools" + ], + "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "0_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-0.5B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-1.5B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-3B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-7B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 14, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-14B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 32, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-32B-Instruct" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 72, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "Qwen/Qwen2.5-72B-Instruct" + }, + { + "model_format": "gptq", + "model_size_in_billions": "0_5", + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": "1_5", + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": 3, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": 7, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": 14, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": 32, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "gptq", + "model_size_in_billions": 72, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "Qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}" + }, + { + "model_format": "awq", + "model_size_in_billions": "0_5", + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-0.5B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": "1_5", + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-1.5B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": 3, + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-3B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": 7, + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-7B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": 14, + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-14B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": 32, + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-32B-Instruct-AWQ" + }, + { + "model_format": "awq", + "model_size_in_billions": 72, + "quantizations": [ + "Int4" + ], + "model_id": "Qwen/Qwen2.5-72B-Instruct-AWQ" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "0_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-0.5B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-0_5b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "1_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-1.5B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-1_5b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 3, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-3B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-3b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-7B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 14, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-14B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-14b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 32, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-32B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 72, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "Qwen/Qwen2.5-72B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2_5-72b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q5_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q6_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q8_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "fp16": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ] + } + } + ], + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "stop_token_ids": [ + 151643, + 151644, + 151645 + ], + "stop": [ + "<|endoftext|>", + "<|im_start|>", + "<|im_end|>" + ] } ] diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json index fdaab458aa..7309ee9651 100644 --- a/xinference/model/llm/llm_family_modelscope.json +++ b/xinference/model/llm/llm_family_modelscope.json @@ -4957,5 +4957,393 @@ "model_revision": "master" } ] + }, + { + "version": 1, + "context_length": 131072, + "model_name": "qwen2.5-instruct", + "model_lang": [ + "en", + "zh" + ], + "model_ability": [ + "chat", + "tools" + ], + "model_description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters.", + "model_specs": [ + { + "model_format": "pytorch", + "model_size_in_billions": "0_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-0.5B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": "1_5", + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-1.5B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 3, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-3B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 7, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-7B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 14, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-14B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 32, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-32B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "pytorch", + "model_size_in_billions": 72, + "quantizations": [ + "4-bit", + "8-bit", + "none" + ], + "model_id": "qwen/Qwen2.5-72B-Instruct", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": "0_5", + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-0.5B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": "1_5", + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-1.5B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 3, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-3B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 7, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-7B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 14, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-14B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 32, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-32B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "gptq", + "model_size_in_billions": 72, + "quantizations": [ + "Int4", + "Int8" + ], + "model_id": "qwen/Qwen2.5-72B-Instruct-GPTQ-{quantization}", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": "0_5", + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2-0.5B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": "1_5", + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2-1.5B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": 3, + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2.5-3B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": 7, + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2.5-7B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions":14, + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2.5-14B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": 32, + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2.5-32B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "awq", + "model_size_in_billions": 72, + "quantizations": [ + "Int4" + ], + "model_id": "qwen/Qwen2.5-72B-Instruct-AWQ", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "0_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-0.5B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-0_5b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": "1_5", + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-1.5B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-1_5b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 3, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-3B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-3b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 7, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-7B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-7b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 14, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-14B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-14b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 32, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-32B-Instruct-GGUF", + "model_file_name_template": "qwen2_5-32b-instruct-{quantization}.gguf", + "model_hub": "modelscope" + }, + { + "model_format": "ggufv2", + "model_size_in_billions": 72, + "quantizations": [ + "q2_k", + "q3_k_m", + "q4_0", + "q4_k_m", + "q5_0", + "q5_k_m", + "q6_k", + "q8_0", + "fp16" + ], + "model_id": "qwen/Qwen2.5-72B-Instruct-GGUF", + "model_hub": "modelscope", + "model_file_name_template": "qwen2_5-72b-instruct-{quantization}.gguf", + "model_file_name_split_template": "qwen2_5-72b-instruct-{quantization}-{part}.gguf", + "quantization_parts": { + "q5_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "q5_k_m": [ + "00001-of-00002", + "00002-of-00002" + ], + "q6_k": [ + "00001-of-00002", + "00002-of-00002" + ], + "q8_0": [ + "00001-of-00002", + "00002-of-00002" + ], + "fp16": [ + "00001-of-00004", + "00002-of-00004", + "00003-of-00004", + "00004-of-00004" + ] + } + } + ], + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{{\\\"name\\\": , \\\"arguments\\\": }}\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "stop_token_ids": [ + 151643, + 151644, + 151645 + ], + "stop": [ + "<|endoftext|>", + "<|im_start|>", + "<|im_end|>" + ] } ] diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py index 811fd5d342..3aaee0738f 100644 --- a/xinference/model/llm/vllm/core.py +++ b/xinference/model/llm/vllm/core.py @@ -138,6 +138,7 @@ class VLLMGenerateConfig(TypedDict, total=False): VLLM_SUPPORTED_MODELS.append("codeqwen1.5") VLLM_SUPPORTED_CHAT_MODELS.append("codeqwen1.5-chat") VLLM_SUPPORTED_CHAT_MODELS.append("qwen2-instruct") + VLLM_SUPPORTED_CHAT_MODELS.append("qwen2.5-instruct") if VLLM_INSTALLED and vllm.__version__ >= "0.3.2": VLLM_SUPPORTED_CHAT_MODELS.append("gemma-it")