Upgrade to vllm 0.17.0 corex v4.1 overlay

This commit is contained in:
2026-04-29 19:38:22 +08:00
parent 8fac6062e4
commit 938d0854a5
430 changed files with 35969 additions and 14511 deletions

View File

@@ -75,12 +75,14 @@ _TEXT_GENERATION_MODELS = {
"AquilaForCausalLM": ("llama", "LlamaForCausalLM"), # AquilaChat2
"ArceeForCausalLM": ("arcee", "ArceeForCausalLM"),
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"AXK1ForCausalLM": ("AXK1", "AXK1ForCausalLM"),
# baichuan-7b, upper case 'C' in the class name
"BaiChuanForCausalLM": ("baichuan", "BaiChuanForCausalLM"),
# baichuan-13b, lower case 'c' in the class name
"BaichuanForCausalLM": ("baichuan", "BaichuanForCausalLM"),
"BailingMoeForCausalLM": ("bailing_moe", "BailingMoeForCausalLM"),
"BailingMoeV2ForCausalLM": ("bailing_moe", "BailingMoeV2ForCausalLM"),
"BailingMoeV2_5ForCausalLM": ("bailing_moe_linear", "BailingMoeV25ForCausalLM"),
"BambaForCausalLM": ("bamba", "BambaForCausalLM"),
"BloomForCausalLM": ("bloom", "BloomForCausalLM"),
"ChatGLMModel": ("chatglm", "ChatGLMForCausalLM"),
@@ -259,6 +261,10 @@ _EMBEDDING_MODELS = {
"OpsColQwen3Model": ("colqwen3", "ColQwen3Model"),
"Qwen3VLNemotronEmbedModel": ("colqwen3", "ColQwen3Model"),
"SiglipModel": ("siglip", "SiglipEmbeddingModel"),
"LlamaNemotronVLModel": (
"nemotron_vl",
"LlamaNemotronVLForEmbedding",
),
# Technically, Terratorch models operate on images for both input and
# output. They are registered here because they piggy-back on embedding
# models for the time being.
@@ -278,6 +284,10 @@ _CROSS_ENCODER_MODELS = {
"llama",
"LlamaBidirectionalForSequenceClassification",
),
"LlamaNemotronVLForSequenceClassification": (
"nemotron_vl",
"LlamaNemotronVLForSequenceClassification",
),
"ModernBertForSequenceClassification": (
"modernbert",
"ModernBertForSequenceClassification",
@@ -331,6 +341,10 @@ _MULTIMODAL_MODELS = {
"ernie45_vl",
"Ernie4_5_VLMoeForConditionalGeneration",
),
"FireRedASR2ForConditionalGeneration": (
"fireredasr2",
"FireRedASR2ForConditionalGeneration",
),
"FunASRForConditionalGeneration": ("funasr", "FunASRForConditionalGeneration"), # noqa: E501
"FunAudioChatForConditionalGeneration": (
"funaudiochat",
@@ -506,6 +520,7 @@ _MULTIMODAL_MODELS = {
}
_SPECULATIVE_DECODING_MODELS = {
"ExtractHiddenStatesModel": ("extract_hidden_states", "ExtractHiddenStatesModel"),
"MiMoMTPModel": ("mimo_mtp", "MiMoMTP"),
"EagleLlamaForCausalLM": ("llama_eagle", "EagleLlamaForCausalLM"),
"EagleLlama4ForCausalLM": ("llama4_eagle", "EagleLlama4ForCausalLM"),