# File: xc-llm-kunlun/vllm_kunlun/models/__init__.py
# (Repository web-view header: 116 lines, 3.7 KiB, Python; blame stamp 2025-12-10 12:05:39 +08:00.)
from vllm import ModelRegistry
def register_model():
    """Register the Kunlun model implementations with vLLM's ``ModelRegistry``.

    Every architecture name is mapped to a ``"module:ClassName"`` string and
    passed to ``ModelRegistry.register_model``. Registration happens in the
    order the entries are listed below.

    Returns:
        None.
    """
    # These names are not used in this function (hence the F401 waivers).
    # NOTE(review): presumably they are imported for import-time side effects;
    # confirm before removing any of them.
    from .qwen2_5_vl import Qwen2_5_VLForConditionalGeneration  # noqa: F401
    from .qwen2_vl import Qwen2VLForConditionalGeneration  # noqa: F401
    from .qwen3_moe import Qwen3MoeForCausalLM  # noqa: F401
    from .qwen3_omni_moe_thinker import (  # noqa: F401
        Qwen3OmniMoeThinkerForConditionalGeneration,
    )
    from .qwen3_vl import Qwen3VLForConditionalGeneration  # noqa: F401
    from .qwen3_vl_moe import Qwen3VLMoeForConditionalGeneration  # noqa: F401

    # Architecture name -> "module:ClassName" target. A dict keeps insertion
    # order and makes an accidental duplicate architecture name impossible
    # ("GptOssForCausalLM" used to be registered twice with the same target).
    model_targets = {
        "Qwen2VLForConditionalGeneration":
            "vllm_kunlun.models.qwen2_vl:Qwen2VLForConditionalGeneration",
        "Qwen2_5_VLForConditionalGeneration":
            "vllm_kunlun.models.qwen2_5_vl:Qwen2_5_VLForConditionalGeneration",
        "Qwen3ForCausalLM":
            "vllm_kunlun.models.qwen3:Qwen3ForCausalLM",
        "Qwen3MoeForCausalLM":
            "vllm_kunlun.models.qwen3_moe:Qwen3MoeForCausalLM",
        "Qwen3NextForCausalLM":
            "vllm_kunlun.models.qwen3_next:Qwen3NextForCausalLM",
        "Qwen3NextMTP":
            "vllm_kunlun.models.qwen3_next_mtp:Qwen3NextMTP",
        "GlmForCausalLM":
            "vllm_kunlun.models.glm:GlmForCausalLM",
        "GptOssForCausalLM":
            "vllm_kunlun.models.gpt_oss:GptOssForCausalLM",
        "InternLM2ForCausalLM":
            "vllm_kunlun.models.internlm2:InternLM2ForCausalLM",
        "InternVLChatModel":
            "vllm_kunlun.models.internvl:InternVLChatModel",
        "InternS1ForConditionalGeneration":
            "vllm_kunlun.models.interns1:InternS1ForConditionalGeneration",
        "Qwen3VLForConditionalGeneration":
            "vllm_kunlun.models.qwen3_vl:Qwen3VLForConditionalGeneration",
        "Qwen3VLMoeForConditionalGeneration":
            "vllm_kunlun.models.qwen3_vl_moe:Qwen3VLMoeForConditionalGeneration",
        # The Omni architecture name is served by the "Thinker" class.
        "Qwen3OmniMoeForConditionalGeneration":
            "vllm_kunlun.models.qwen3_omni_moe_thinker:Qwen3OmniMoeThinkerForConditionalGeneration",
        "SeedOssForCausalLM":
            "vllm_kunlun.models.seed_oss:SeedOssForCausalLM",
        "MiMoV2FlashForCausalLM":
            "vllm_kunlun.models.mimo_v2_flash:MiMoV2FlashForCausalLM",
        "DeepseekV3ForCausalLM":
            "vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM",
        # DeepseekV32 maps to the same class as DeepseekV3.
        # NOTE(review): looks like an intentional alias -- confirm.
        "DeepseekV32ForCausalLM":
            "vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM",
        "DeepSeekMTPModel":
            "vllm_kunlun.models.deepseek_mtp:DeepSeekMTP",
        "GlmMoeDsaForCausalLM":
            "vllm_kunlun.models.deepseek_v2:GlmMoeDsaForCausalLM",
    }

    for model_arch, model_target in model_targets.items():
        ModelRegistry.register_model(model_arch, model_target)
def register_quant_method():
    """Placeholder hook for registering quantization methods.

    Not implemented yet: the body contains no statements besides this
    docstring, so calling it does nothing and returns ``None``.
    """
    # TODO: register quantization methods here once they are implemented.