from vllm import ModelRegistry


def register_model() -> None:
    """Register the Kunlun model implementations with vLLM's ``ModelRegistry``.

    Each architecture name is mapped to a ``"module:ClassName"`` string and
    passed to ``ModelRegistry.register_model``. Entries are registered in the
    order listed below, and every architecture name is registered exactly
    once.

    Note that some architecture names intentionally point at a class with a
    different name (e.g. ``DeepseekV32ForCausalLM`` reuses
    ``DeepseekV3ForCausalLM``).
    """
    # These modules are imported eagerly even though the names are unused
    # (hence the F401 suppressions).
    # NOTE(review): presumably this is for import-time side effects or to
    # fail fast on a broken install -- confirm before removing.
    from .qwen2_5_vl import Qwen2_5_VLForConditionalGeneration  # noqa: F401
    from .qwen2_vl import Qwen2VLForConditionalGeneration  # noqa: F401
    from .qwen3_moe import Qwen3MoeForCausalLM  # noqa: F401
    from .qwen3_omni_moe_thinker import (  # noqa: F401
        Qwen3OmniMoeThinkerForConditionalGeneration,
    )
    from .qwen3_vl import Qwen3VLForConditionalGeneration  # noqa: F401
    from .qwen3_vl_moe import Qwen3VLMoeForConditionalGeneration  # noqa: F401

    # Currently disabled (neither imported nor registered): DemoModel
    # ("vllm_kunlun.model_executor.models.demo_model:DemoModel"),
    # Llama4ForCausalLM (.llama4), Llama4ForConditionalGeneration (.mllama4),
    # and KunlunDeepseekV2MoE (.deepseek_v2).

    # (architecture name, "module:ClassName") pairs, in registration order.
    # "GptOssForCausalLM" used to be listed twice with the same target; the
    # redundant second registration has been dropped.
    kunlun_models = (
        (
            "Qwen2VLForConditionalGeneration",
            "vllm_kunlun.models.qwen2_vl:Qwen2VLForConditionalGeneration",
        ),
        (
            "Qwen2_5_VLForConditionalGeneration",
            "vllm_kunlun.models.qwen2_5_vl:Qwen2_5_VLForConditionalGeneration",
        ),
        ("Qwen3ForCausalLM", "vllm_kunlun.models.qwen3:Qwen3ForCausalLM"),
        (
            "Qwen3MoeForCausalLM",
            "vllm_kunlun.models.qwen3_moe:Qwen3MoeForCausalLM",
        ),
        (
            "Qwen3NextForCausalLM",
            "vllm_kunlun.models.qwen3_next:Qwen3NextForCausalLM",
        ),
        ("Qwen3NextMTP", "vllm_kunlun.models.qwen3_next_mtp:Qwen3NextMTP"),
        ("GlmForCausalLM", "vllm_kunlun.models.glm:GlmForCausalLM"),
        ("GptOssForCausalLM", "vllm_kunlun.models.gpt_oss:GptOssForCausalLM"),
        (
            "InternLM2ForCausalLM",
            "vllm_kunlun.models.internlm2:InternLM2ForCausalLM",
        ),
        ("InternVLChatModel", "vllm_kunlun.models.internvl:InternVLChatModel"),
        (
            "InternS1ForConditionalGeneration",
            "vllm_kunlun.models.interns1:InternS1ForConditionalGeneration",
        ),
        (
            "Qwen3VLForConditionalGeneration",
            "vllm_kunlun.models.qwen3_vl:Qwen3VLForConditionalGeneration",
        ),
        (
            "Qwen3VLMoeForConditionalGeneration",
            "vllm_kunlun.models.qwen3_vl_moe:Qwen3VLMoeForConditionalGeneration",
        ),
        (
            "Qwen3OmniMoeForConditionalGeneration",
            "vllm_kunlun.models.qwen3_omni_moe_thinker:Qwen3OmniMoeThinkerForConditionalGeneration",
        ),
        ("SeedOssForCausalLM", "vllm_kunlun.models.seed_oss:SeedOssForCausalLM"),
        (
            "MiMoV2FlashForCausalLM",
            "vllm_kunlun.models.mimo_v2_flash:MiMoV2FlashForCausalLM",
        ),
        (
            "DeepseekV3ForCausalLM",
            "vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM",
        ),
        (
            "DeepseekV32ForCausalLM",
            "vllm_kunlun.models.deepseek_v2:DeepseekV3ForCausalLM",
        ),
        ("DeepSeekMTPModel", "vllm_kunlun.models.deepseek_mtp:DeepSeekMTP"),
        (
            "GlmMoeDsaForCausalLM",
            "vllm_kunlun.models.deepseek_v2:GlmMoeDsaForCausalLM",
        ),
    )

    for architecture, target in kunlun_models:
        ModelRegistry.register_model(architecture, target)


def register_quant_method() -> None:
    """Placeholder for registering quantization methods.

    TODO: not implemented yet; calling this function currently does nothing.
    """