from vllm import ModelRegistry

import vllm_ascend.envs as envs_ascend

def register_model():
    """Register Ascend-specific model implementations with vLLM's ModelRegistry.

    Each call maps a HuggingFace architecture name to an Ascend (NPU)
    implementation using vLLM's lazy ``"module.path:ClassName"`` entry-point
    string, so the model class is only imported when that architecture is
    actually loaded.

    Must be called before vLLM config initialization so that architectures
    unknown to upstream vLLM (e.g. PanguProMoE) resolve correctly.
    """
    ModelRegistry.register_model(
        "Qwen2VLForConditionalGeneration",
        "vllm_ascend.models.qwen2_vl:AscendQwen2VLForConditionalGeneration")

    ModelRegistry.register_model(
        "Qwen3VLMoeForConditionalGeneration",
        "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLMoeForConditionalGeneration"
    )

    ModelRegistry.register_model(
        "Qwen3VLForConditionalGeneration",
        "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen3VLForConditionalGeneration"
    )

    # Qwen2.5-VL has two Ascend implementations; the env flag selects between
    # the padding-optimized variant and the without-padding variant.
    if envs_ascend.USE_OPTIMIZED_MODEL:
        ModelRegistry.register_model(
            "Qwen2_5_VLForConditionalGeneration",
            "vllm_ascend.models.qwen2_5_vl:AscendQwen2_5_VLForConditionalGeneration"
        )
    else:
        ModelRegistry.register_model(
            "Qwen2_5_VLForConditionalGeneration",
            "vllm_ascend.models.qwen2_5_vl_without_padding:AscendQwen2_5_VLForConditionalGeneration_Without_Padding"
        )

    ModelRegistry.register_model(
        "DeepseekV32ForCausalLM",
        "vllm_ascend.models.deepseek_v3_2:CustomDeepseekV3ForCausalLM")

    # There is no PanguProMoEForCausalLM in vLLM, so we should register it
    # before vLLM config initialization to make sure the model can be loaded
    # correctly. This register step can be removed once vLLM supports
    # PanguProMoEForCausalLM.
    ModelRegistry.register_model(
        "PanguProMoEForCausalLM",
        "vllm_ascend.torchair.models.torchair_pangu_moe:PanguProMoEForCausalLM"
    )

    ModelRegistry.register_model(
        "Qwen3NextForCausalLM",
        "vllm_ascend.models.qwen3_next:CustomQwen3NextForCausalLM")
|