diff --git a/vllm_ascend/ops/rotary_embedding.py b/vllm_ascend/ops/rotary_embedding.py index cb7538d5..15bda163 100644 --- a/vllm_ascend/ops/rotary_embedding.py +++ b/vllm_ascend/ops/rotary_embedding.py @@ -76,6 +76,8 @@ def set_cos_and_sin(vllm_config, max_num_reqs, decode_token_per_req, dtype, devi # For models using partial rope like Qwen3-Next. if hasattr(model_config.hf_text_config, "partial_rotary_factor"): rope_dim = int(rope_dim * model_config.hf_text_config.partial_rotary_factor) + elif hasattr(model_config.hf_text_config, "rotary_dim"): + rope_dim = int(model_config.hf_text_config.rotary_dim) _cos = torch.ones(1, max_num_batched_tokens, 1, rope_dim, dtype=dtype, device=device) _sin = torch.zeros(1, max_num_batched_tokens, 1, rope_dim, dtype=dtype, device=device)