diff --git a/vllm_ascend/ascend_forward_context.py b/vllm_ascend/ascend_forward_context.py index ea137b04..d59f02be 100644 --- a/vllm_ascend/ascend_forward_context.py +++ b/vllm_ascend/ascend_forward_context.py @@ -87,6 +87,10 @@ def set_ascend_forward_context( if is_context_moe_model: sp_enabled = enable_sp(vllm_config) and num_tokens is not None mmrs_fusion = False + elif is_draft_model: + # TODO: for dense drafter, `sp` is redundant and is not compatible with `dp` and `graph`. + # Disable it to avoid more problems. + sp_enabled = False else: sp_enabled = enable_sp(vllm_config) and num_tokens is not None and num_tokens > 1000