[Bugfix] Fix setting of speculative_config.enforce_eager for dsv32 (#5945)
### What this PR does / why we need it?
This PR aims to fix the setting of `speculative_config.enforce_eager` in
DeepSeek V3.2 MTP. The point is that vLLM sets
`speculative_config.enforce_eager` to True when using deepseek_v32 with
MTP. Since we support graph mode, we simply ignore it here. However,
this fix will also implicitly ignore the user's own setting of
`speculative_config.enforce_eager`; we need to take care to remove this
workaround once vLLM supports the feature properly.
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
by ci
- vLLM version: v0.13.0
- vLLM main:
2c24bc6996
Signed-off-by: Zetong Li <slippersss@126.com>
This commit is contained in:
@@ -412,6 +412,21 @@ class NPUPlatform(Platform):
         os.environ["PYTORCH_NPU_ALLOC_CONF"] = npu_alloc_configs
         logger.info("Set PYTORCH_NPU_ALLOC_CONF=%s", npu_alloc_configs)

+        # NOTE: vllm sets `speculative_config.enforce_eager` as True if using
+        # deepseek_v32 with mtp. Since we support graph mode, we simply ignore
+        # it here. However, this fix will also implicitly ignore user setting of
+        # `speculative_config.enforce_eager`, we need to take care and remove it
+        # once vllm supports this feature.
+        speculative_config = vllm_config.speculative_config
+        if (
+            model_config
+            and speculative_config
+            and hasattr(model_config.hf_text_config, "model_type")
+            and model_config.hf_text_config.model_type == "deepseek_v32"
+            and speculative_config.enforce_eager
+        ):
+            speculative_config.enforce_eager = False
+
     @classmethod
     def import_kernels(cls) -> None:
         # Directly importing vllm_ascend_C prevents ASCEND_RT_VISIBLE_DEVICES
|||||||
@@ -246,7 +246,7 @@ class MtpProposer(EagleProposer):
                 -1]:
             num_input_tokens = self.vllm_config.pad_for_cudagraph(
                 num_scheduled_tokens)
         elif self.use_aclgraph and num_tokens <= self.runner.cudagraph_batch_sizes[
                 -1]:
             # Acl graph mode, add padding to the batch size
             num_input_tokens = self.vllm_config.pad_for_cudagraph(num_tokens)
|||||||
Reference in New Issue
Block a user