diff --git a/vllm_ascend/spec_decode/eagle_proposer.py b/vllm_ascend/spec_decode/eagle_proposer.py index 2dd1a7d6..6b47e6bf 100644 --- a/vllm_ascend/spec_decode/eagle_proposer.py +++ b/vllm_ascend/spec_decode/eagle_proposer.py @@ -51,7 +51,7 @@ class EagleProposer(Proposer): not self.vllm_config.model_config.enforce_eager) self.cudagraph_batch_sizes = list( - reversed( + sorted( self.vllm_config.compilation_config.cudagraph_capture_sizes)) # persistent buffers for cuda graph diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py index 89425c6a..4b7bfad9 100644 --- a/vllm_ascend/spec_decode/mtp_proposer.py +++ b/vllm_ascend/spec_decode/mtp_proposer.py @@ -108,7 +108,7 @@ class MtpProposer(Proposer): self.use_aclgraph = self.runner._use_aclgraph() self.cudagraph_batch_sizes = (list( - reversed( + sorted( self.vllm_config.compilation_config.cudagraph_capture_sizes)) if self.use_aclgraph else [])