diff --git a/vllm_ascend/spec_decode/interface.py b/vllm_ascend/spec_decode/interface.py index 3f0a36b..ad4e751 100644 --- a/vllm_ascend/spec_decode/interface.py +++ b/vllm_ascend/spec_decode/interface.py @@ -35,7 +35,8 @@ class Proposer: num_reqs: int = 0, num_tokens_across_dp: Optional[torch.Tensor] = None, aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, - batch_descriptor=None): + batch_descriptor=None, + dummy_compute_logits=lambda hidden_states: None): """Called by dummy_run in modle_runner""" raise NotImplementedError diff --git a/vllm_ascend/spec_decode/ngram_proposer.py b/vllm_ascend/spec_decode/ngram_proposer.py index 39a894b..3a9262a 100644 --- a/vllm_ascend/spec_decode/ngram_proposer.py +++ b/vllm_ascend/spec_decode/ngram_proposer.py @@ -26,7 +26,8 @@ class NgramProposer(VllmNgramProposer, Proposer): num_reqs=None, num_tokens_across_dp=None, aclgraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, - batch_descriptor=None): + batch_descriptor=None, + dummy_compute_logits=lambda hidden_states: None): pass def generate_token_ids(self,