[Spec Decode] Clean up spec decode interface (#6947)

This pull request refactors the speculative decoding proposer interface to align with upstream vLLM, removing the local `Proposer` interface and renaming the proposer methods to `propose`. This is the first step. In the future, we should remove the class registration and add only a few Ascend-specific methods once the architecture in vLLM is ready. - vLLM version: v0.16.0 - vLLM main: 15d76f74e2 Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-03-05 14:30:10 +08:00
parent 2bd9c35788
commit 13777bf3f0
11 changed files with 194 additions and 315 deletions
--- a/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md
+++ b/docs/source/user_guide/feature_guide/Multi_Token_Prediction.md
@@ -47,7 +47,6 @@ mtp_proposer.py
 ├── Proposer
 │   ├── load_model
 │   ├── dummy_run
-│   ├── generate_token_ids
 │   ├── _prepare_inputs
 │   ├── _propose
 ```
@@ -86,11 +85,11 @@ def get_spec_decode_method(method,
                           device,
                           runner):
    if method == "ngram":
-        return NgramProposer(vllm_config, device, runner)
+        return AscendNgramProposer(vllm_config, device, runner)
    elif method in ["eagle", "eagle3"]:
-        return EagleProposer(vllm_config, device, runner)
+        return AscendEagleProposer(vllm_config, device, runner)
    elif method == 'mtp':
-        return MtpProposer(vllm_config, device, runner)
+        return AscendMtpProposer(vllm_config, device, runner)
    else:
        raise ValueError("Unknown speculative decoding method: "
                         f"{method}")