[Spec Decode] Clean up spec decode interface (#6947)
This pull request refactors the speculative decoding proposer interface
to align with upstream vLLM, removing the local `Proposer` interface and
renaming methods to `propose`.
This is the first step. In the future, we should remove the class
register and add just a few Ascend-specific methods once the architecture
in vLLM is ready.
- vLLM version: v0.16.0
- vLLM main:
15d76f74e2
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -47,7 +47,6 @@ mtp_proposer.py
|
||||
├── Proposer
|
||||
│ ├── load_model
|
||||
│ ├── dummy_run
|
||||
│ ├── generate_token_ids
|
||||
│ ├── _prepare_inputs
|
||||
│ ├── _propose
|
||||
```
|
||||
@@ -86,11 +85,11 @@ def get_spec_decode_method(method,
|
||||
device,
|
||||
runner):
|
||||
if method == "ngram":
|
||||
return NgramProposer(vllm_config, device, runner)
|
||||
return AscendNgramProposer(vllm_config, device, runner)
|
||||
elif method in ["eagle", "eagle3"]:
|
||||
return EagleProposer(vllm_config, device, runner)
|
||||
return AscendEagleProposer(vllm_config, device, runner)
|
||||
elif method == 'mtp':
|
||||
return MtpProposer(vllm_config, device, runner)
|
||||
return AscendMtpProposer(vllm_config, device, runner)
|
||||
else:
|
||||
raise ValueError("Unknown speculative decoding method: "
|
||||
f"{method}")
|
||||
|
||||
Reference in New Issue
Block a user