[Feat][Bugfix][main] Adapted SP to eagle3 (#5562)
### What this PR does / why we need it?
Adapted sp to eagle3.
Some problems may remain, e.g., accuracy in some scenarios and the
`sp`+`dp` combination.
We will fix them in follow-up changes.
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
We tested it mainly with a new `e2e` test.
```shell
pytest -s tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py::test_llama_qwen_eagle_acceptance
```
```text
.
=============================== warnings summary ===============================
<frozen importlib._bootstrap>:241
<frozen importlib._bootstrap>:241: DeprecationWarning: builtin type SwigPyPacked has no __module__ attribute
<frozen importlib._bootstrap>:241
<frozen importlib._bootstrap>:241: DeprecationWarning: builtin type SwigPyObject has no __module__ attribute
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
============= 3 passed, 1 skipped, 2 warnings in 142.05s (0:02:22) =============
```
The test run passed.
- vLLM version: v0.13.0
- vLLM main:
7157596103
Signed-off-by: drslark <slarksblood@qq.com>
This commit is contained in:
@@ -1056,6 +1056,33 @@ class NPUModelRunner(GPUModelRunner):
|
||||
input_ids, inputs_embeds, intermediate_tensors,
|
||||
max_num_scheduled_tokens)
|
||||
|
||||
# all-gather one hidden-states in sp scene
|
||||
@staticmethod
def _all_gather_hidden_states(hidden_states):
    """All-gather one hidden-states tensor and strip trailing padding.

    Calls ``tensor_model_parallel_all_gather(hidden_states, 0)`` and then
    reads ``pad_size`` from the current forward context. When ``pad_size``
    is positive, the last ``pad_size`` entries along the first dimension
    are sliced off; otherwise the gathered tensor is returned unchanged.

    NOTE(review): the second argument ``0`` is presumably the gather
    dimension (the token dimension) -- confirm against the helper.
    The two-axis slice implies the tensor has at least two dimensions.
    """
    gathered = tensor_model_parallel_all_gather(hidden_states, 0)
    trailing_pad = get_forward_context().pad_size

    if trailing_pad > 0:
        # Drop the padded rows at the tail of the first dimension.
        return gathered[:-trailing_pad, :]

    return gathered
|
||||
|
||||
# all-gather a list of hidden-states in sp scene
|
||||
@staticmethod
def _all_gather_hidden_states_list(hidden_states_list):
    """All-gather every hidden-states entry of an iterable.

    Applies ``NPUModelRunner._all_gather_hidden_states`` to each element
    of ``hidden_states_list`` in iteration order and returns the results
    as a new list.
    """
    gathered_list = []
    for single_hidden_states in hidden_states_list:
        gathered_list.append(
            NPUModelRunner._all_gather_hidden_states(single_hidden_states))
    return gathered_list
|
||||
|
||||
# all-gather hidden-states in last layer with aux-hidden-states in sp scene
|
||||
@staticmethod
def _all_gather_hidden_states_and_aux(hidden_states):
    """All-gather hidden states that may come bundled with aux states.

    If ``hidden_states`` is not a tuple it is gathered directly via
    ``NPUModelRunner._all_gather_hidden_states``. If it is a tuple,
    element 0 is gathered as a single tensor and element 1 is gathered
    element-wise via ``NPUModelRunner._all_gather_hidden_states_list``;
    a 2-tuple of the two results is returned.

    NOTE(review): per the surrounding comment, element 0 is presumably
    the last-layer hidden states and element 1 the aux hidden states
    used by eagle3 -- confirm against the model's forward output.
    """
    # Plain (non-tuple) case: a single gather is enough.
    if not isinstance(hidden_states, tuple):
        return NPUModelRunner._all_gather_hidden_states(hidden_states)

    # Keep the original order: main hidden states first, then the aux list.
    gathered_main = NPUModelRunner._all_gather_hidden_states(hidden_states[0])
    gathered_aux = NPUModelRunner._all_gather_hidden_states_list(
        hidden_states[1])
    return (gathered_main, gathered_aux)
|
||||
|
||||
def _generate_process_reqs_hidden_states(self, maybe_padded_num_tokens,
|
||||
input_ids, positions,
|
||||
intermediate_tensors,
|
||||
@@ -1103,10 +1130,8 @@ class NPUModelRunner(GPUModelRunner):
|
||||
|
||||
if get_forward_context().sp_enabled and not isinstance(
|
||||
hidden_states, IntermediateTensors):
|
||||
hidden_states = tensor_model_parallel_all_gather(hidden_states, 0)
|
||||
pad_size = get_forward_context().pad_size
|
||||
if pad_size > 0:
|
||||
hidden_states = hidden_states[:-pad_size, :]
|
||||
hidden_states = self._all_gather_hidden_states_and_aux(
|
||||
hidden_states)
|
||||
return hidden_states if self.pcp_size == 1 else self.pcp_manager.get_restore_hidden_states(
|
||||
hidden_states)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user