[EPLB][Bugfix] EPLB support fp/bf16 (#5531)

### What this PR does / why we need it?
EPLB now supports the fp16/bf16 dtypes.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
w8a8_dynamic Baseline:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |

w8a8_dynamic eplb:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |

The fp16 conversion works correctly.
The fp16 test is in progress.

Baseline fp16
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |

eplb fp16
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 83.33 |

- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1

Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
LI SHENGYONG
2026-01-26 14:28:16 +08:00
committed by GitHub
parent 52d4acfa51
commit 611e223b7d
4 changed files with 67 additions and 118 deletions

View File

@@ -1,61 +0,0 @@
import pytest
from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor
class DummyAdaptor(EplbAdaptor):
    """Minimal concrete ``EplbAdaptor`` used by the tests below.

    Captures constructor kwargs and returns sentinel values from every
    overridden hook so tests can verify the wiring.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep the raw kwargs so tests can assert on them later.
        self.args = kwargs

    def get_rank_expert_workload(self):
        # Sentinel string instead of a real workload tensor.
        return "workload"

    def do_update_expert_map(self, layer_id, updated_expert_map):
        # Echo the inputs back for verification by the caller.
        return {"layer_id": layer_id, "map": updated_expert_map}

    def do_update_expert_weight(self, layer_id, local_expert_to_replace,
                                buffer_tensor_id):
        result = {
            "layer_id": layer_id,
            "replace": local_expert_to_replace,
            "buffer": buffer_tensor_id,
        }
        return result
def test_base_class_methods_raise():
    """Every abstract hook on the base EplbAdaptor must raise."""
    adaptor = EplbAdaptor()
    abstract_calls = (
        lambda: adaptor.get_rank_expert_workload(),
        lambda: adaptor.do_update_expert_map(1, {}),
        lambda: adaptor.do_update_expert_weight(1, "x", "y"),
    )
    for invoke in abstract_calls:
        with pytest.raises(NotImplementedError):
            invoke()
def test_dummy_adaptor_init_and_args():
    """Constructor kwargs are captured verbatim in ``args``."""
    adaptor = DummyAdaptor(test_arg=123)
    captured = adaptor.args
    assert captured["test_arg"] == 123
def test_get_rank_expert_workload():
    """The dummy workload sentinel is returned unchanged."""
    assert DummyAdaptor().get_rank_expert_workload() == "workload"
def test_do_update_expert_map():
    """Layer id and expert map are echoed back by the dummy adaptor."""
    adaptor = DummyAdaptor()
    expert_map = {"expert": 1}
    outcome = adaptor.do_update_expert_map(2, expert_map)
    assert outcome["layer_id"] == 2
    assert outcome["map"] == expert_map
def test_do_update_expert_weight():
    """All three arguments round-trip through the returned dict."""
    adaptor = DummyAdaptor()
    outcome = adaptor.do_update_expert_weight(1, "expertA", "bufferX")
    expected = {"layer_id": 1, "replace": "expertA", "buffer": "bufferX"}
    assert outcome == expected

View File

@@ -0,0 +1,39 @@
import unittest
from unittest.mock import MagicMock, patch
import torch
from vllm_ascend.eplb.adaptor.vllm_adaptor import VllmEplbAdaptor
from transformers import DeepseekV2Config
class TestVllmAdaptor(unittest.TestCase):
    """Smoke tests: ``VllmEplbAdaptor`` must construct for both the
    quantized (w8a8) and unquantized (fp16/bf16) model paths.
    """

    def setUp(self):
        n_routed_experts = 256
        mock_model = MagicMock()
        # The adaptor iterates model parameters; an empty mapping keeps
        # the mock minimal.
        mock_model.model.named_parameters.return_value = dict()
        config = DeepseekV2Config(n_routed_experts=n_routed_experts)
        mock_model.config = config
        mock_model.get_expert_map.return_value = list(range(n_routed_experts))
        mock_model.get_log2phy_map.return_value = list(range(n_routed_experts))
        self.model = mock_model
        # Bugfix: ``patch(...).start()`` returns the replacement MagicMock,
        # not the patcher, so the old ``tearDown`` calling ``.stop()`` on it
        # was a no-op mock call and the patches were never undone. Keep the
        # patcher objects and register ``addCleanup`` instead — cleanups
        # also run even when setUp itself raises, unlike tearDown.
        rank_patcher = patch(
            "vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_rank",
            return_value=0)
        size_patcher = patch(
            "vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_world_size",
            return_value=4)
        self.mock_rank = rank_patcher.start()
        self.addCleanup(rank_patcher.stop)
        self.mock_size = size_patcher.start()
        self.addCleanup(size_patcher.stop)

    @patch("torch.empty_like", return_value=torch.zeros(16, 32))
    def test_init_fp16(self, mock_func):
        # quant_config=None drives the unquantized fp16/bf16 code path.
        self.model.quant_config = None
        VllmEplbAdaptor(self.model)

    @patch("torch.empty_like", return_value=torch.zeros(16, 32))
    def test_init_w8a8(self, mock_func):
        # The default MagicMock quant_config drives the quantized path.
        VllmEplbAdaptor(self.model)
# Allow running this test module directly with `python <file>`.
if __name__ == "__main__":
    unittest.main()