[EPLB][Bugfix] EPLB support fp/bf16 (#5531)
### What this PR does / why we need it?
EPLB supports the fp16/bf16 dtypes.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
w8a8_dynamic Baseline:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |
w8a8_dynamic eplb:
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |
The fp16 conversion works normally.
The fp16 test is in progress.
Baseline fp16
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |
eplb fp16
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 83.33 |
- vLLM version: v0.13.0
- vLLM main:
45c1ca1ca1
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
This commit is contained in:
39
tests/ut/eplb/adaptor/test_vllm_adaptor.py
Normal file
39
tests/ut/eplb/adaptor/test_vllm_adaptor.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import torch
|
||||
|
||||
from vllm_ascend.eplb.adaptor.vllm_adaptor import VllmEplbAdaptor
|
||||
from transformers import DeepseekV2Config
|
||||
|
||||
|
||||
class TestVllmAdaptor(unittest.TestCase):
    """Unit tests for VllmEplbAdaptor construction.

    Covers both the unquantized path (fp16/bf16, ``quant_config is None``)
    and the quantized (w8a8) path, with the distributed rank/world-size
    patched out so no collective communication is needed.
    """

    def setUp(self):
        # Model stub: 256 routed experts with identity expert/log2phy maps.
        n_routed_experts = 256
        mock_model = MagicMock()
        mock_model.model.named_parameters.return_value = dict()
        config = DeepseekV2Config(n_routed_experts=n_routed_experts)
        mock_model.config = config
        # list(range(...)) is the idiomatic form of [i for i in range(...)].
        mock_model.get_expert_map.return_value = list(range(n_routed_experts))
        mock_model.get_log2phy_map.return_value = list(range(n_routed_experts))
        self.model = mock_model

        # Pretend we are rank 0 of a 4-way distributed group.
        self.mock_rank = patch(
            "vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_rank",
            return_value=0).start()
        self.mock_size = patch(
            "vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_world_size",
            return_value=4).start()
        # Safety net: stop any still-active patches even when tearDown does
        # not run (e.g. setUp raised after the patches were started).
        # stopall only touches active patchers, so it is a no-op after the
        # explicit stops in tearDown.
        self.addCleanup(patch.stopall)

    @patch("torch.empty_like", return_value=torch.zeros(16, 32))
    def test_init_fp16(self, mock_func):
        """Adaptor must build when the model has no quant config (fp/bf16)."""
        self.model.quant_config = None
        adaptor = VllmEplbAdaptor(self.model)
        self.assertIsNotNone(adaptor)

    @patch("torch.empty_like", return_value=torch.zeros(16, 32))
    def test_init_w8a8(self, mock_func):
        """Adaptor must build when a quant config is present (w8a8 path)."""
        adaptor = VllmEplbAdaptor(self.model)
        self.assertIsNotNone(adaptor)

    def tearDown(self):
        self.mock_rank.stop()
        self.mock_size.stop()
# Allow running this test module directly: `python test_vllm_adaptor.py`.
if __name__ == "__main__":
    unittest.main()
Reference in New Issue
Block a user