### What this PR does / why we need it?
The initialization method of expert_map used by the eplb module is
different from that used by the fused_moe module. This PR deletes the
expert_map initialization method used by the eplb module to make the
initialization methods consistent.
#### before bugfix
self._expert_map=tensor([64, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63], device='npu:1', dtype=torch.int32)
self.shared_dict["expert_maps"][0]=tensor([-1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64]], dtype=torch.int32)
### How was this patch tested?
#### qwen3-235B-w8a8 aime
| dataset | version | metric | mode | vllm-api-general-chat |
|----- | ----- | ----- | ----- | -----|
| aime2024 | 604a78 | accuracy | gen | 86.67 |
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
62 lines · 1.7 KiB · Python
import pytest
from vllm_ascend.eplb.adaptor.abstract_adaptor import EplbAdaptor
class DummyAdaptor(EplbAdaptor):
    """Minimal concrete ``EplbAdaptor`` used to exercise the interface.

    Every override either echoes its arguments back or returns a fixed
    sentinel, so tests can verify that dispatch reached this subclass.
    """

    def __init__(self, **kwargs):
        # Forward everything to the base class, then keep the kwargs
        # around so tests can assert on what was passed in.
        super().__init__(**kwargs)
        self.args = kwargs

    def get_rank_expert_workload(self):
        # Fixed sentinel value; proves the override (not the base) ran.
        return "workload"

    def do_update_expert_map(self, layer_id, updated_expert_map):
        # Echo both inputs back under descriptive keys.
        return dict(layer_id=layer_id, map=updated_expert_map)

    def do_update_expert_weight(self, layer_id, local_expert_to_replace,
                                buffer_tensor_id):
        # Echo all three inputs back under descriptive keys.
        return dict(
            layer_id=layer_id,
            replace=local_expert_to_replace,
            buffer=buffer_tensor_id,
        )
def test_base_class_methods_raise():
    """Every abstract-interface method on the bare base raises."""
    adaptor = EplbAdaptor()
    # Table of (bound method, positional args) so each entry is checked
    # under the same NotImplementedError expectation.
    calls = [
        (adaptor.get_rank_expert_workload, ()),
        (adaptor.do_update_expert_map, (1, {})),
        (adaptor.do_update_expert_weight, (1, "x", "y")),
    ]
    for method, args in calls:
        with pytest.raises(NotImplementedError):
            method(*args)
def test_dummy_adaptor_init_and_args():
    """Keyword arguments given at construction are retained on ``args``."""
    assert DummyAdaptor(test_arg=123).args == {"test_arg": 123}
def test_get_rank_expert_workload():
    """The dummy override reports its fixed sentinel value."""
    assert DummyAdaptor().get_rank_expert_workload() == "workload"
def test_do_update_expert_map():
    """``do_update_expert_map`` echoes the layer id and the map it got."""
    payload = {"expert": 1}
    result = DummyAdaptor().do_update_expert_map(2, payload)
    assert result == {"layer_id": 2, "map": payload}
def test_do_update_expert_weight():
    """All three weight-update arguments come back under their keys."""
    outcome = DummyAdaptor().do_update_expert_weight(1, "expertA", "bufferX")
    expected = {"layer_id": 1, "replace": "expertA", "buffer": "bufferX"}
    assert outcome == expected