[Main2Main] Upgrade vLLM to 0226 (#6813)
### What this PR does / why we need it?
Breaking upstream vLLM changes:
1. https://github.com/vllm-project/vllm/pull/33452
2. https://github.com/vllm-project/vllm/pull/33451
3. https://github.com/vllm-project/vllm/pull/32567
4. https://github.com/vllm-project/vllm/pull/32344
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 83b47f67b1
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: gcanlin <canlinguosdu@gmail.com>
Co-authored-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -22,9 +22,9 @@ class TestAscendConfig(unittest.TestCase):
|
||||
"eplb_config": {"dynamic_eplb": True, "num_redundant_experts": 2},
|
||||
}
|
||||
from vllm.model_executor.layers.fused_moe.config import RoutingMethodType
|
||||
if vllm_version_is("0.15.0"):
|
||||
if vllm_version_is("0.16.0"):
|
||||
moe_parallel_config = FusedMoEParallelConfig(
|
||||
2, 0, 1, 2, 1, 1, 1, 1, True, "hccl", enable_eplb=True)
|
||||
2, 0, 1, 2, 1, 1, 1, 1, True, "hccl", is_sequence_parallel=True, enable_eplb=True)
|
||||
moe_config = FusedMoEConfig(
|
||||
num_experts=8,
|
||||
experts_per_token=8,
|
||||
|
||||
Reference in New Issue
Block a user