support fused_moe_allgather_ep (#1335)
### What this PR does / why we need it? Support fused_moe_allgather_ep. ### How was this patch tested? Tested with unit tests (UT). Signed-off-by: lyj-jjj <liuyingjun5@huawei.com>
This commit is contained in:
@@ -99,6 +99,11 @@ env_variables: Dict[str, Callable[[], Any]] = {
    # Whether to enable the trace recompiles from pytorch.
    "VLLM_ASCEND_TRACE_RECOMPILES":
    lambda: bool(int(os.getenv("VLLM_ASCEND_TRACE_RECOMPILES", '0'))),
    # Whether to enable fused_experts_allgather_ep. MoeInitRoutingV3 and
    # GroupedMatmulFinalizeRouting operators are combined to implement EP.
    "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP":
    lambda: bool(int(os.getenv("VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP", '0'))
                 ),
    "VLLM_ASCEND_ENABLE_DBO":
    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_DBO", '0'))),
    # Whether to enable the model execute time observe profile. Disable it when
Reference in New Issue
Block a user