[CI] CI refactor (#4928)

1. Rename the workflow to a better name
2. Fix the lint error
3. Remove the accuracy report doc and test

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-14 11:09:56 +08:00
committed by GitHub
parent ba28d54f35
commit 8090914d69
20 changed files with 4 additions and 268 deletions

View File

@@ -200,7 +200,6 @@ class AscendW8A8DynamicFusedMoEMethod:
assert router_logits.shape[
1] == global_num_experts - global_redundant_expert_num, "Number of global experts mismatch (excluding redundancy)"
topk_weights, topk_ids = None, None
if self.multistream_overlap_gate:
fc3_context = get_flash_common3_context()
assert fc3_context is not None
@@ -219,7 +218,8 @@ class AscendW8A8DynamicFusedMoEMethod:
scoring_func=scoring_func,
e_score_correction_bias=e_score_correction_bias,
global_num_experts=global_num_experts)
assert topk_ids is not None
assert topk_weights is not None
# This is a naive implementation of expert load balancing, intended
# to avoid accumulating too many tokens on a single rank.
# Currently it is only activated during profile runs.