[feature] Rework Ascend NPU graph support (#9350)

Co-authored-by: ronnie_zheng <zl19940307@163.com>
Co-authored-by: yezhifeng (D) <y00897525@china.huawei.com>
Co-authored-by: anon189Ty <Stari_Falcon@outlook.com>
Co-authored-by: Maksim <makcum888e@mail.ru>
Co-authored-by: ssshinigami <44640852+ssshinigami@users.noreply.github.com>
2025-08-20 11:32:27 +08:00
parent f515449582
commit 3680d6f88b
18 changed files with 546 additions and 81 deletions
--- a/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
+++ b/benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py
@@ -9,7 +9,7 @@ from transformers import AutoConfig
 from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
    fused_moe as fused_moe_triton,
 )
-from sglang.srt.model_executor.cuda_graph_runner import set_torch_compile_config
+from sglang.srt.model_executor.graph_runner import set_torch_compile_config


 def get_model_config(model_name: str, tp_size: int):