[Ascend]optimize Qwen3 on Ascend (#10574)

Co-authored-by: c30031083 <chenxu140@huawei.com>
2025-09-23 03:18:36 +03:00
parent 095093ee5a
commit e22f3a5ec9
6 changed files with 81 additions and 2 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -179,6 +179,13 @@ UNBALANCED_MODEL_LOADING_TIMEOUT_S = 300
 logger = logging.getLogger(__name__)


+if _is_npu:  # NPU-only setup; runs once at module import time
+    import torch_npu  # imported only on the NPU branch
+
+    torch.npu.config.allow_internal_format = True  # NOTE(review): presumably permits NPU-private tensor layouts -- confirm in torch_npu docs
+    torch_npu.npu.set_compile_mode(jit_compile=False)  # NOTE(review): presumably turns off operator JIT compilation -- confirm
+
+
 class RankZeroFilter(logging.Filter):
    """Filter that only allows INFO level logs from rank 0, but allows all other levels from any rank."""