support torchair mode (#2641)

### What this PR does / why we need it?
support torchair mode
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?

- vLLM version: v0.10.1.1
- vLLM main:
5438967fbc

Signed-off-by: zhangdepeng <zhangdepeng2@huawei.com>
Signed-off-by: p00465316 <panchao13@huawei.com>
Co-authored-by: zhangdepeng <zhangdepeng2@huawei.com>
This commit is contained in:
panchao-hub
2025-09-01 15:49:07 +08:00
committed by GitHub
parent b72e34013f
commit ea53f9076e
4 changed files with 19 additions and 0 deletions

View File

@@ -70,6 +70,7 @@ class TorchairGraphConfig:
def __init__(self, torchair_graph_config):
self.enabled = torchair_graph_config.get("enabled", False)
self.mode = torchair_graph_config.get("mode", '')
self.use_cached_graph = torchair_graph_config.get(
"use_cached_graph", False)
self.graph_batch_sizes = torchair_graph_config.get(
@@ -91,6 +92,9 @@ class TorchairGraphConfig:
"graph_batch_sizes_init is only valid when graph_batch_sizes is empty"
)
if not self.enabled:
if self.mode:
raise RuntimeError(
"mode is valid only when Torchair graph mode is enabled")
if self.use_cached_graph:
raise RuntimeError(
"use_cached_graph is valid only when Torchair graph mode is enabled"

View File

@@ -324,6 +324,8 @@ class NPUTorchairModelRunner(NPUModelRunner):
communication_adaptation_310p()
config = torchair.CompilerConfig()
if get_ascend_config().torchair_graph_config.mode:
config.mode = get_ascend_config().torchair_graph_config.mode
config.experimental_config.frozen_parameter = True
# enabling tiling_schedule_optimize on 300I Duo has some bugs, so we have to
# disable it on 300I Duo platform now.