diff --git a/docs/source/user_guide/additional_config.md b/docs/source/user_guide/additional_config.md index df39789..a884bda 100644 --- a/docs/source/user_guide/additional_config.md +++ b/docs/source/user_guide/additional_config.md @@ -38,6 +38,7 @@ The details of each config option are as follows: | Name | Type | Default | Description | | ---- | ---- | ------- | ----------- | | `enabled` | bool | `False` | Whether to enable torchair graph mode | +| `enable_view_optimize` | bool | `True` | Whether to enable torchair view optimization | | `use_cached_graph` | bool | `False` | Whether to use cached graph | | `graph_batch_sizes` | list[int] | `[]` | The batch size for torchair graph cache | | `graph_batch_sizes_init` | bool | `False` | Init graph batch size dynamically if `graph_batch_sizes` is empty | diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py index 41ebbde..065b7d0 100644 --- a/vllm_ascend/ascend_config.py +++ b/vllm_ascend/ascend_config.py @@ -55,6 +55,8 @@ class TorchairGraphConfig: "graph_batch_sizes_init", False) self.enable_multistream_shared_expert = torchair_graph_config.get( "enable_multistream_shared_expert", False) + self.enable_view_optimize = torchair_graph_config.get( + "enable_view_optimize", True) if not isinstance(self.graph_batch_sizes, list): raise TypeError("graph_batch_sizes must be list[int]") diff --git a/vllm_ascend/worker/model_runner.py b/vllm_ascend/worker/model_runner.py index 43059b8..48c5d4b 100644 --- a/vllm_ascend/worker/model_runner.py +++ b/vllm_ascend/worker/model_runner.py @@ -1037,6 +1037,8 @@ class NPUModelRunnerBase(ModelRunnerBase[TModelInputForNPU]): config = torchair.CompilerConfig() config.experimental_config.frozen_parameter = True config.experimental_config.tiling_schedule_optimize = True + config.experimental_config.enable_view_optimize = \ + get_ascend_config().torchair_graph_config.enable_view_optimize torch.npu.set_compile_mode(jit_compile=False) if not self.use_cached_npu_graph: npu_backend = torchair.get_npu_backend(compiler_config=config) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 3dda021..2f2e5c5 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1286,6 +1286,8 @@ class NPUModelRunner(LoRAModelRunnerMixin): config = torchair.CompilerConfig() config.experimental_config.frozen_parameter = True config.experimental_config.tiling_schedule_optimize = True + config.experimental_config.enable_view_optimize = \ + get_ascend_config().torchair_graph_config.enable_view_optimize torch.npu.set_compile_mode(jit_compile=False) if not self.use_cached_npu_graph: npu_backend = torchair.get_npu_backend(compiler_config=config)