[5/N][refactor]add torchair rotary ops (#2559)

### What this PR does / why we need it?
Move the torchair-related rotary ops into the torchair directory to make the code clearer. As a next step, we'll remove all torchair-related code outside of torchair rotary ops.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
vLLM version: main
vLLM main: ab9f2cfd19

- vLLM version: v0.10.1.1
- vLLM main: 81eea3d348

Signed-off-by: hust17yixuan <303660421@qq.com>
2025-09-01 09:09:21 +08:00
parent 3a5fc5ee01
commit c2c97f3079
4 changed files with 725 additions and 7 deletions
--- a/vllm_ascend/torchair/torchair_model_runner.py
+++ b/vllm_ascend/torchair/torchair_model_runner.py
@@ -15,7 +15,7 @@
 # limitations under the License.
 # This file is a part of the vllm-ascend project.
 # Adapted from vllm-project/vllm/vllm/worker/gpu_model_runner.py
-#
+# isort: skip_file

 import types
 from typing import Optional
@@ -34,12 +34,10 @@ from vllm.logger import logger
 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.torchair.utils import (TorchairCommonAttentionMetadata,
-                                        check_torchair_cache_exist,
-                                        converting_weight_acl_format,
-                                        register_torchair_model,
-                                        torchair_quant_method_register,
-                                        write_kv_cache_bytes_to_file)
+from vllm_ascend.torchair.utils import (
+    TorchairCommonAttentionMetadata, check_torchair_cache_exist,
+    converting_weight_acl_format, register_torchair_model, torchair_ops_patch,
+    torchair_quant_method_register, write_kv_cache_bytes_to_file)
 from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                               is_310p)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
@@ -68,6 +66,7 @@ class NPUTorchairModelRunner(NPUModelRunner):

        self._check_batch_sizes_consistency()
        register_torchair_model()
+        torchair_ops_patch()
        torchair_quant_method_register()

    def _sync_metadata_across_dp(