[5/N][refactor]add torchair rotary ops (#2559)

### What this PR does / why we need it?
Move torchair-related rotary ops into the torchair directory to make the
code clearer. As a next step, we'll remove all torchair-related rotary-op
code that lives outside of the torchair directory.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
vLLM version: main
vLLM main:
ab9f2cfd19


- vLLM version: v0.10.1.1
- vLLM main:
81eea3d348

Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
Wang Yixuan
2025-09-01 09:09:21 +08:00
committed by GitHub
parent 3a5fc5ee01
commit c2c97f3079
4 changed files with 725 additions and 7 deletions

View File

@@ -15,7 +15,7 @@
# limitations under the License.
# This file is a part of the vllm-ascend project.
# Adapted from vllm-project/vllm/vllm/worker/gpu_model_runner.py
#
# isort: skip_file
import types
from typing import Optional
@@ -34,12 +34,10 @@ from vllm.logger import logger
import vllm_ascend.envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.platform import NPUPlatform
from vllm_ascend.torchair.utils import (TorchairCommonAttentionMetadata,
check_torchair_cache_exist,
converting_weight_acl_format,
register_torchair_model,
torchair_quant_method_register,
write_kv_cache_bytes_to_file)
from vllm_ascend.torchair.utils import (
TorchairCommonAttentionMetadata, check_torchair_cache_exist,
converting_weight_acl_format, register_torchair_model, torchair_ops_patch,
torchair_quant_method_register, write_kv_cache_bytes_to_file)
from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
is_310p)
from vllm_ascend.worker.model_runner_v1 import NPUModelRunner
@@ -68,6 +66,7 @@ class NPUTorchairModelRunner(NPUModelRunner):
self._check_batch_sizes_consistency()
register_torchair_model()
torchair_ops_patch()
torchair_quant_method_register()
def _sync_metadata_across_dp(