[4/N][Refactor] torchair model runner refactor (#2208)

There is a lot of torchair code in the model runner, which makes the code hard to maintain. We'll create a new torchair_model_runner to split out the torchair-related logic. Following the workflow in #2203, this is the first PR. What this PR does: create a common function `_convert_torch_format` for initialize_kv_cache - vLLM version: v0.10.0 - vLLM main: 14a5d903ab Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-08-11 21:39:24 +08:00
parent eb43a475f4
commit c8b0f5f799
2 changed files with 18 additions and 13 deletions
--- a/vllm_ascend/torchair/torchair_model_runner.py
+++ b/vllm_ascend/torchair/torchair_model_runner.py
@@ -20,10 +20,11 @@
 from typing import Optional

 import torch
+import torch_npu
 from vllm.config import VllmConfig
 from vllm.forward_context import get_forward_context

-from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ,
+from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_ND, ACL_FORMAT_FRACTAL_NZ,
                               maybe_converting_weight_acl_format)
 from vllm_ascend.worker.model_runner_v1 import NPUModelRunner

@@ -113,3 +114,7 @@ class NPUTorchairModelRunner(NPUModelRunner):
                with_prefill, is_torchair_compile, input_ids, positions,
                attn_metadata, num_tokens, intermediate_tensors, inputs_embeds)
        return hidden_states
+
+    def _convert_torch_format(self, kv_cache):
+        kv_cache = torch_npu.npu_format_cast(kv_cache, ACL_FORMAT_FRACTAL_ND)
+        return kv_cache