[2/4][Refactor] Refactor torchair utils (#1892)

There is a lot torchair specified logic in common code. It results hard
code maintenance. We will create a new torchair module to launch
torchair related logic there. I plan to add 4 PR.

1. Refactor worker
2. Refactor utils (this PR)
- simple change that move all torchair related util function to torchair
module
3. Refactor model_runner
4. Refactor attention

- vLLM version: v0.9.2
- vLLM main:
8188196a1c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-21 19:43:30 +08:00
committed by GitHub
parent 957b0b611f
commit 7265dc090d
11 changed files with 142 additions and 136 deletions

View File

@@ -18,12 +18,9 @@
#
import atexit
import fcntl
import functools
import math
import os
import shutil
from contextlib import contextmanager, nullcontext
from contextlib import contextmanager
from enum import Enum
from threading import Lock
from typing import TYPE_CHECKING, List, Tuple
@@ -37,14 +34,6 @@ from vllm.logger import logger
import vllm_ascend.envs as envs
from vllm_ascend.ascend_config import get_ascend_config
try:
# Recent release of torchair has moved these ops to `.scope`.
from torchair.scope import npu_stream_switch as _npu_stream_switch
from torchair.scope import npu_wait_tensor as _npu_wait_tensor
except ImportError:
from torchair.ops import NpuStreamSwitch as _npu_stream_switch
from torchair.ops import npu_wait_tensor as _npu_wait_tensor
if TYPE_CHECKING:
from vllm.config import VllmConfig
else:
@@ -67,6 +56,7 @@ _CUSTOM_OP_ENABLED = None
_IS_310P = None
_SLEEP_MODE_ENABLED = None
_CURRENT_STREAM = None
_ASCEND_CUSTOMOP_IS_REIGISTERED = False
def is_310p():
@@ -403,19 +393,6 @@ class ProfileExecuteDuration:
return durations
# TODO(wxy): Move to ops module
def npu_stream_switch(tag: str, priority: int, *, enabled: bool = True):
return _npu_stream_switch(tag, priority) if enabled else nullcontext()
# TODO(wxy): Move to ops module
def npu_wait_tensor(self: torch.Tensor,
dependency: torch.Tensor,
*,
enabled: bool = True):
return _npu_wait_tensor(self, dependency) if enabled else self
# TODO(wxy): Move to ops module
def npu_prefetch(input: torch.Tensor,
dependency: torch.Tensor,
@@ -489,83 +466,6 @@ def get_fused_moe_state(ep_size: int, with_prefill: bool,
return FusedMoEState.MC2
KV_CACHE_BYTES_CACHE_PATH_NAME = ".kv_cache_bytes"
KV_CACHE_BYTES_CACHE_FILE_NAME = "kv_cache_bytes"
TORCHAIR_CACHE_PATH_NAME = ".torchair_cache"
TORCHAIR_CACHE_DIR = os.getenv(
'TORCHAIR_CACHE_HOME', os.path.join(os.getcwd(), TORCHAIR_CACHE_PATH_NAME))
def get_torchair_current_work_dir(file_name=None):
if file_name is None:
return TORCHAIR_CACHE_DIR
return os.path.join(TORCHAIR_CACHE_DIR, file_name)
def check_torchair_cache_exist():
res = False
torch_air_abs_path = get_torchair_current_work_dir()
if os.path.exists(torch_air_abs_path):
file_list = os.listdir(torch_air_abs_path)
if len(file_list) != 0:
res = True
return res
def check_kv_cache_bytes_cache_exist():
res = False
kv_cache_bytes_cache_abs_path = get_torchair_current_work_dir(
KV_CACHE_BYTES_CACHE_PATH_NAME)
if os.path.exists(kv_cache_bytes_cache_abs_path):
file_list = os.listdir(kv_cache_bytes_cache_abs_path)
if len(file_list) != 0:
res = True
return res
def read_kv_cache_bytes_from_file(rank) -> int:
kv_cache_bytes = -1
kv_cache_bytes_cache_abs_path = get_torchair_current_work_dir(
KV_CACHE_BYTES_CACHE_PATH_NAME)
kv_cache_bytes_file = os.path.join(
kv_cache_bytes_cache_abs_path,
f"{rank}_{KV_CACHE_BYTES_CACHE_FILE_NAME}")
with open(kv_cache_bytes_file, "r", encoding="utf-8") as f:
with file_lock(f, fcntl.LOCK_SH):
kv_cache_bytes = int(f.readline())
return kv_cache_bytes
@contextmanager
def file_lock(file_descriptor, lock_type):
fcntl.flock(file_descriptor, lock_type)
try:
yield
finally:
fcntl.flock(file_descriptor, fcntl.LOCK_UN)
def write_kv_cache_bytes_to_file(rank, kv_cache_bytes):
kv_cache_bytes_cache_abs_path = get_torchair_current_work_dir(
KV_CACHE_BYTES_CACHE_PATH_NAME)
os.makedirs(kv_cache_bytes_cache_abs_path, exist_ok=True)
kv_cache_bytes_file = os.path.join(
kv_cache_bytes_cache_abs_path,
f"{rank}_{KV_CACHE_BYTES_CACHE_FILE_NAME}")
with open(kv_cache_bytes_file, "w", encoding="utf-8") as f:
with file_lock(f, fcntl.LOCK_EX):
f.write(f"{kv_cache_bytes}")
def delete_torchair_cache_file():
torch_air_abs_path = get_torchair_current_work_dir()
if os.path.exists(torch_air_abs_path):
shutil.rmtree(torch_air_abs_path)
_ASCEND_CUSTOMOP_IS_REIGISTERED = False
def register_ascend_customop():
"""Register Ascend CustomOP