2025-02-05 10:53:12 +08:00
|
|
|
#
|
|
|
|
|
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
|
|
|
|
|
# Copyright 2023 The vLLM team.
|
|
|
|
|
#
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
|
#
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
#
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
|
# limitations under the License.
|
2025-04-17 14:59:56 +08:00
|
|
|
# This file is a part of the vllm-ascend project.
|
|
|
|
|
# Adapted from https://github.com/vllm-project/vllm/blob/main/setup.py
|
2025-02-05 10:53:12 +08:00
|
|
|
#
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
import importlib.util
|
|
|
|
|
import logging
|
2025-02-05 10:53:12 +08:00
|
|
|
import os
|
2025-04-03 14:52:34 +08:00
|
|
|
import subprocess
|
|
|
|
|
import sys
|
|
|
|
|
from sysconfig import get_paths
|
|
|
|
|
from typing import Dict, List
|
2025-02-05 10:53:12 +08:00
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
from setuptools import Command, Extension, find_packages, setup
|
2025-04-03 14:52:34 +08:00
|
|
|
from setuptools.command.build_ext import build_ext
|
2025-06-27 09:14:43 +08:00
|
|
|
from setuptools.command.build_py import build_py
|
2025-04-03 14:52:34 +08:00
|
|
|
from setuptools.command.develop import develop
|
|
|
|
|
from setuptools.command.install import install
|
2025-02-10 09:36:09 +08:00
|
|
|
from setuptools_scm import get_version
|
2025-02-05 10:53:12 +08:00
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
|
|
|
|
|
def load_module_from_path(module_name, path):
    """Import the Python file at *path* as a module named *module_name*.

    The module is registered in ``sys.modules`` before execution so that
    any self-referencing imports inside the file resolve correctly.
    Returns the loaded module object.
    """
    spec = importlib.util.spec_from_file_location(module_name, path)
    loaded = importlib.util.module_from_spec(spec)
    # Register first, then execute: mirrors the normal import machinery.
    sys.modules[module_name] = loaded
    spec.loader.exec_module(loaded)
    return loaded
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory containing this setup.py; used as the base for all relative
# paths (csrc scripts, generated files, cmake source dir).
ROOT_DIR = os.path.dirname(__file__)

logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_or_set_default_env(cmake_args,
                             env_name,
                             env_variable,
                             default_path=""):
    """Append ``-D<env_name>=<value>`` to *cmake_args*, defaulting if unset.

    Args:
        cmake_args: list of cmake command-line arguments; mutated in place.
        env_name: name of the configuration variable (e.g. "ASCEND_HOME_PATH").
        env_variable: current value from the environment, or None if unset.
        default_path: fallback value used (with a warning) when unset.

    Returns:
        The same *cmake_args* list, with the new ``-D`` flag appended.
    """
    if env_variable is None:
        # Fixed typo in the warning message ("pleause" -> "please").
        logging.warning(
            f"No {env_name} found in your environment, please try to set {env_name} "
            "if you customize the installation path of this library, otherwise default "
            "path will be adapted during build this project")
        logging.warning(f"Set default {env_name}: {default_path}")
        env_variable = default_path
    else:
        logging.info(f"Found existing {env_name}: {env_variable}")
    # cann package seems will check this environments in cmake, need write this env variable back.
    if env_name == "ASCEND_HOME_PATH":
        os.environ["ASCEND_HOME_PATH"] = env_variable
    cmake_args += [f"-D{env_name}={env_variable}"]
    return cmake_args
|
|
|
|
|
|
|
|
|
|
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
def get_value_from_lines(lines: List[str], key: str) -> str:
    """Extract a ``key : value`` field from npu-smi style output.

    Scans *lines* for the first line containing *key* (after collapsing
    runs of whitespace) and returns the stripped text after the last
    ':' on that line. Returns "" when no line matches.
    """
    for raw in lines:
        normalized = ' '.join(raw.split())
        if key not in normalized:
            continue
        # Take the text after the LAST colon, like split(':')[-1].
        _, _, value = normalized.rpartition(':')
        return value.strip()
    return ""
|
|
|
|
|
|
|
|
|
|
|
2025-11-27 17:18:49 +08:00
|
|
|
def get_chip_type() -> str:
    """Detect the Ascend chip type by querying the `npu-smi` CLI.

    Builds a lowercase chip identifier (e.g. "910b", "310p...") from
    the board-info fields of the first reported NPU.

    Returns:
        The lowercase chip identifier, or "" when `npu-smi` is not
        installed (e.g. a CPU-only build machine).

    Raises:
        RuntimeError: `npu-smi` exists but one of its invocations failed.
        ValueError: the reported chip name is not recognized.
    """
    try:
        # `npu-smi info -l` lists devices; take the first reported NPU ID.
        npu_info_lines = subprocess.check_output(
            ['npu-smi', 'info', '-l']).decode().strip().split('\n')
        npu_id = int(get_value_from_lines(npu_info_lines, 'NPU ID'))
        # Query board details of that device to get the chip fields.
        chip_info_lines = subprocess.check_output(
            ['npu-smi', 'info', '-t', 'board', '-i',
             str(npu_id), '-c', '0']).decode().strip().split('\n')
        chip_name = get_value_from_lines(chip_info_lines, 'Chip Name')
        chip_type = get_value_from_lines(chip_info_lines, 'Chip Type')
        npu_name = get_value_from_lines(chip_info_lines, 'NPU Name')

        if "310" in chip_name:
            # 310P case
            assert chip_type
            return (chip_type + chip_name).lower()
        elif "910" in chip_name:
            if chip_type:
                # A2 case
                assert not npu_name
                return (chip_type + chip_name).lower()
            else:
                # A3 case
                assert npu_name
                return (chip_name + '_' + npu_name).lower()
        else:
            # TODO(zzzzwwjj): Currently, A5's chip name has not determined yet.
            raise ValueError(
                f"Unable to recognize chip name: {chip_name}, please manually set env SOC_VERSION"
            )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Get chip info failed: {e}")
    except FileNotFoundError:
        # Fixed typo in the warning message ("envir" -> "environment").
        logging.warning(
            "npu-smi command not found, if this is an npu environment, please check if npu driver is installed correctly."
        )
        return ""
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
# Load vllm_ascend/envs.py as a standalone module so build-time variables
# (SOC_VERSION, MAX_JOBS, ...) are available without importing the package
# (which may not be installable yet at build time).
envs = load_module_from_path("envs",
                             os.path.join(ROOT_DIR, "vllm_ascend", "envs.py"))

# Chip type auto-detected via npu-smi; "" when detection is unavailable.
soc_version = get_chip_type()
|
[refact] unified soc_version code (#4359)
### What this PR does / why we need it?
Currently, there are two paths to judge the chip type in code,
`get_ascend_soc_version` use `get_soc_version` api in torch_npu, and
`is_310p` `use _build_info.__soc_version__`, which generate when
install. We need to unify the two paths.
We need to unify these codes based on the following points:
1. We need to ensure consistency in chip type judgment between compiling
and running states;
2. In compiling state, we need chip type to complete op's compilation,
but in running state, we only need device
type(910B/910_93/310P/910_95/etc) to make code branch judgement;
3. In compiling state, torch_npu may not have been installed yet, so we
can't use torch_npu's api.
Based on the above points, we have made the following changes:
1. When user set env `SOC_VERSION`, use it; when not set, query
soc_version by `npu-smi`;
2. generate device_type based on soc_version when compiling, and write
`__device_type__` instead of `__soc_version__` in `_build_info.py`;
3. In running state, use `__device_type__` to judge code branch.
### Does this PR introduce _any_ user-facing change?
When not set env `SOC_VERSION`, it will not be `ASCEND910B1` by default,
we will query soc_version by `npu-smi`. And env `SOC_VERSION` must be in
the list `soc_to_device` in `setup.py`.
- vLLM version: v0.11.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379
Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-11-26 14:28:55 +08:00
|
|
|
|
|
|
|
|
# Reconcile the user-provided SOC_VERSION env var with the auto-detected
# value: the env var wins, the detected value fills in when unset, and a
# mismatch between the two only produces a warning.
if not envs.SOC_VERSION:
    if not soc_version:
        raise RuntimeError(
            "Could not determine chip type automatically via 'npu-smi'. "
            "This can happen in a CPU-only environment. "
            "Please set the 'SOC_VERSION' environment variable to specify the target chip."
        )
    envs.SOC_VERSION = soc_version
else:
    if soc_version and envs.SOC_VERSION != soc_version:
        logging.warning(
            f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}"
        )
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
|
2025-11-17 19:13:04 +08:00
|
|
|
def gen_build_info():
    """Generate ``vllm_ascend/_build_info.py`` recording the device type.

    Maps the build-time SOC version (``envs.SOC_VERSION``) to a coarse
    device type (A2 / A3 / A5 / _310P) and writes it as ``__device_type__``
    so the runtime can branch on device type without needing torch_npu.
    """
    soc_version = envs.SOC_VERSION

    # Mapping from SOC version (either a coarse alias like "910b" or the
    # exact lowercase npu-smi / SOC_VERSION identifier) to device type.
    soc_to_device = {
        "910b": "A2",
        "910c": "A3",
        "310p": "_310P",
        "ascend910b1": "A2",
        "ascend910b2": "A2",
        "ascend910b2c": "A2",
        "ascend910b3": "A2",
        "ascend910b4": "A2",
        "ascend910b4-1": "A2",
        "ascend910_9391": "A3",
        "ascend910_9381": "A3",
        "ascend910_9372": "A3",
        "ascend910_9392": "A3",
        "ascend910_9382": "A3",
        "ascend910_9362": "A3",
        "ascend310p1": "_310P",
        "ascend310p3": "_310P",
        "ascend310p5": "_310P",
        "ascend310p7": "_310P",
        "ascend310p3vir01": "_310P",
        "ascend310p3vir02": "_310P",
        "ascend310p3vir04": "_310P",
        "ascend310p3vir08": "_310P",
        "ascend910_9579": "A5",
    }

    # NOTE(review): `assert` is stripped under `python -O`; an unknown SOC
    # version would then surface as a KeyError on the lookup below instead.
    assert soc_version in soc_to_device, f"Undefined soc_version: {soc_version}. Please file an issue to vllm-ascend."
    device_type = soc_to_device[soc_version]

    # Write the generated file into the source tree so both editable and
    # regular installs pick it up.
    package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py")
    with open(package_dir, "w+") as f:
        f.write('# Auto-generated file\n')
        f.write(f"__device_type__ = '{device_type}'\n")
    logging.info(f"Generated _build_info.py with SOC version: {soc_version}")
|
|
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
class CMakeExtension(Extension):
    """A setuptools Extension whose actual compilation is delegated to
    CMake (see ``cmake_build_ext``); it therefore carries no sources."""

    def __init__(self,
                 name: str,
                 cmake_lists_dir: str = ".",
                 **kwargs) -> None:
        # Empty sources: CMake owns the build of this extension.
        super().__init__(name, sources=[], py_limited_api=False, **kwargs)
        # Resolve once so later cmake invocations are cwd-independent.
        self.cmake_lists_dir = os.path.abspath(cmake_lists_dir)
|
|
|
|
|
|
|
|
|
|
|
2025-11-17 19:13:04 +08:00
|
|
|
class custom_develop(develop):
    """`pip install -e` entry point: regenerate _build_info.py before the
    standard develop steps so editable installs see the right device type."""

    def run(self):
        gen_build_info()
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-06-27 09:14:43 +08:00
|
|
|
class custom_build_info(build_py):
    """build_py wrapper: regenerate _build_info.py before packaging so the
    built wheel ships the correct device type."""

    def run(self):
        gen_build_info()
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
class build_and_install_aclnn(Command):
    """Custom setuptools command that compiles and installs the AclNN
    custom operators by invoking ``csrc/build_aclnn.sh``."""

    description = "Build and install AclNN by running build_aclnn.sh"
    user_options = []

    def initialize_options(self):
        # This command takes no options.
        pass

    def finalize_options(self):
        # This command takes no options.
        pass

    def run(self):
        """Run build_aclnn.sh with the project root and target SOC version;
        abort the build with the script's exit code on failure."""
        try:
            print("Running bash build_aclnn.sh ...")
            subprocess.check_call(
                ["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
            # Fixed typo in the success message ("buid" -> "build").
            print("build_aclnn.sh executed successfully!")
        except subprocess.CalledProcessError as e:
            print(f"Error running build_aclnn.sh: {e}")
            raise SystemExit(e.returncode)
|
|
|
|
|
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
class cmake_build_ext(build_ext):
    """build_ext implementation that drives the CMake configure / build /
    install cycle for the project's C++ and AscendC extensions."""

    # A dict of extension directories that have been configured.
    did_config: Dict[str, bool] = {}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Determine number of compilation jobs
|
|
|
|
|
#
|
|
|
|
|
def compute_num_jobs(self):
|
|
|
|
|
# `num_jobs` is either the value of the MAX_JOBS environment variable
|
|
|
|
|
# (if defined) or the number of CPUs available.
|
|
|
|
|
num_jobs = envs.MAX_JOBS
|
|
|
|
|
if num_jobs is not None:
|
|
|
|
|
num_jobs = int(num_jobs)
|
|
|
|
|
logger.info("Using MAX_JOBS=%d as the number of jobs.", num_jobs)
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
# os.sched_getaffinity() isn't universally available, so fall
|
|
|
|
|
# back to os.cpu_count() if we get an error here.
|
|
|
|
|
num_jobs = len(os.sched_getaffinity(0))
|
|
|
|
|
except AttributeError:
|
|
|
|
|
num_jobs = os.cpu_count()
|
|
|
|
|
num_jobs = max(1, num_jobs)
|
|
|
|
|
|
|
|
|
|
return num_jobs
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
# Perform cmake configuration for a single extension.
|
|
|
|
|
#
|
|
|
|
|
    def configure(self, ext: CMakeExtension) -> None:
        """Run the CMake configure step for *ext* in self.build_temp.

        Assembles the full list of -D cache definitions (build type,
        compilers, Ascend toolkit, Python, pybind11, SOC version,
        torch_npu location, FetchContent dir) and invokes cmake.

        Raises:
            RuntimeError: pybind11 lookup or cmake configuration failed.
        """
        build_temp = self.build_temp
        os.makedirs(build_temp, exist_ok=True)
        source_dir = os.path.abspath(ROOT_DIR)
        python_executable = sys.executable
        cmake_args = ["cmake"]
        # Default use release mode to compile the csrc code
        # Turbo now support compiled with Release, Debug and RelWithDebugInfo
        if envs.CMAKE_BUILD_TYPE is None or envs.CMAKE_BUILD_TYPE not in [
                "Debug",
                "Release",
                "RelWithDebugInfo",
        ]:
            envs.CMAKE_BUILD_TYPE = "Release"
        cmake_args += [f"-DCMAKE_BUILD_TYPE={envs.CMAKE_BUILD_TYPE}"]
        # Default dump the compile commands for lsp
        cmake_args += ["-DCMAKE_EXPORT_COMPILE_COMMANDS=1"]
        # Optional compiler overrides from the environment.
        if envs.CXX_COMPILER is not None:
            cmake_args += [f"-DCMAKE_CXX_COMPILER={envs.CXX_COMPILER}"]
        if envs.C_COMPILER is not None:
            cmake_args += [f"-DCMAKE_C_COMPILER={envs.C_COMPILER}"]
        if envs.VERBOSE:
            cmake_args += ["-DCMAKE_VERBOSE_MAKEFILE=ON"]

        # find ASCEND_HOME_PATH
        check_or_set_default_env(
            cmake_args,
            "ASCEND_HOME_PATH",
            envs.ASCEND_HOME_PATH,
            "/usr/local/Ascend/ascend-toolkit/latest",
        )

        # find PYTHON_EXECUTABLE
        check_or_set_default_env(cmake_args, "PYTHON_EXECUTABLE",
                                 sys.executable)

        # find PYTHON_INCLUDE_PATH
        check_or_set_default_env(cmake_args, "PYTHON_INCLUDE_PATH",
                                 get_paths()["include"])

        # ccache and ninja can not be applied at ascendc kernels now

        try:
            # if pybind11 is installed via pip
            pybind11_cmake_path = (subprocess.check_output(
                [python_executable, "-m", "pybind11",
                 "--cmakedir"]).decode().strip())
        except subprocess.CalledProcessError as e:
            # else specify pybind11 path installed from source code on CI container
            raise RuntimeError(f"CMake configuration failed: {e}")

        # Editable (develop) installs place the built libs directly into
        # the source tree; regular builds use build_lib.
        install_path = os.path.join(ROOT_DIR, self.build_lib)
        if isinstance(self.distribution.get_command_obj("develop"), develop):
            install_path = os.path.join(ROOT_DIR, "vllm_ascend")
        # add CMAKE_INSTALL_PATH
        cmake_args += [f"-DCMAKE_INSTALL_PREFIX={install_path}"]

        cmake_args += [f"-DCMAKE_PREFIX_PATH={pybind11_cmake_path}"]

        # Coarse SOC aliases are expanded to a concrete CANN SOC version
        # for the kernel compiler; exact identifiers pass through as-is.
        soc_version_map = {
            "910b": "ascend910b1",
            "910c": "ascend910_9392",
            "310p": "ascend310p1",
        }
        CANN_SOC_VERSION = soc_version_map.get(envs.SOC_VERSION,
                                               envs.SOC_VERSION)
        cmake_args += [f"-DSOC_VERSION={CANN_SOC_VERSION}"]

        # Override the base directory for FetchContent downloads to $ROOT/.deps
        # This allows sharing dependencies between profiles,
        # and plays more nicely with sccache.
        # To override this, set the FETCHCONTENT_BASE_DIR environment variable.
        fc_base_dir = os.path.join(ROOT_DIR, ".deps")
        fc_base_dir = os.environ.get("FETCHCONTENT_BASE_DIR", fc_base_dir)
        cmake_args += ["-DFETCHCONTENT_BASE_DIR={}".format(fc_base_dir)]

        # Locate the installed torch-npu package via pip metadata.
        torch_npu_command = "python3 -m pip show torch-npu | grep '^Location:' | awk '{print $2}'"
        try:
            torch_npu_path = subprocess.check_output(
                torch_npu_command, shell=True).decode().strip()
            torch_npu_path += "/torch_npu"
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"Retrieve torch version version failed: {e}")

        # add TORCH_NPU_PATH
        cmake_args += [f"-DTORCH_NPU_PATH={torch_npu_path}"]

        build_tool = []
        # TODO(ganyi): ninja and ccache support for ascend c auto codegen. now we can only use make build
        # if which('ninja') is not None:
        #     build_tool += ['-G', 'Ninja']
        # Default build tool to whatever cmake picks.

        cmake_args += [source_dir]
        logging.info(f"cmake config command: {cmake_args}")
        try:
            subprocess.check_call(cmake_args, cwd=self.build_temp)
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"CMake configuration failed: {e}")

        # NOTE(review): cmake is invoked a second time here with
        # cmake_args spliced in (which already begins with "cmake" and
        # ends with source_dir) — looks like a redundant re-configure;
        # confirm whether both invocations are intentional.
        subprocess.check_call(
            ["cmake", ext.cmake_lists_dir, *build_tool, *cmake_args],
            cwd=self.build_temp,
        )
|
|
|
|
|
|
|
|
|
|
def build_extensions(self) -> None:
|
|
|
|
|
# Ensure that CMake is present and working
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_output(["cmake", "--version"])
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Cannot find CMake executable: {e}")
|
|
|
|
|
|
|
|
|
|
# Create build directory if it does not exist.
|
|
|
|
|
if not os.path.exists(self.build_temp):
|
|
|
|
|
os.makedirs(self.build_temp)
|
|
|
|
|
|
|
|
|
|
targets = []
|
|
|
|
|
|
|
|
|
|
os.makedirs(os.path.join(self.build_lib, "vllm_ascend"), exist_ok=True)
|
|
|
|
|
|
|
|
|
|
def target_name(s: str) -> str:
|
|
|
|
|
return s.removeprefix("vllm_ascend.")
|
|
|
|
|
|
|
|
|
|
# Build all the extensions
|
|
|
|
|
for ext in self.extensions:
|
|
|
|
|
self.configure(ext)
|
|
|
|
|
targets.append(target_name(ext.name))
|
|
|
|
|
|
|
|
|
|
num_jobs = self.compute_num_jobs()
|
|
|
|
|
|
|
|
|
|
build_args = [
|
|
|
|
|
"--build",
|
|
|
|
|
".",
|
|
|
|
|
f"-j={num_jobs}",
|
|
|
|
|
*[f"--target={name}" for name in targets],
|
|
|
|
|
]
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_call(["cmake", *build_args], cwd=self.build_temp)
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Build library failed: {e}")
|
|
|
|
|
# Install the libraries
|
|
|
|
|
install_args = [
|
|
|
|
|
"cmake",
|
|
|
|
|
"--install",
|
|
|
|
|
".",
|
|
|
|
|
]
|
|
|
|
|
try:
|
|
|
|
|
subprocess.check_call(install_args, cwd=self.build_temp)
|
|
|
|
|
except OSError as e:
|
|
|
|
|
raise RuntimeError(f"Install library failed: {e}")
|
|
|
|
|
|
|
|
|
|
# copy back to build folder for editable build
|
|
|
|
|
if isinstance(self.distribution.get_command_obj("develop"), develop):
|
|
|
|
|
import shutil
|
|
|
|
|
for root, _, files in os.walk(self.build_temp):
|
|
|
|
|
for file in files:
|
|
|
|
|
if file.endswith(".so"):
|
|
|
|
|
src_path = os.path.join(root, file)
|
|
|
|
|
dst_path = os.path.join(self.build_lib, "vllm_ascend",
|
|
|
|
|
file)
|
|
|
|
|
shutil.copy(src_path, dst_path)
|
|
|
|
|
print(f"Copy: {src_path} -> {dst_path}")
|
|
|
|
|
|
2025-11-28 18:06:39 +08:00
|
|
|
# copy back _cann_ops_custom directory
|
|
|
|
|
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend",
|
|
|
|
|
"_cann_ops_custom")
|
|
|
|
|
dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend",
|
|
|
|
|
"_cann_ops_custom")
|
|
|
|
|
if os.path.exists(src_cann_ops_custom):
|
|
|
|
|
import shutil
|
|
|
|
|
if os.path.exists(dst_cann_ops_custom):
|
|
|
|
|
shutil.rmtree(dst_cann_ops_custom)
|
|
|
|
|
shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom)
|
|
|
|
|
print(f"Copy: {src_cann_ops_custom} -> {dst_cann_ops_custom}")
|
|
|
|
|
|
2025-04-03 14:52:34 +08:00
|
|
|
    def run(self):
        """Build the ACLNN custom ops, then run the normal build_ext flow."""
        # First, ensure ACLNN custom-ops is built and installed.
        self.run_command("build_aclnn")
        # Then, run the standard build_ext command to compile the extensions
        super().run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class custom_install(install):
    """``install`` command that compiles the native extensions first."""

    def run(self):
        # Building the extensions up front guarantees the compiled
        # artifacts exist before the regular install steps copy files.
        self.run_command("build_ext")
        super().run()
|
|
|
|
|
|
|
|
|
|
|
2025-02-05 10:53:12 +08:00
|
|
|
# Absolute path of the directory containing this setup.py (the repo root).
ROOT_DIR = os.path.dirname(__file__)

try:
    # Derive the package version from git metadata via setuptools-scm and
    # write it to vllm_ascend/_version.py for runtime introspection.
    VERSION = get_version(write_to="vllm_ascend/_version.py")
except LookupError:
    # The checkout action in github action CI does not checkout the tag. It
    # only checks out the commit. In this case, we set a dummy version.
    VERSION = "0.0.0"

# The single native extension, built through the CMake-driven build_ext.
ext_modules = [CMakeExtension(name="vllm_ascend.vllm_ascend_C")]
|
2025-04-03 14:52:34 +08:00
|
|
|
|
2025-02-05 10:53:12 +08:00
|
|
|
|
|
|
|
|
def get_path(*filepath) -> str:
    """Resolve *filepath* components relative to the repository root."""
    pieces = (ROOT_DIR, *filepath)
    return os.path.join(*pieces)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_readme() -> str:
    """Return the contents of README.md, or "" if it cannot be read."""
    try:
        # EAFP: open directly rather than pre-checking with isfile(), which
        # resolved the path twice and was racy between check and open.
        with open(get_path("README.md"), encoding="utf-8") as f:
            return f.read()
    except OSError:
        # Missing file (or a directory at that path) -> no long description.
        return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_requirements() -> List[str]:
    """Get Python package dependencies from requirements.txt.

    ``-r <file>`` includes are resolved recursively; pip option lines
    (``--...``), comments and blank lines are dropped.
    """

    def _read_requirements(filename: str) -> List[str]:
        # Read one requirements file, recursing into "-r" includes.
        with open(get_path(filename)) as f:
            lines = f.read().strip().split("\n")
        resolved_requirements = []
        for line in lines:
            line = line.strip()
            if line.startswith("-r "):
                resolved_requirements += _read_requirements(line.split()[1])
            elif not line or line.startswith(("--", "#")):
                # Skip blank lines, comments and pip option flags, none of
                # which are valid install_requires entries.
                continue
            else:
                resolved_requirements.append(line)
        return resolved_requirements

    try:
        requirements = _read_requirements("requirements.txt")
    except ValueError:
        print("Failed to read requirements.txt in vllm_ascend.")
        # Previously `requirements` was left unbound here, turning the
        # failure into a NameError at the return below.
        requirements = []
    return requirements
|
|
|
|
|
|
|
|
|
|
|
2025-06-27 09:14:43 +08:00
|
|
|
# Map setuptools command names to the customized implementations above so
# develop/build/install all go through the ACLNN + CMake pipeline.
cmdclass = {
    "develop": custom_develop,
    "build_py": custom_build_info,
    "build_aclnn": build_and_install_aclnn,
    "build_ext": cmake_build_ext,
    "install": custom_install
}

setup(
    name="vllm_ascend",
    # Follow:
    # https://packaging.python.org/en/latest/specifications/version-specifiers
    version=VERSION,
    author="vLLM-Ascend team",
    license="Apache 2.0",
    description="vLLM Ascend backend plugin",
    long_description=read_readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/vllm-project/vllm-ascend",
    project_urls={
        "Homepage": "https://github.com/vllm-project/vllm-ascend",
    },
    # TODO: Add 3.12 back when torch-npu support 3.12
    classifiers=[
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: Apache Software License",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Information Analysis",
    ],
    packages=find_packages(exclude=("docs", "examples", "tests*", "csrc")),
    python_requires=">=3.10",
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    cmdclass=cmdclass,
    extras_require={},
    # Register vllm_ascend with vLLM's plugin discovery: the platform
    # backend plus the connector / model-loader / profiling hooks.
    entry_points={
        "vllm.platform_plugins": ["ascend = vllm_ascend:register"],
        "vllm.general_plugins": [
            "ascend_kv_connector = vllm_ascend:register_connector",
            "ascend_model_loader = vllm_ascend:register_model_loader",
            "ascend_service_profiling = vllm_ascend:register_service_profiling"
        ],
    },
)
|