diff --git a/README.md b/README.md index ded9f91..7370415 100644 --- a/README.md +++ b/README.md @@ -34,24 +34,23 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l ## Prerequisites - Hardware: Atlas 800I A2 Inference series, Atlas A2 Training series +- OS: Linux - Software: * Python >= 3.9 * CANN >= 8.0.0 * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250308 * vLLM (the same version as vllm-ascend) -Find more about how to setup your environment step by step in [here](docs/source/installation.md). - ## Getting Started Please refer to [QuickStart](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html) and [Installation](https://vllm-ascend.readthedocs.io/en/latest/installation.html) for more details. ## Contributing -See [CONTRIBUTING](docs/source/developer_guide/contributing.md) for more details, which is a step-by-step guide to help you set up development environment, build and test. +See [CONTRIBUTING](https://vllm-ascend.readthedocs.io/en/main/developer_guide/contributing.html) for more details, which is a step-by-step guide to help you set up development environment, build and test. We welcome and value any contributions and collaborations: - Please let us know if you encounter a bug by [filing an issue](https://github.com/vllm-project/vllm-ascend/issues) -- Please use [User forum] for usage questions and help. +- Please use [User forum](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support) for usage questions and help. ## Branch @@ -68,7 +67,7 @@ Below is maintained branches: | v0.7.1-dev | Unmaintained | Only doc fixed is allowed | | v0.7.3-dev | Maintained | CI commitment for vLLM 0.7.3 version | -Please refer to [Versioning policy](docs/source/developer_guide/versioning_policy.md) for more details. +Please refer to [Versioning policy](https://vllm-ascend.readthedocs.io/en/main/developer_guide/versioning_policy.html) for more details. 
## License diff --git a/README.zh.md b/README.zh.md index 6ece548..e61a1d6 100644 --- a/README.zh.md +++ b/README.zh.md @@ -35,20 +35,25 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP ## 准备 - 硬件:Atlas 800I A2 Inference系列、Atlas A2 Training系列 +- 操作系统:Linux - 软件: * Python >= 3.9 * CANN >= 8.0.RC2 * PyTorch >= 2.5.1, torch-npu >= 2.5.1.dev20250308 * vLLM (与vllm-ascend版本一致) -在[此处](docs/source/installation.md),您可以了解如何逐步准备环境。 - ## 开始使用 请查看[快速开始](https://vllm-ascend.readthedocs.io/en/latest/quick_start.html)和[安装指南](https://vllm-ascend.readthedocs.io/en/latest/installation.html)了解更多. -## 分支 +## 贡献 +请参考 [CONTRIBUTING](https://vllm-ascend.readthedocs.io/en/main/developer_guide/contributing.html) 文档了解更多关于开发环境搭建、功能测试以及 PR 提交规范的信息。 +我们欢迎并重视任何形式的贡献与合作: +- 请通过[Issue](https://github.com/vllm-project/vllm-ascend/issues)来告知我们您遇到的任何Bug。 +- 请通过[用户论坛](https://discuss.vllm.ai/c/hardware-support/vllm-ascend-support)来交流使用问题和寻求帮助。 + +## 分支策略 vllm-ascend有主干分支和开发分支。 - **main**: 主干分支,与vLLM的主干分支对应,并通过昇腾CI持续进行质量看护。
specific field + 'vllm_ascend_version', # vllm ascend specific field 'env_vars', 'npu_info', # ascend specific field 'cann_info', # ascend specific field @@ -155,17 +154,27 @@ def get_cmake_version(run_lambda): r'cmake (.*)') +def _parse_version(version, version_tuple): + version_str = version_tuple[-1] + if version_str.startswith('g'): + if '.' in version_str: + git_sha = version_str.split('.')[0][1:] + date = version_str.split('.')[-1][1:] + return f"{version} (git sha: {git_sha}, date: {date})" + else: + git_sha = version_str[1:] # type: ignore + return f"{version} (git sha: {git_sha})" + return version + + def get_vllm_version(): from vllm import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) - if __version__ == "dev": - return "N/A (dev)" - if len(__version_tuple__) == 4: # dev build - git_sha = __version_tuple__[-1][1:] # type: ignore - return f"{__version__} (git sha: {git_sha}" - - return __version__ +def get_vllm_ascend_version(): + from vllm_ascend._version import __version__, __version_tuple__ + return _parse_version(__version__, __version_tuple__) def get_cpu_info(run_lambda): @@ -284,23 +293,6 @@ def get_pip_packages(run_lambda, patterns=None): return pip_version, out -def summarize_vllm_build_flags(): - # This could be a static method if the flags are constant, or dynamic if you need to check environment variables, etc. 
- return 'ROCm: {}; Neuron: {}'.format( - 'Enabled' if os.environ.get('ROCM_HOME') else 'Disabled', - 'Enabled' if os.environ.get('NEURON_CORES') else 'Disabled', - ) - - -def is_xnnpack_available(): - if TORCH_AVAILABLE: - import torch.backends.xnnpack - return str( - torch.backends.xnnpack.enabled) # type: ignore[attr-defined] - else: - return "N/A" - - def get_npu_info(run_lambda): return run_and_read_all(run_lambda, 'npu-smi info') @@ -317,7 +309,7 @@ def get_cann_info(run_lambda): def get_env_vars(): env_vars = '' secret_terms = ('secret', 'token', 'api', 'access', 'password') - report_prefix = ("TORCH", "PYTORCH", "ASCEND_") + report_prefix = ("TORCH", "PYTORCH", "ASCEND_", "ATB_") for k, v in os.environ.items(): if any(term in k.lower() for term in secret_terms): continue @@ -343,9 +335,6 @@ def get_env_info(): conda_packages = get_conda_packages(run_lambda) - vllm_version = get_vllm_version() - vllm_build_flags = summarize_vllm_build_flags() - return SystemEnv( torch_version=version_str, is_debug_build=debug_mode_str, @@ -361,10 +350,9 @@ def get_env_info(): gcc_version=get_gcc_version(run_lambda), clang_version=get_clang_version(run_lambda), cmake_version=get_cmake_version(run_lambda), - is_xnnpack_available=is_xnnpack_available(), cpu_info=get_cpu_info(run_lambda), - vllm_version=vllm_version, - vllm_build_flags=vllm_build_flags, + vllm_version=get_vllm_version(), + vllm_ascend_version=get_vllm_ascend_version(), env_vars=get_env_vars(), npu_info=get_npu_info(run_lambda), cann_info=get_cann_info(run_lambda), @@ -383,7 +371,6 @@ Libc version: {libc_version} Python version: {python_version} Python platform: {python_platform} -Is XNNPACK available: {is_xnnpack_available} CPU: {cpu_info} @@ -400,10 +387,11 @@ env_info_fmt += "\n" env_info_fmt += """ vLLM Version: {vllm_version} -vLLM Build Flags: -{vllm_build_flags} +vLLM Ascend Version: {vllm_ascend_version} +ENV Variables: {env_vars} + NPU: {npu_info} diff --git a/format.sh b/format.sh index 8dc0a31..1d0b940 
100755 --- a/format.sh +++ b/format.sh @@ -140,7 +140,7 @@ echo 'vLLM mypy: Done' # https://github.com/codespell-project/codespell/issues/1915 # Avoiding the "./" prefix and using "/**" globs for directories appears to solve the problem CODESPELL_EXCLUDES=( - '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**' + '--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**' ) CODESPELL_IGNORE_WORDS=( diff --git a/test.py b/test.py deleted file mode 100644 index c6825fc..0000000 --- a/test.py +++ /dev/null @@ -1,31 +0,0 @@ -import os - -import torch -import torch_npu # noqa: F401 - -device_id = 0 - - -def _device_id_to_physical_device_id(device_id: int) -> int: - if "ASCEND_RT_VISIBLE_DEVICES" in os.environ: - device_ids = os.environ["ASCEND_RT_VISIBLE_DEVICES"].split(",") - if device_ids == [""]: - raise RuntimeError("ASCEND_RT_VISIBLE_DEVICES is set to empty" - "string, which means Ascend NPU support is" - "disabled.") - physical_device_id = device_ids[device_id] - return int(physical_device_id) - else: - return device_id - - -physical_device_id = _device_id_to_physical_device_id(device_id) -print("physical_device_id: " + str(physical_device_id)) - -# return torch.npu.get_device_name(physical_device_id) -torch.npu.get_device_name(device_id) - -for k, v in os.environ.items(): - if k == "ASCEND_RT_VISIBLE_DEVICES": - print(k) - print(v) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 14d38af..4648d76 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -52,7 +52,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend, AscendMetadata) if TYPE_CHECKING: - from vllm.v1.core.scheduler_output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput logger = init_logger(__name__) diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index 
127b107..dd840e9 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -33,7 +33,7 @@ from vllm.logger import init_logger from vllm.model_executor import set_random_seed from vllm.platforms import current_platform from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE -from vllm.v1.core.scheduler import SchedulerOutput +from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheSpec) from vllm.v1.outputs import ModelRunnerOutput