diff --git a/.github/Dockerfile.buildwheel b/.github/Dockerfile.buildwheel index 5326935a..92d1fa72 100644 --- a/.github/Dockerfile.buildwheel +++ b/.github/Dockerfile.buildwheel @@ -18,10 +18,12 @@ ARG PY_VERSION=3.11 FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION} ARG COMPILE_CUSTOM_KERNELS=1 +ARG SOC_VERSION # Define environments ENV DEBIAN_FRONTEND=noninteractive ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN yum update -y && \ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \ rm -rf /var/cache/yum diff --git a/.github/workflows/image_310p_openeuler.yml b/.github/workflows/image_310p_openeuler.yml index b033cb47..5a34889c 100644 --- a/.github/workflows/image_310p_openeuler.yml +++ b/.github/workflows/image_310p_openeuler.yml @@ -132,4 +132,5 @@ jobs: file: Dockerfile.310p.openEuler build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend310p1 provenance: false diff --git a/.github/workflows/image_310p_ubuntu.yml b/.github/workflows/image_310p_ubuntu.yml index ddac1c1f..56aafcf8 100644 --- a/.github/workflows/image_310p_ubuntu.yml +++ b/.github/workflows/image_310p_ubuntu.yml @@ -128,4 +128,5 @@ jobs: tags: ${{ steps.meta.outputs.tags }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend310p1 provenance: false \ No newline at end of file diff --git a/.github/workflows/image_a3_openeuler.yml b/.github/workflows/image_a3_openeuler.yml index 6524c9e0..b1c57727 100644 --- a/.github/workflows/image_a3_openeuler.yml +++ b/.github/workflows/image_a3_openeuler.yml @@ -131,5 +131,6 @@ jobs: file: Dockerfile.a3.openEuler build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend910_9391 provenance: false diff --git a/.github/workflows/image_a3_ubuntu.yml b/.github/workflows/image_a3_ubuntu.yml index baaab8da..473df8e5 100644 --- a/.github/workflows/image_a3_ubuntu.yml +++ b/.github/workflows/image_a3_ubuntu.yml @@ -127,5 +127,6 @@ jobs: tags: ${{ steps.meta.outputs.tags }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend910_9391 provenance: false diff --git a/.github/workflows/image_openeuler.yml b/.github/workflows/image_openeuler.yml index ead1467d..29ccb848 100644 --- a/.github/workflows/image_openeuler.yml +++ b/.github/workflows/image_openeuler.yml @@ -131,4 +131,5 @@ jobs: file: Dockerfile.openEuler build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend910b1 provenance: false diff --git a/.github/workflows/image_ubuntu.yml b/.github/workflows/image_ubuntu.yml index 15960137..ab321304 100644 --- a/.github/workflows/image_ubuntu.yml +++ b/.github/workflows/image_ubuntu.yml @@ -128,4 +128,5 @@ jobs: tags: ${{ steps.meta.outputs.tags }} build-args: | PIP_INDEX_URL=https://pypi.org/simple + SOC_VERSION=ascend910b1 provenance: false diff --git a/.github/workflows/release_code.yml b/.github/workflows/release_code.yml index 2184693d..be8b85f1 100644 --- a/.github/workflows/release_code.yml +++ b/.github/workflows/release_code.yml @@ -59,6 +59,8 @@ jobs: python3 -m pip install twine setuptools_scm - name: Generate tar.gz + env: + SOC_VERSION: ascend910b1 run: | python3 setup.py sdist ls dist diff --git a/.github/workflows/release_whl.yml b/.github/workflows/release_whl.yml index 1f9dd22d..d23d427e 100644 --- a/.github/workflows/release_whl.yml +++ b/.github/workflows/release_whl.yml @@ -69,6 +69,7 @@ jobs: ls docker build -f ./.github/Dockerfile.buildwheel \ --build-arg PY_VERSION=${{ matrix.python-version }} \ + --build-arg SOC_VERSION=ascend910b1 \ -t wheel:v1 . docker run --rm \ -u $(id -u):$(id -g) \ diff --git a/.github/workflows/vllm_ascend_test_pr_light.yaml b/.github/workflows/vllm_ascend_test_pr_light.yaml index e2ba3566..06c8cfd2 100644 --- a/.github/workflows/vllm_ascend_test_pr_light.yaml +++ b/.github/workflows/vllm_ascend_test_pr_light.yaml @@ -81,6 +81,7 @@ jobs: env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True + SOC_VERSION: ascend910b1 strategy: matrix: vllm_version: [v0.11.2] diff --git a/Dockerfile b/Dockerfile index 5ace7366..2cc85ae8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,10 +20,12 @@ FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 ARG MOONCAKE_TAG="v0.3.7.post2" +ARG SOC_VERSION # Define environments ENV DEBIAN_FRONTEND=noninteractive ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION WORKDIR /workspace diff --git a/Dockerfile.310p b/Dockerfile.310p index 7113cc98..354f02a6 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -19,10 +19,12 @@ FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 +ARG SOC_VERSION # Define environments ENV DEBIAN_FRONTEND=noninteractive ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN apt-get update -y && \ apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 612e4456..3463939c 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -19,8 +19,10 @@ FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 +ARG SOC_VERSION ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN yum update -y && \ yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \ diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 58ff11b8..ba6703e0 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -20,11 +20,13 @@ FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 ARG MOONCAKE_TAG=v0.3.7.post2 +ARG SOC_VERSION COPY . /vllm-workspace/vllm-ascend/ # Define environments ENV DEBIAN_FRONTEND=noninteractive ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN pip config set global.index-url ${PIP_INDEX_URL} diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index 5cd053f2..259aa98e 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -20,8 +20,10 @@ FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 ARG MOONCAKE_TAG="v0.3.7.post2" +ARG SOC_VERSION ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN pip config set global.index-url ${PIP_INDEX_URL} diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 7ba9dfba..4c6c6f9e 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -20,8 +20,10 @@ FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 ARG MOONCAKE_TAG="v0.3.7.post2" +ARG SOC_VERSION ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS} +ENV SOC_VERSION=$SOC_VERSION RUN pip config set global.index-url ${PIP_INDEX_URL} diff --git a/setup.py b/setup.py index 3ab900dc..0cee690e 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ def get_value_from_lines(lines: List[str], key: str) -> str: return "" -def get_chip_info() -> str: +def get_chip_type() -> str: try: npu_info_lines = subprocess.check_output( ['npu-smi', 'info', '-l']).decode().strip().split('\n') @@ -106,19 +106,27 @@ def get_chip_info() -> str: except subprocess.CalledProcessError as e: raise RuntimeError(f"Get chip info failed: {e}") except FileNotFoundError: - # cpu envir, release code case, return `ascend910b1` by default - return "ascend910b1" + logging.warning( + "npu-smi command not found, if this is an npu envir, please check if npu driver is installed correctly." + ) + return "" envs = load_module_from_path("envs", os.path.join(ROOT_DIR, "vllm_ascend", "envs.py")) -soc_version = get_chip_info() +soc_version = get_chip_type() if not envs.SOC_VERSION: + if not soc_version: + raise RuntimeError( + "Could not determine chip type automatically via 'npu-smi'. " + "This can happen in a CPU-only environment. " + "Please set the 'SOC_VERSION' environment variable to specify the target chip." + ) envs.SOC_VERSION = soc_version else: - if envs.SOC_VERSION != soc_version: + if soc_version and envs.SOC_VERSION != soc_version: logging.warning( f"env SOC_VERSION: {envs.SOC_VERSION} is not equal to soc_version from npu-smi: {soc_version}" ) @@ -126,10 +134,6 @@ else: def gen_build_info(): soc_version = envs.SOC_VERSION - if "310" in soc_version and not envs.COMPILE_CUSTOM_KERNELS: - raise ValueError( - "SOC version 310 only supports custom kernels. Please set COMPILE_CUSTOM_KERNELS=1 to enable custom kernels." - ) # TODO(zzzzwwjj): Add A5 case soc_to_device = { @@ -158,6 +162,11 @@ def gen_build_info(): assert soc_version in soc_to_device, f"Undefined soc_version: {soc_version}. Please file an issue to vllm-ascend." device_type = soc_to_device[soc_version] + if device_type == "_310P" and not envs.COMPILE_CUSTOM_KERNELS: + raise ValueError( + "device type 310P only supports custom kernels. Please set COMPILE_CUSTOM_KERNELS=1 to enable custom kernels." + ) + package_dir = os.path.join(ROOT_DIR, "vllm_ascend", "_build_info.py") with open(package_dir, "w+") as f: f.write('# Auto-generated file\n')