diff --git a/Dockerfile b/Dockerfile
index 29d6445..cabe38c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
 
-FROM quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+FROM git.modelhub.org.cn:9443/enginex-ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
 
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
diff --git a/README.md b/README.md
index 8c78bcf..3144f44 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,53 @@ vLLM 昇腾插件 (`vllm-ascend`) 是一个由社区维护的让vLLM在Ascend NP
 * PyTorch >= 2.7.1, torch-npu >= 2.7.1.dev20250724
 * vLLM (与vllm-ascend版本一致)
 
+## QuickStart
+
+1、从 modelscope 上下载支持的模型,例如 Qwen/Qwen3-8B
+```bash
+modelscope download --model Qwen/Qwen3-8B --local_dir ./model
+```
+
+2、使用 Dockerfile 生成镜像
+从仓库的【软件包】栏目下载基础镜像 git.modelhub.org.cn:9443/enginex-ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+使用 Dockerfile 生成镜像
+```bash
+docker build -f Dockerfile -t ascend-vllm:dev .
+```
+
+3、启动docker
+```bash
+docker run -it --rm \
+  -p 10086:80 \
+  --name test-ascend-my-1 \
+  -v `pwd`:/host \
+  -e ASCEND_VISIBLE_DEVICES=1 \
+  --device /dev/davinci1:/dev/davinci0 \
+  --device /dev/davinci_manager \
+  --device /dev/devmm_svm \
+  --device /dev/hisi_hdc \
+  -v ./model:/model \
+  -v /usr/local/dcmi:/usr/local/dcmi \
+  -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
+  -v /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/ \
+  -v /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info \
+  -v /etc/ascend_install.info:/etc/ascend_install.info \
+  --privileged \
+  ascend-vllm:dev \
+  vllm serve /model --served-model-name qwen3-8b --max-model-len 4096 --port 80
+```
+
+4、测试服务
+```bash
+curl -X POST http://localhost:10086/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "qwen3-8b",
+    "messages": [{"role": "user", "content": "你好"}],
+    "stream": true
+  }'
+```
+
 ## 开始使用
 
 推荐您使用以下版本快速开始使用: