From d39855b07545999ca13c7cc92c291853981f8ee1 Mon Sep 17 00:00:00 2001
From: Yikun Jiang
Date: Mon, 28 Apr 2025 21:52:17 +0800
Subject: [PATCH] Update installation and tutorial doc (#711)

### What this PR does / why we need it?
Update the installation and tutorial docs.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Preview.

Signed-off-by: Yikun Jiang
---
 docs/source/installation.md                | 12 +++++++++---
 docs/source/tutorials/single_npu.md        | 10 +++++-----
 docs/source/user_guide/supported_models.md |  2 ++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/docs/source/installation.md b/docs/source/installation.md
index a41f19a..51d0475 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -124,7 +124,7 @@ First install system dependencies:
 
 ```bash
 apt update -y
-apt install -y gcc g++ cmake libnuma-dev wget
+apt install -y gcc g++ cmake libnuma-dev wget git
 ```
 
 **[Optional]** Configure the extra index of `pip` if you are working on an **x86** machine, so that the CPU build of torch can be found:
@@ -138,8 +138,14 @@ Then you can install `vllm` and `vllm-ascend` from **pre-built wheel**:
 
 ```{code-block} bash
 :substitutions:
-# Install vllm-project/vllm from pypi (v0.8.4 aarch64 is unsupported see detail in below note)
-pip install vllm==|pip_vllm_version|
+# Install vllm-project/vllm from pypi
+# (the v0.8.4 aarch64 wheel is not available; see the note below for details)
+# pip install vllm==|pip_vllm_version|
+# Install vLLM from source instead
+git clone --depth 1 --branch |vllm_version| https://github.com/vllm-project/vllm
+cd vllm
+VLLM_TARGET_DEVICE=empty pip install -v -e .
+cd ..
 
 # Install vllm-project/vllm-ascend from pypi.
 pip install vllm-ascend==|pip_vllm_ascend_version|
diff --git a/docs/source/tutorials/single_npu.md b/docs/source/tutorials/single_npu.md
index 445d951..7c2f20a 100644
--- a/docs/source/tutorials/single_npu.md
+++ b/docs/source/tutorials/single_npu.md
@@ -1,4 +1,4 @@
-# Single NPU (Qwen2.5 7B)
+# Single NPU (Qwen3 8B)
 
 ## Run vllm-ascend on Single NPU
 
@@ -50,7 +50,7 @@ prompts = [
     "The future of AI is",
 ]
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
-llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", max_model_len=26240)
+llm = LLM(model="Qwen/Qwen3-8B", max_model_len=26240)
 
 outputs = llm.generate(prompts, sampling_params)
 for output in outputs:
@@ -91,7 +91,7 @@ docker run --rm \
 -e VLLM_USE_MODELSCOPE=True \
 -e PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256 \
 -it $IMAGE \
-vllm serve Qwen/Qwen2.5-7B-Instruct --max_model_len 26240
+vllm serve Qwen/Qwen3-8B --max_model_len 26240
 ```
 
 :::{note}
@@ -112,7 +112,7 @@ Once your server is started, you can query the model with input prompts:
 curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
-        "model": "Qwen/Qwen2.5-7B-Instruct",
+        "model": "Qwen/Qwen3-8B",
         "prompt": "The future of AI is",
         "max_tokens": 7,
         "temperature": 0
@@ -122,7 +122,7 @@ curl http://localhost:8000/v1/completions \
 If you query the server successfully, you can see the info shown below (client):
 
 ```bash
-{"id":"cmpl-b25a59a2f985459781ce7098aeddfda7","object":"text_completion","created":1739523925,"model":"Qwen/Qwen2.5-7B-Instruct","choices":[{"index":0,"text":" here. It’s not just a","logprobs":null,"finish_reason":"length","stop_reason":null,"prompt_logprobs":null}],"usage":{"prompt_tokens":5,"total_tokens":12,"completion_tokens":7,"prompt_tokens_details":null}}
+{"id":"cmpl-b25a59a2f985459781ce7098aeddfda7","object":"text_completion","created":1739523925,"model":"Qwen/Qwen3-8B","choices":[{"index":0,"text":" here. It’s not just a","logprobs":null,"finish_reason":"length","stop_reason":null,"prompt_logprobs":null}],"usage":{"prompt_tokens":5,"total_tokens":12,"completion_tokens":7,"prompt_tokens_details":null}}
 ```
 
 Logs of the vllm server:
diff --git a/docs/source/user_guide/supported_models.md b/docs/source/user_guide/supported_models.md
index 10ef6f0..4938c18 100644
--- a/docs/source/user_guide/supported_models.md
+++ b/docs/source/user_guide/supported_models.md
@@ -5,6 +5,8 @@
 | DeepSeek v3 | ✅|||
 | DeepSeek R1 | ✅|||
 | DeepSeek Distill (Qwen/LLama) |✅||
+| Qwen3 | ✅ ||
+| Qwen3-Moe | ✅ ||
 | Qwen2-VL | ✅ ||
 | Qwen2-Audio | ✅ ||
 | Qwen2.5 | ✅ ||
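
Beyond the `curl` request that the updated tutorial already shows, the renamed model can also be queried from Python through the server's OpenAI-compatible API. The snippet below is only an illustrative sketch and is not part of this patch: it assumes the `vllm serve Qwen/Qwen3-8B` server from `docs/source/tutorials/single_npu.md` is already listening on `localhost:8000` and that the `openai` client package is installed.

```python
# Illustrative sketch only (not part of this patch): query the Qwen3-8B server
# started in docs/source/tutorials/single_npu.md via its OpenAI-compatible API.
# Assumes `vllm serve Qwen/Qwen3-8B --max_model_len 26240` is running on
# localhost:8000 and that `pip install openai` has been done.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # vLLM's OpenAI-compatible endpoint
    api_key="EMPTY",                      # any key works unless --api-key is set
)

completion = client.completions.create(
    model="Qwen/Qwen3-8B",
    prompt="The future of AI is",
    max_tokens=7,
    temperature=0,
)
print(completion.choices[0].text)  # e.g. " here. It’s not just a"
```

The printed text corresponds to the `text` field of the JSON response shown in the tutorial diff above.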