From bd66cfa6c2942ecfbdec9c2712f0d34693609d55 Mon Sep 17 00:00:00 2001 From: hongweijie <1462519292@qq.com> Date: Wed, 10 Dec 2025 17:55:27 +0800 Subject: [PATCH 1/6] [Doc] Update Qwen model accuracy report --- .../evaluation/accuracy_report/Qwen2.5-32B.md | 19 ++++++++++++++++++ .../accuracy_report/Qwen3-30B-A3B-coder.md | 16 +++++++++++++++ .../evaluation/accuracy_report/Qwen3-8B.md | 20 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md create mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md create mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md diff --git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md b/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md new file mode 100644 index 0000000..ff15c37 --- /dev/null +++ b/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md @@ -0,0 +1,19 @@ +# Qwen2.5-32B + +* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 +* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 +* Hardware Environment: KunLun P800 +* Parallel mode:TP4 + +```bash ++-----------+--------------------------+------------------+------+--------+---------+ +| Dataset | Metric | Subset | Num | Score | Cat.0 | ++-----------+--------------------------+------------------+------+--------+---------+ +| gsm8k | mean_acc | main | 1319 | 0.9158 | default | +| humaneval | pass@1 | openai_humaneval | 164 | 0.878 | default | +| ifeval | mean_prompt_level_strict | default | 541 | 0.8059 | default | +| ifeval | mean_inst_level_strict | default | 541 | 0.8765 | default | +| ifeval | mean_prompt_level_loose | default | 541 | 0.8262 | default | +| ifeval | mean_inst_level_loose | default | 541 | 0.8916 | default | ++-----------+--------------------------+------------------+------+--------+---------+ +``` \ No newline at end of file diff 
--git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md new file mode 100644 index 0000000..7368e7b --- /dev/null +++ b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md @@ -0,0 +1,16 @@ +# Qwen3-30B-A3B-coder + +* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 +* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 +* Hardware Environment: KunLun P800 +* Parallel mode:TP4 + +```bash ++-----------------+-------------+--------------------+------+--------+---------+ +| Dataset | Metric | Subset | Num | Score | Cat.0 | ++-----------------+-------------+--------------------+------+--------+---------+ +| gsm8k | mean_acc | main | 1319 | 0.9272 | default | +| humaneval | pass@1 | openai_humaneval | 164 | 0.9146 | default | +| live_code_bench | pass@1 | release_latest | 714 | 0.5644 | default | ++-----------------+-------------+--------------------+------+--------+---------+ +``` \ No newline at end of file diff --git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md new file mode 100644 index 0000000..55340c2 --- /dev/null +++ b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md @@ -0,0 +1,20 @@ +# Qwen3-8B + +* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 +* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 +* Hardware Environment: KunLun P800 +* Parallel mode:TP1 + +```bash ++-----------+--------------------------+--------------------+------+--------+---------+ +| Dataset | Metric | Subset | Num | Score | Cat.0 | ++-----------+--------------------------+--------------------+------+--------+---------+ +| gsm8k | mean_acc | main | 1319 | 0.9143 | default | +| humaneval | pass@1 | openai_humaneval | 164 | 0.8049 | default | +| ifeval | mean_prompt_level_strict | default | 541 | 0.8503 | default | 
+| ifeval | mean_inst_level_strict | default | 541 | 0.8971 | default | +| ifeval | mean_prompt_level_loose | default | 541 | 0.8762 | default | +| ifeval | mean_inst_level_loose | default | 541 | 0.9165 | default | +| math_500 | mean_acc | Level 1 | 43 | 0.907 | default | ++-----------+--------------------------+--------------------+------+--------+---------+ +``` \ No newline at end of file From 670c2397b8a749c163edadcc4d72c1fe29da4ba4 Mon Sep 17 00:00:00 2001 From: xyDong0223 Date: Wed, 10 Dec 2025 21:52:48 +0800 Subject: [PATCH 2/6] [Kernel] Enable fast random sample on Kunlun P --- vllm_kunlun/v1/sample/ops/topk_topp_sampler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py index e175040..e45c426 100644 --- a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py +++ b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py @@ -151,7 +151,12 @@ def random_sample( # not have its own seed. Then, we overwrite the values for the requests # that have their own seeds. if len(generators) != probs.shape[0]: - q.exponential_() + if os.getenv('FAST_RANDOM_SAMPLE') == "1": + q.uniform_() + q = -torch.log(q) + q = q.clamp(min=1e-4) + else: + q.exponential_() if generators: # TODO(woosuk): This can be slow because we handle each request # one by one. Optimize this. 
From 170e7091d12ae31fc87519030c5a7f3f6e7dc3fc Mon Sep 17 00:00:00 2001 From: xyDong0223 Date: Wed, 10 Dec 2025 21:58:03 +0800 Subject: [PATCH 3/6] Delete docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md --- .../evaluation/accuracy_report/Qwen3-8B.md | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md diff --git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md deleted file mode 100644 index 55340c2..0000000 --- a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-8B.md +++ /dev/null @@ -1,20 +0,0 @@ -# Qwen3-8B - -* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 -* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 -* Hardware Environment: KunLun P800 -* Parallel mode:TP1 - -```bash -+-----------+--------------------------+--------------------+------+--------+---------+ -| Dataset | Metric | Subset | Num | Score | Cat.0 | -+-----------+--------------------------+--------------------+------+--------+---------+ -| gsm8k | mean_acc | main | 1319 | 0.9143 | default | -| humaneval | pass@1 | openai_humaneval | 164 | 0.8049 | default | -| ifeval | mean_prompt_level_strict | default | 541 | 0.8503 | default | -| ifeval | mean_inst_level_strict | default | 541 | 0.8971 | default | -| ifeval | mean_prompt_level_loose | default | 541 | 0.8762 | default | -| ifeval | mean_inst_level_loose | default | 541 | 0.9165 | default | -| math_500 | mean_acc | Level 1 | 43 | 0.907 | default | -+-----------+--------------------------+--------------------+------+--------+---------+ -``` \ No newline at end of file From f4bf3a62516c46aa65595864c554c188422e7f3d Mon Sep 17 00:00:00 2001 From: xyDong0223 Date: Wed, 10 Dec 2025 21:58:16 +0800 Subject: [PATCH 4/6] Delete docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md --- 
.../evaluation/accuracy_report/Qwen2.5-32B.md | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md diff --git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md b/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md deleted file mode 100644 index ff15c37..0000000 --- a/docs/source/developer_guide/evaluation/accuracy_report/Qwen2.5-32B.md +++ /dev/null @@ -1,19 +0,0 @@ -# Qwen2.5-32B - -* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 -* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 -* Hardware Environment: KunLun P800 -* Parallel mode:TP4 - -```bash -+-----------+--------------------------+------------------+------+--------+---------+ -| Dataset | Metric | Subset | Num | Score | Cat.0 | -+-----------+--------------------------+------------------+------+--------+---------+ -| gsm8k | mean_acc | main | 1319 | 0.9158 | default | -| humaneval | pass@1 | openai_humaneval | 164 | 0.878 | default | -| ifeval | mean_prompt_level_strict | default | 541 | 0.8059 | default | -| ifeval | mean_inst_level_strict | default | 541 | 0.8765 | default | -| ifeval | mean_prompt_level_loose | default | 541 | 0.8262 | default | -| ifeval | mean_inst_level_loose | default | 541 | 0.8916 | default | -+-----------+--------------------------+------------------+------+--------+---------+ -``` \ No newline at end of file From 0b7fb2ad1981632786f3bb501f837ba7ef318afa Mon Sep 17 00:00:00 2001 From: xyDong0223 Date: Wed, 10 Dec 2025 21:58:27 +0800 Subject: [PATCH 5/6] Delete docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md --- .../accuracy_report/Qwen3-30B-A3B-coder.md | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md diff --git a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md 
b/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md deleted file mode 100644 index 7368e7b..0000000 --- a/docs/source/developer_guide/evaluation/accuracy_report/Qwen3-30B-A3B-coder.md +++ /dev/null @@ -1,16 +0,0 @@ -# Qwen3-30B-A3B-coder - -* vLLM Version: vLLM: 0.10.1.1 , vLLM-KunLun Version: v0.10.1.1 -* Software Environment:OS: Ubuntu 22.04, PyTorch ≥ 2.5.1 -* Hardware Environment: KunLun P800 -* Parallel mode:TP4 - -```bash -+-----------------+-------------+--------------------+------+--------+---------+ -| Dataset | Metric | Subset | Num | Score | Cat.0 | -+-----------------+-------------+--------------------+------+--------+---------+ -| gsm8k | mean_acc | main | 1319 | 0.9272 | default | -| humaneval | pass@1 | openai_humaneval | 164 | 0.9146 | default | -| live_code_bench | pass@1 | release_latest | 714 | 0.5644 | default | -+-----------------+-------------+--------------------+------+--------+---------+ -``` \ No newline at end of file From af2cd6097f59d59618ce51a1680f7bb6eb2f1c9d Mon Sep 17 00:00:00 2001 From: xyDong0223 Date: Thu, 11 Dec 2025 11:17:28 +0800 Subject: [PATCH 6/6] [Kernel] Fix missing import os --- vllm_kunlun/v1/sample/ops/topk_topp_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py index e45c426..31fdf4d 100644 --- a/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py +++ b/vllm_kunlun/v1/sample/ops/topk_topp_sampler.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from typing import Optional - +import os import torch import torch.nn as nn from packaging import version