From 2bcadcb9d5f4a8f4e97b59c0117ac3002ce7664c Mon Sep 17 00:00:00 2001
From: fems14 <74094523+fems14@users.noreply.github.com>
Date: Fri, 24 Oct 2025 00:06:45 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90main=E3=80=91patch=20sched=5Fyield=20(?=
 =?UTF-8?q?#3648)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What this PR does / why we need it?
On Arm systems, os.sched_yield() does not take effect: the GIL (Global
Interpreter Lock) is never released, which leaves the process CPU bound.
This PR patches the USE_SCHED_YIELD flag in vllm.distributed.utils so that,
on Arm, vLLM's sched_yield executes time.sleep(0) instead, which releases
the GIL.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: fems14 <1804143737@qq.com>
---
 vllm_ascend/patch/platform/__init__.py          |  1 +
 vllm_ascend/patch/platform/patch_sched_yield.py | 13 +++++++++++++
 vllm_ascend/patch/worker/__init__.py            |  1 +
 3 files changed, 15 insertions(+)
 create mode 100644 vllm_ascend/patch/platform/patch_sched_yield.py

diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py
index 99194f65..b4ef6332 100644
--- a/vllm_ascend/patch/platform/__init__.py
+++ b/vllm_ascend/patch/platform/__init__.py
@@ -19,6 +19,7 @@ import os
 import vllm_ascend.patch.platform.patch_config  # noqa
 import vllm_ascend.patch.platform.patch_distributed  # noqa
 import vllm_ascend.patch.platform.patch_mamba_config  # noqa
+import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 
 if os.getenv("DYNAMIC_EPLB", "false") == "true" or os.getenv(
         "EXPERT_MAP_RECORD", "false") == "true":
diff --git a/vllm_ascend/patch/platform/patch_sched_yield.py b/vllm_ascend/patch/platform/patch_sched_yield.py
new file mode 100644
index 00000000..694b9577
--- /dev/null
+++ b/vllm_ascend/patch/platform/patch_sched_yield.py
@@ -0,0 +1,13 @@
+import sys
+
+import vllm.distributed.utils
+from vllm.platforms import CpuArchEnum, Platform
+
+is_arm = (Platform.get_cpu_architecture() == CpuArchEnum.ARM)
+
+USE_SCHED_YIELD = (
+    ((sys.version_info[:3] >= (3, 11, 1)) or
+     (sys.version_info[:2] == (3, 10) and sys.version_info[2] >= 8))
+    and not is_arm)
+
+vllm.distributed.utils.USE_SCHED_YIELD = USE_SCHED_YIELD
diff --git a/vllm_ascend/patch/worker/__init__.py b/vllm_ascend/patch/worker/__init__.py
index c3d03f2e..ae6d3997 100644
--- a/vllm_ascend/patch/worker/__init__.py
+++ b/vllm_ascend/patch/worker/__init__.py
@@ -21,6 +21,7 @@ if HAS_TRITON:
     import vllm_ascend.patch.worker.patch_triton
 
 # isort: off
+import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 import vllm_ascend.patch.worker.patch_distributed  # noqa
 import vllm_ascend.patch.worker.patch_logits  # noqa
 import vllm_ascend.patch.worker.patch_roberta  # noqa