### What this PR does / why we need it?

On Arm systems, `os.sched_yield()` does not take effect, so the GIL (Global Interpreter Lock) is not relinquished, resulting in CPU-bound issues. This PR patches the `sched_yield` handling in vLLM so that the process executes `time.sleep(0)` instead, which releases the GIL.

### Does this PR introduce _any_ user-facing change?

Signed-off-by: fems14 <1804143737@qq.com>
Co-authored-by: fems14 <74094523+fems14@users.noreply.github.com>
This commit is contained in:
@@ -19,6 +19,7 @@ import os
|
||||
import vllm_ascend.patch.platform.patch_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_distributed # noqa
|
||||
import vllm_ascend.patch.platform.patch_mamba_config # noqa
|
||||
import vllm_ascend.patch.platform.patch_sched_yield # noqa
|
||||
|
||||
if os.getenv("DYNAMIC_EPLB", "false") == "true" or os.getenv(
|
||||
"EXPERT_MAP_RECORD", "false") == "true":
|
||||
|
||||
13
vllm_ascend/patch/platform/patch_sched_yield.py
Normal file
13
vllm_ascend/patch/platform/patch_sched_yield.py
Normal file
@@ -0,0 +1,13 @@
|
||||
import sys
|
||||
|
||||
import vllm.distributed.utils
|
||||
from vllm.platforms import CpuArchEnum, Platform
|
||||
|
||||
is_arm = (Platform.get_cpu_architecture() == CpuArchEnum.ARM)
|
||||
|
||||
USE_SCHED_YIELD = (
|
||||
((sys.version_info[:3] >= (3, 11, 1)) or
|
||||
(sys.version_info[:2] == (3, 10) and sys.version_info[2] >= 8))
|
||||
and not is_arm)
|
||||
|
||||
vllm.distributed.utils.USE_SCHED_YIELD = USE_SCHED_YIELD
|
||||
@@ -21,6 +21,7 @@ if HAS_TRITON:
|
||||
import vllm_ascend.patch.worker.patch_triton
|
||||
|
||||
# isort: off
|
||||
import vllm_ascend.patch.platform.patch_sched_yield # noqa
|
||||
import vllm_ascend.patch.worker.patch_distributed # noqa
|
||||
import vllm_ascend.patch.worker.patch_logits # noqa
|
||||
import vllm_ascend.patch.worker.patch_roberta # noqa
|
||||
|
||||
Reference in New Issue
Block a user