From eaeb2efb20ff70875991483d63262f379a3afde8 Mon Sep 17 00:00:00 2001 From: zhanghw0354 Date: Wed, 3 Sep 2025 10:58:08 +0800 Subject: [PATCH] [Main][Feat]Set the Profiler parameters through environment variables consistent with vLLM (#2608) ### What this PR does / why we need it? Currently, when profiling in vLLM-Ascend, obtaining the Python call stack requires manually modifying the code. The code location is: [worker_v1.py#L337](https://github.com/vllm-project/vllm-ascend/blob/6c973361fc2eba5d3faa9b6b496b4b9fec4dc784/vllm_ascend/worker/worker_v1.py#L337) where you set `with_stack` to `True`. vLLM now lets you control whether the Python call stack is captured through an environment variable. The relevant PR is: [#21803](https://github.com/vllm-project/vllm/pull/21803) and the documentation is: [profiling](https://docs.vllm.ai/en/latest/contributing/profiling.html?h=vllm_torch_profiler_with_stack#profile-with-pytorch-profiler) This PR sets the profiler initialization parameters from the same environment variables as vLLM (`VLLM_TORCH_PROFILER_WITH_STACK` for `with_stack` and `VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY` for `profile_memory`), eliminating the need for manual code modification. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed with newly added and existing tests. 
- vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/0235103cbbdb511e6708aae600f759060a797c16 --------- Signed-off-by: zhanghaiwen Co-authored-by: zhanghaiwen --- tests/ut/worker/test_worker_v1.py | 8 +++++--- vllm_ascend/worker/worker_v1.py | 7 ++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 9f50b6d..af3d904 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -452,11 +452,13 @@ class TestNPUWorker(TestBase): mock_logger, mock_envs_vllm, ): - """Test _init_profiler method - profiler enabled case""" + """Test _init_profiler method - profiler enabled case with stack and memory profiling enabled""" from vllm_ascend.worker.worker_v1 import NPUWorker # Set environment variables to enable profiler mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces" + mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True + mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True # Set enum mocks mock_export_type.Text = "Text" @@ -516,8 +518,8 @@ class TestNPUWorker(TestBase): # Verify profiler parameters expected_activities = ["CPU", "NPU"] self.assertEqual(profile_kwargs["activities"], expected_activities) - self.assertFalse(profile_kwargs["with_stack"]) - self.assertFalse(profile_kwargs["profile_memory"]) + self.assertTrue(profile_kwargs["with_stack"]) + self.assertTrue(profile_kwargs["profile_memory"]) self.assertFalse(profile_kwargs["with_modules"]) self.assertEqual(profile_kwargs["experimental_config"], mock_experimental_config_instance) diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index be3af07..1062d47 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -334,8 +334,9 @@ class NPUWorker(WorkerBase): torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.NPU, ], - with_stack=False, - profile_memory=False, + 
with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK, + profile_memory=envs_vllm.\ + VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY, with_modules=False, experimental_config=experimental_config, on_trace_ready=torch_npu.profiler.tensorboard_trace_handler( @@ -350,4 +351,4 @@ class NPUWorker(WorkerBase): return self.model_runner.get_supported_tasks() def take_draft_token_ids(self) -> Optional[DraftTokenIds]: - return self.model_runner.take_draft_token_ids() \ No newline at end of file + return self.model_runner.take_draft_token_ids()