[Main][Feat]Set the Profiler parameters through environment variables consistent with vLLM (#2608)
### What this PR does / why we need it?

Currently, when profiling in vLLM-Ascend, obtaining the Python call stack requires manually modifying the code: you have to set `with_stack` to `True` in `vllm_ascend/worker/worker_v1.py`, line 337 (as of commit 6c973361fc). vLLM itself now lets you choose whether to capture the Python call stack through an environment variable. The relevant PR is [#21803](https://github.com/vllm-project/vllm/pull/21803), and the documentation is [profiling](https://docs.vllm.ai/en/latest/contributing/profiling.html?h=vllm_torch_profiler_with_stack#profile-with-pytorch-profiler). This PR sets the profiler initialization parameters from the same environment variables that vLLM uses, eliminating the need for manual code modification.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

CI passed with newly added and existing tests.

- vLLM version: v0.10.1.1
- vLLM main: 0235103cbb

---------

Signed-off-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
Co-authored-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
This commit is contained in:
@@ -452,11 +452,13 @@ class TestNPUWorker(TestBase):
|
|||||||
mock_logger,
|
mock_logger,
|
||||||
mock_envs_vllm,
|
mock_envs_vllm,
|
||||||
):
|
):
|
||||||
"""Test _init_profiler method - profiler enabled case"""
|
"""Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
|
||||||
from vllm_ascend.worker.worker_v1 import NPUWorker
|
from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||||
|
|
||||||
# Set environment variables to enable profiler
|
# Set environment variables to enable profiler
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
|
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
|
||||||
|
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True
|
||||||
|
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True
|
||||||
|
|
||||||
# Set enum mocks
|
# Set enum mocks
|
||||||
mock_export_type.Text = "Text"
|
mock_export_type.Text = "Text"
|
||||||
@@ -516,8 +518,8 @@ class TestNPUWorker(TestBase):
|
|||||||
# Verify profiler parameters
|
# Verify profiler parameters
|
||||||
expected_activities = ["CPU", "NPU"]
|
expected_activities = ["CPU", "NPU"]
|
||||||
self.assertEqual(profile_kwargs["activities"], expected_activities)
|
self.assertEqual(profile_kwargs["activities"], expected_activities)
|
||||||
self.assertFalse(profile_kwargs["with_stack"])
|
self.assertTrue(profile_kwargs["with_stack"])
|
||||||
self.assertFalse(profile_kwargs["profile_memory"])
|
self.assertTrue(profile_kwargs["profile_memory"])
|
||||||
self.assertFalse(profile_kwargs["with_modules"])
|
self.assertFalse(profile_kwargs["with_modules"])
|
||||||
self.assertEqual(profile_kwargs["experimental_config"],
|
self.assertEqual(profile_kwargs["experimental_config"],
|
||||||
mock_experimental_config_instance)
|
mock_experimental_config_instance)
|
||||||
|
|||||||
@@ -334,8 +334,9 @@ class NPUWorker(WorkerBase):
|
|||||||
torch_npu.profiler.ProfilerActivity.CPU,
|
torch_npu.profiler.ProfilerActivity.CPU,
|
||||||
torch_npu.profiler.ProfilerActivity.NPU,
|
torch_npu.profiler.ProfilerActivity.NPU,
|
||||||
],
|
],
|
||||||
with_stack=False,
|
with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK,
|
||||||
profile_memory=False,
|
profile_memory=envs_vllm.\
|
||||||
|
VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
|
||||||
with_modules=False,
|
with_modules=False,
|
||||||
experimental_config=experimental_config,
|
experimental_config=experimental_config,
|
||||||
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
|
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
|
||||||
@@ -350,4 +351,4 @@ class NPUWorker(WorkerBase):
|
|||||||
return self.model_runner.get_supported_tasks()
|
return self.model_runner.get_supported_tasks()
|
||||||
|
|
||||||
def take_draft_token_ids(self) -> Optional[DraftTokenIds]:
|
def take_draft_token_ids(self) -> Optional[DraftTokenIds]:
|
||||||
return self.model_runner.take_draft_token_ids()
|
return self.model_runner.take_draft_token_ids()
|
||||||
|
|||||||
Reference in New Issue
Block a user