From eaeb2efb20ff70875991483d63262f379a3afde8 Mon Sep 17 00:00:00 2001
From: zhanghw0354 <zhanghaiwencmss@139.com>
Date: Wed, 3 Sep 2025 10:58:08 +0800
Subject: [PATCH] [Main][Feat]Set the Profiler parameters through environment
 variables consistent with vLLM (#2608)

### What this PR does / why we need it?
Currently, obtaining the Python call stack while profiling in
vLLM-Ascend requires manually editing the source code. The relevant
line is
[worker_v1.py#L337](https://github.com/vllm-project/vllm-ascend/blob/6c973361fc2eba5d3faa9b6b496b4b9fec4dc784/vllm_ascend/worker/worker_v1.py#L337),
where `with_stack` has to be changed to `True`.
Upstream vLLM already lets you choose whether to collect the Python
call stack through an environment variable. The relevant PR is
[#21803](https://github.com/vllm-project/vllm/pull/21803) and the
documentation is the
[profiling guide](https://docs.vllm.ai/en/latest/contributing/profiling.html?h=vllm_torch_profiler_with_stack#profile-with-pytorch-profiler).
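This PR sets the profiler initialization parameters from the same
environment variables as vLLM (`VLLM_TORCH_PROFILER_WITH_STACK` for
`with_stack` and `VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY` for
`profile_memory`), eliminating the need for manual code modification.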

### Does this PR introduce _any_ user-facing change?
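No interface changes. The profiler's `with_stack` and `profile_memory`
options, previously hard-coded to `False`, now follow the vLLM
environment variables `VLLM_TORCH_PROFILER_WITH_STACK` and
`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`.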

### How was this patch tested?
CI passed with newly added and existing tests.

- vLLM version: v0.10.1.1
- vLLM main:
https://github.com/vllm-project/vllm/commit/0235103cbbdb511e6708aae600f759060a797c16

---------

Signed-off-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
Co-authored-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
---
 tests/ut/worker/test_worker_v1.py | 8 +++++---
 vllm_ascend/worker/worker_v1.py   | 7 ++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py
index 9f50b6d..af3d904 100644
--- a/tests/ut/worker/test_worker_v1.py
+++ b/tests/ut/worker/test_worker_v1.py
@@ -452,11 +452,13 @@ class TestNPUWorker(TestBase):
         mock_logger,
         mock_envs_vllm,
     ):
-        """Test _init_profiler method - profiler enabled case"""
+        """Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
         from vllm_ascend.worker.worker_v1 import NPUWorker
 
         # Set environment variables to enable profiler
         mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
+        mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True
+        mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True
 
         # Set enum mocks
         mock_export_type.Text = "Text"
@@ -516,8 +518,8 @@ class TestNPUWorker(TestBase):
             # Verify profiler parameters
             expected_activities = ["CPU", "NPU"]
             self.assertEqual(profile_kwargs["activities"], expected_activities)
-            self.assertFalse(profile_kwargs["with_stack"])
-            self.assertFalse(profile_kwargs["profile_memory"])
+            self.assertTrue(profile_kwargs["with_stack"])
+            self.assertTrue(profile_kwargs["profile_memory"])
             self.assertFalse(profile_kwargs["with_modules"])
             self.assertEqual(profile_kwargs["experimental_config"],
                              mock_experimental_config_instance)
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
index be3af07..1062d47 100644
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -334,8 +334,9 @@ class NPUWorker(WorkerBase):
                     torch_npu.profiler.ProfilerActivity.CPU,
                     torch_npu.profiler.ProfilerActivity.NPU,
                 ],
-                with_stack=False,
-                profile_memory=False,
+                with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK,
+                profile_memory=envs_vllm.\
+                    VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
                 with_modules=False,
                 experimental_config=experimental_config,
                 on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
@@ -350,4 +351,4 @@ class NPUWorker(WorkerBase):
         return self.model_runner.get_supported_tasks()
 
     def take_draft_token_ids(self) -> Optional[DraftTokenIds]:
-        return self.model_runner.take_draft_token_ids()
\ No newline at end of file
+        return self.model_runner.take_draft_token_ids()