[Refactor] Migrate profiler config from env vars to explicit ProfilerConfig (#5928)

### What this PR does / why we need it?

Migrate the torch profiler configuration from the deprecated environment variables (`VLLM_TORCH_PROFILER_DIR`, `VLLM_TORCH_PROFILER_WITH_STACK`, `VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`) to the explicit `ProfilerConfig` object, aligning with vLLM's configuration best practices. The environment-variable approach to configuring the profiler is deprecated in vLLM and will be removed in v0.14.0 or v1.0.0.

### Does this PR introduce _any_ user-facing change?

Yes. Developers who want to collect profiler traces should now pass `--profiler-config` instead of setting `VLLM_TORCH_PROFILER_DIR`.

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 11b6af5280

Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
2026-01-19 09:27:55 +08:00
parent bc1f6713e7
commit 9cad1a8349
3 changed files with 68 additions and 38 deletions
--- a/docs/source/tutorials/DeepSeek-V3.2.md
+++ b/docs/source/tutorials/DeepSeek-V3.2.md
@@ -278,9 +278,6 @@ Before you start, please
        export VLLM_USE_V1=1
        export HCCL_BUFFSIZE=256
        export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
        export VLLM_TORCH_PROFILER_WITH_STACK=0
        export ASCEND_AGGREGATE_ENABLE=1
        export ASCEND_TRANSPORT_PRINT=1
        export ACL_OP_INIT_MODE=1
@@ -302,6 +299,10 @@ Before you start, please
            --tensor-parallel-size $7 \
            --enable-expert-parallel \
            --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
            --profiler-config \
            '{"profiler": "torch",
            "torch_profiler_dir": "./vllm_profile",
            "torch_profiler_with_stack": false}' \
            --seed 1024 \
            --served-model-name dsv3 \
            --max-model-len 68000 \
@@ -351,9 +352,6 @@ Before you start, please
        export VLLM_USE_V1=1
        export HCCL_BUFFSIZE=256
        export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
        export VLLM_TORCH_PROFILER_WITH_STACK=0
        export ASCEND_AGGREGATE_ENABLE=1
        export ASCEND_TRANSPORT_PRINT=1
        export ACL_OP_INIT_MODE=1
@@ -376,6 +374,10 @@ Before you start, please
            --tensor-parallel-size $7 \
            --enable-expert-parallel \
            --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
            --profiler-config \
            '{"profiler": "torch",
            "torch_profiler_dir": "./vllm_profile",
            "torch_profiler_with_stack": false}' \
            --seed 1024 \
            --served-model-name dsv3 \
            --max-model-len 68000 \
@@ -426,8 +428,6 @@ Before you start, please
        export VLLM_USE_V1=1
        export HCCL_BUFFSIZE=256
        export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
        export VLLM_TORCH_PROFILER_WITH_STACK=0
        export ASCEND_AGGREGATE_ENABLE=1
        export ASCEND_TRANSPORT_PRINT=1
@@ -452,6 +452,10 @@ Before you start, please
            --tensor-parallel-size $7 \
            --enable-expert-parallel \
            --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
            --profiler-config \
            '{"profiler": "torch",
            "torch_profiler_dir": "./vllm_profile",
            "torch_profiler_with_stack": false}' \
            --seed 1024 \
            --served-model-name dsv3 \
            --max-model-len 68000 \
@@ -504,9 +508,6 @@ Before you start, please
        export VLLM_USE_V1=1
        export HCCL_BUFFSIZE=256
        export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
        export VLLM_TORCH_PROFILER_WITH_STACK=0
        export ASCEND_AGGREGATE_ENABLE=1
        export ASCEND_TRANSPORT_PRINT=1
        export ACL_OP_INIT_MODE=1
@@ -530,6 +531,10 @@ Before you start, please
            --tensor-parallel-size $7 \
            --enable-expert-parallel \
            --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
            --profiler-config \
            '{"profiler": "torch",
            "torch_profiler_dir": "./vllm_profile",
            "torch_profiler_with_stack": false}' \
            --seed 1024 \
            --served-model-name dsv3 \
            --max-model-len 68000 \
--- a/tests/ut/worker/test_worker_v1.py
+++ b/tests/ut/worker/test_worker_v1.py
@@ -2,7 +2,7 @@ import unittest
 from unittest.mock import MagicMock, patch
 import torch
-from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
+from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig
 from tests.ut.base import TestBase
 from vllm_ascend.utils import vllm_version_is
@@ -311,26 +311,34 @@ class TestNPUWorker(TestBase):
            self.assertIn("Profiler is not enabled", str(cm.exception))
    @patch("vllm_ascend.worker.worker.envs_vllm")
    @patch("vllm_ascend.worker.worker.envs_ascend")
    def test_profile_and_msmonitor_both_enabled_raises_error(
-            self, mock_envs_vllm, mock_envs_ascend):
+            self, mock_envs_ascend):
        """Test profile method raises exception when both profiler and msmonitor are enabled"""
        from vllm_ascend.worker.worker import NPUWorker
        mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
        mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
        # Create profiler config object
        profiler_config = ProfilerConfig(
            profiler="torch",
            torch_profiler_dir="/path/to/traces"
        )
        vllm_config_mock = MagicMock()
        vllm_config_mock.profiler_config = profiler_config
        # Create worker mock
        with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
            worker = NPUWorker()
            worker.vllm_config = vllm_config_mock
            # Test should raise exception
            with self.assertRaises(RuntimeError) as cm:
                _ = worker._init_profiler()
            self.assertIn(
-                "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.",
+                "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time.",
                str(cm.exception))
    def test_lora_methods(self):
@@ -413,7 +421,6 @@ class TestNPUWorker(TestBase):
            mock_model_runner._dummy_run.assert_called_once_with(
                num_tokens=mock_decode_token_per_req, uniform_decode=True)
    @patch("vllm_ascend.worker.worker.envs_vllm")
    @patch("vllm_ascend.worker.worker.logger")
    @patch("torch_npu.profiler._ExperimentalConfig")
    @patch("torch_npu.profiler.profile")
@@ -432,15 +439,20 @@ class TestNPUWorker(TestBase):
        mock_profile,
        mock_experimental_config,
        mock_logger,
        mock_envs_vllm,
    ):
        """Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
        from vllm_ascend.worker.worker import NPUWorker
-        # Set environment variables to enable profiler
+        # Create profiler config object
-        mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
+        profiler_config = ProfilerConfig(
-        mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True
+            profiler="torch",
-        mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True
+            torch_profiler_dir="/path/to/traces",
            torch_profiler_with_stack=True,
            torch_profiler_with_memory=True
        )
        vllm_config_mock = MagicMock()
        vllm_config_mock.profiler_config = profiler_config
        # Set enum mocks
        mock_export_type.Text = "Text"
@@ -460,6 +472,7 @@ class TestNPUWorker(TestBase):
        # Create worker mock
        with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
            worker = NPUWorker()
            worker.vllm_config = vllm_config_mock
            # Test _init_profiler
            result = worker._init_profiler()
@@ -511,17 +524,23 @@ class TestNPUWorker(TestBase):
            # Verify return value
            self.assertEqual(result, mock_profiler_instance)
-    @patch("vllm_ascend.worker.worker.envs_vllm")
+    def test_init_profiler_disabled(self):
    def test_init_profiler_disabled(self, mock_envs_vllm):
        """Test _init_profiler method - profiler disabled case"""
        from vllm_ascend.worker.worker import NPUWorker
-        # Set environment variable to disable profiler
+        # Create profiler config object with profiler disabled
-        mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = None
+        profiler_config = ProfilerConfig(
            profiler=None,
            torch_profiler_dir=""
        )
        vllm_config_mock = MagicMock()
        vllm_config_mock.profiler_config = profiler_config
        # Create worker mock
        with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
            worker = NPUWorker()
            worker.vllm_config = vllm_config_mock
            # Test _init_profiler
            result = worker._init_profiler()
@@ -529,17 +548,23 @@ class TestNPUWorker(TestBase):
            # Verify returns None
            self.assertIsNone(result)
-    @patch("vllm_ascend.worker.worker.envs_vllm")
+    def test_init_profiler_empty_dir(self):
    def test_init_profiler_empty_dir(self, mock_envs_vllm):
        """Test _init_profiler method - empty directory string case"""
        from vllm_ascend.worker.worker import NPUWorker
-        # Set environment variable to empty string
+        # Create profiler config object with empty dir
-        mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = ""
+        profiler_config = ProfilerConfig(
            profiler="torch",
            torch_profiler_dir=""
        )
        vllm_config_mock = MagicMock()
        vllm_config_mock.profiler_config = profiler_config
        # Create worker mock
        with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
            worker = NPUWorker()
            worker.vllm_config = vllm_config_mock
            # Test _init_profiler
            result = worker._init_profiler()
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -489,14 +489,15 @@ class NPUWorker(WorkerBase):
        ensure_ec_transfer_initialized(self.vllm_config)
    def _init_profiler(self):
-        # Torch profiler. Enabled and configured through env vars:
+        # Torch profiler. Enabled through profiler_config:
-        # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
+        # --profiler-config.profiler=torch --profiler-config.torch_profiler_dir=/path/to/save/trace
-        if envs_vllm.VLLM_TORCH_PROFILER_DIR:
+        profiler_config = self.vllm_config.profiler_config
        if profiler_config.profiler == "torch" and profiler_config.torch_profiler_dir:
            if envs_ascend.MSMONITOR_USE_DAEMON:
                raise RuntimeError(
-                    "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time."
+                    "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time."
                )
-            torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
+            torch_profiler_trace_dir = profiler_config.torch_profiler_dir
            logger.info("Profiling enabled. Traces will be saved to: %s",
                        torch_profiler_trace_dir)
@@ -517,9 +518,8 @@ class NPUWorker(WorkerBase):
                    torch_npu.profiler.ProfilerActivity.CPU,
                    torch_npu.profiler.ProfilerActivity.NPU,
                ],
-                with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK,
+                with_stack=profiler_config.torch_profiler_with_stack,
-                profile_memory=envs_vllm.\
+                profile_memory=profiler_config.torch_profiler_with_memory,
                    VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
                with_modules=False,
                experimental_config=experimental_config,
                on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(