[Refactor] Migrate profiler config from env vars to explicit ProfilerConfig (#5928)

### What this PR does / why we need it?

Migrate the torch profiler configuration from deprecated environment
variables (`VLLM_TORCH_PROFILER_DIR`, `VLLM_TORCH_PROFILER_WITH_STACK`,
`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`) to the explicit
`ProfilerConfig` object, aligning with vLLM's configuration best
practices.
The profiler environment variable approach is deprecated in vLLM and
will be removed in v0.14.0 or v1.0.0.

### Does this PR introduce _any_ user-facing change?
Yes. Developers who want to collect profiler traces should now pass `--profiler-config` instead of setting `VLLM_TORCH_PROFILER_DIR`.
### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 11b6af5280

Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
meihanc
2026-01-19 09:27:55 +08:00
committed by GitHub
parent bc1f6713e7
commit 9cad1a8349
3 changed files with 68 additions and 38 deletions

View File

@@ -278,9 +278,6 @@ Before you start, please
export VLLM_USE_V1=1 export VLLM_USE_V1=1
export HCCL_BUFFSIZE=256 export HCCL_BUFFSIZE=256
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
export VLLM_TORCH_PROFILER_WITH_STACK=0
export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_AGGREGATE_ENABLE=1
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
@@ -302,6 +299,10 @@ Before you start, please
--tensor-parallel-size $7 \ --tensor-parallel-size $7 \
--enable-expert-parallel \ --enable-expert-parallel \
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
--profiler-config \
'{"profiler": "torch",
"torch_profiler_dir": "./vllm_profile",
"torch_profiler_with_stack": false}' \
--seed 1024 \ --seed 1024 \
--served-model-name dsv3 \ --served-model-name dsv3 \
--max-model-len 68000 \ --max-model-len 68000 \
@@ -351,9 +352,6 @@ Before you start, please
export VLLM_USE_V1=1 export VLLM_USE_V1=1
export HCCL_BUFFSIZE=256 export HCCL_BUFFSIZE=256
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
export VLLM_TORCH_PROFILER_WITH_STACK=0
export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_AGGREGATE_ENABLE=1
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
@@ -376,6 +374,10 @@ Before you start, please
--tensor-parallel-size $7 \ --tensor-parallel-size $7 \
--enable-expert-parallel \ --enable-expert-parallel \
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
--profiler-config \
'{"profiler": "torch",
"torch_profiler_dir": "./vllm_profile",
"torch_profiler_with_stack": false}' \
--seed 1024 \ --seed 1024 \
--served-model-name dsv3 \ --served-model-name dsv3 \
--max-model-len 68000 \ --max-model-len 68000 \
@@ -426,8 +428,6 @@ Before you start, please
export VLLM_USE_V1=1 export VLLM_USE_V1=1
export HCCL_BUFFSIZE=256 export HCCL_BUFFSIZE=256
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
export VLLM_TORCH_PROFILER_WITH_STACK=0
export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_AGGREGATE_ENABLE=1
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
@@ -452,6 +452,10 @@ Before you start, please
--tensor-parallel-size $7 \ --tensor-parallel-size $7 \
--enable-expert-parallel \ --enable-expert-parallel \
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
--profiler-config \
'{"profiler": "torch",
"torch_profiler_dir": "./vllm_profile",
"torch_profiler_with_stack": false}' \
--seed 1024 \ --seed 1024 \
--served-model-name dsv3 \ --served-model-name dsv3 \
--max-model-len 68000 \ --max-model-len 68000 \
@@ -504,9 +508,6 @@ Before you start, please
export VLLM_USE_V1=1 export VLLM_USE_V1=1
export HCCL_BUFFSIZE=256 export HCCL_BUFFSIZE=256
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
export VLLM_TORCH_PROFILER_WITH_STACK=0
export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_AGGREGATE_ENABLE=1
export ASCEND_TRANSPORT_PRINT=1 export ASCEND_TRANSPORT_PRINT=1
export ACL_OP_INIT_MODE=1 export ACL_OP_INIT_MODE=1
@@ -530,6 +531,10 @@ Before you start, please
--tensor-parallel-size $7 \ --tensor-parallel-size $7 \
--enable-expert-parallel \ --enable-expert-parallel \
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
--profiler-config \
'{"profiler": "torch",
"torch_profiler_dir": "./vllm_profile",
"torch_profiler_with_stack": false}' \
--seed 1024 \ --seed 1024 \
--served-model-name dsv3 \ --served-model-name dsv3 \
--max-model-len 68000 \ --max-model-len 68000 \

View File

@@ -2,7 +2,7 @@ import unittest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import torch import torch
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig
from tests.ut.base import TestBase from tests.ut.base import TestBase
from vllm_ascend.utils import vllm_version_is from vllm_ascend.utils import vllm_version_is
@@ -311,26 +311,34 @@ class TestNPUWorker(TestBase):
self.assertIn("Profiler is not enabled", str(cm.exception)) self.assertIn("Profiler is not enabled", str(cm.exception))
@patch("vllm_ascend.worker.worker.envs_vllm")
@patch("vllm_ascend.worker.worker.envs_ascend") @patch("vllm_ascend.worker.worker.envs_ascend")
def test_profile_and_msmonitor_both_enabled_raises_error( def test_profile_and_msmonitor_both_enabled_raises_error(
self, mock_envs_vllm, mock_envs_ascend): self, mock_envs_ascend):
"""Test profile method raises exception when both profiler and msmonitor are enabled""" """Test profile method raises exception when both profiler and msmonitor are enabled"""
from vllm_ascend.worker.worker import NPUWorker from vllm_ascend.worker.worker import NPUWorker
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
mock_envs_ascend.MSMONITOR_USE_DAEMON = 1 mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
# Create profiler config object
profiler_config = ProfilerConfig(
profiler="torch",
torch_profiler_dir="/path/to/traces"
)
vllm_config_mock = MagicMock()
vllm_config_mock.profiler_config = profiler_config
# Create worker mock # Create worker mock
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
worker = NPUWorker() worker = NPUWorker()
worker.vllm_config = vllm_config_mock
# Test should raise exception # Test should raise exception
with self.assertRaises(RuntimeError) as cm: with self.assertRaises(RuntimeError) as cm:
_ = worker._init_profiler() _ = worker._init_profiler()
self.assertIn( self.assertIn(
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.", "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time.",
str(cm.exception)) str(cm.exception))
def test_lora_methods(self): def test_lora_methods(self):
@@ -413,7 +421,6 @@ class TestNPUWorker(TestBase):
mock_model_runner._dummy_run.assert_called_once_with( mock_model_runner._dummy_run.assert_called_once_with(
num_tokens=mock_decode_token_per_req, uniform_decode=True) num_tokens=mock_decode_token_per_req, uniform_decode=True)
@patch("vllm_ascend.worker.worker.envs_vllm")
@patch("vllm_ascend.worker.worker.logger") @patch("vllm_ascend.worker.worker.logger")
@patch("torch_npu.profiler._ExperimentalConfig") @patch("torch_npu.profiler._ExperimentalConfig")
@patch("torch_npu.profiler.profile") @patch("torch_npu.profiler.profile")
@@ -432,15 +439,20 @@ class TestNPUWorker(TestBase):
mock_profile, mock_profile,
mock_experimental_config, mock_experimental_config,
mock_logger, mock_logger,
mock_envs_vllm,
): ):
"""Test _init_profiler method - profiler enabled case with stack and memory profiling enabled""" """Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
from vllm_ascend.worker.worker import NPUWorker from vllm_ascend.worker.worker import NPUWorker
# Set environment variables to enable profiler # Create profiler config object
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces" profiler_config = ProfilerConfig(
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True profiler="torch",
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True torch_profiler_dir="/path/to/traces",
torch_profiler_with_stack=True,
torch_profiler_with_memory=True
)
vllm_config_mock = MagicMock()
vllm_config_mock.profiler_config = profiler_config
# Set enum mocks # Set enum mocks
mock_export_type.Text = "Text" mock_export_type.Text = "Text"
@@ -460,6 +472,7 @@ class TestNPUWorker(TestBase):
# Create worker mock # Create worker mock
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
worker = NPUWorker() worker = NPUWorker()
worker.vllm_config = vllm_config_mock
# Test _init_profiler # Test _init_profiler
result = worker._init_profiler() result = worker._init_profiler()
@@ -511,17 +524,23 @@ class TestNPUWorker(TestBase):
# Verify return value # Verify return value
self.assertEqual(result, mock_profiler_instance) self.assertEqual(result, mock_profiler_instance)
@patch("vllm_ascend.worker.worker.envs_vllm") def test_init_profiler_disabled(self):
def test_init_profiler_disabled(self, mock_envs_vllm):
"""Test _init_profiler method - profiler disabled case""" """Test _init_profiler method - profiler disabled case"""
from vllm_ascend.worker.worker import NPUWorker from vllm_ascend.worker.worker import NPUWorker
# Set environment variable to disable profiler # Create profiler config object with profiler disabled
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = None profiler_config = ProfilerConfig(
profiler=None,
torch_profiler_dir=""
)
vllm_config_mock = MagicMock()
vllm_config_mock.profiler_config = profiler_config
# Create worker mock # Create worker mock
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
worker = NPUWorker() worker = NPUWorker()
worker.vllm_config = vllm_config_mock
# Test _init_profiler # Test _init_profiler
result = worker._init_profiler() result = worker._init_profiler()
@@ -529,17 +548,23 @@ class TestNPUWorker(TestBase):
# Verify returns None # Verify returns None
self.assertIsNone(result) self.assertIsNone(result)
@patch("vllm_ascend.worker.worker.envs_vllm") def test_init_profiler_empty_dir(self):
def test_init_profiler_empty_dir(self, mock_envs_vllm):
"""Test _init_profiler method - empty directory string case""" """Test _init_profiler method - empty directory string case"""
from vllm_ascend.worker.worker import NPUWorker from vllm_ascend.worker.worker import NPUWorker
# Set environment variable to empty string # Create profiler config object with empty dir
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "" profiler_config = ProfilerConfig(
profiler="torch",
torch_profiler_dir=""
)
vllm_config_mock = MagicMock()
vllm_config_mock.profiler_config = profiler_config
# Create worker mock # Create worker mock
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
worker = NPUWorker() worker = NPUWorker()
worker.vllm_config = vllm_config_mock
# Test _init_profiler # Test _init_profiler
result = worker._init_profiler() result = worker._init_profiler()

View File

@@ -489,14 +489,15 @@ class NPUWorker(WorkerBase):
ensure_ec_transfer_initialized(self.vllm_config) ensure_ec_transfer_initialized(self.vllm_config)
def _init_profiler(self): def _init_profiler(self):
# Torch profiler. Enabled and configured through env vars: # Torch profiler. Enabled through profiler_config:
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace # --profiler-config.profiler=torch --profiler-config.torch_profiler_dir=/path/to/save/trace
if envs_vllm.VLLM_TORCH_PROFILER_DIR: profiler_config = self.vllm_config.profiler_config
if profiler_config.profiler == "torch" and profiler_config.torch_profiler_dir:
if envs_ascend.MSMONITOR_USE_DAEMON: if envs_ascend.MSMONITOR_USE_DAEMON:
raise RuntimeError( raise RuntimeError(
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time." "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time."
) )
torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR torch_profiler_trace_dir = profiler_config.torch_profiler_dir
logger.info("Profiling enabled. Traces will be saved to: %s", logger.info("Profiling enabled. Traces will be saved to: %s",
torch_profiler_trace_dir) torch_profiler_trace_dir)
@@ -517,9 +518,8 @@ class NPUWorker(WorkerBase):
torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.CPU,
torch_npu.profiler.ProfilerActivity.NPU, torch_npu.profiler.ProfilerActivity.NPU,
], ],
with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK, with_stack=profiler_config.torch_profiler_with_stack,
profile_memory=envs_vllm.\ profile_memory=profiler_config.torch_profiler_with_memory,
VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
with_modules=False, with_modules=False,
experimental_config=experimental_config, experimental_config=experimental_config,
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler( on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(