[Refactor] Migrate profiler config from env vars to explicit ProfilerConfig (#5928)
### What this PR does / why we need it?
Migrate the torch profiler configuration from deprecated environment
variables (`VLLM_TORCH_PROFILER_DIR`, `VLLM_TORCH_PROFILER_WITH_STACK`,
`VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY`) to the explicit
`ProfilerConfig` object, aligning with vLLM's configuration best
practices.
The profiler environment variable approach is deprecated in vLLM and
will be removed in v0.14.0 or v1.0.0.
### Does this PR introduce _any_ user-facing change?
Yes. Developers who want to collect profiler traces should now pass `--profiler-config` instead of setting `VLLM_TORCH_PROFILER_DIR`.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 11b6af5280
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
@@ -278,9 +278,6 @@ Before you start, please
|
|||||||
export VLLM_USE_V1=1
|
export VLLM_USE_V1=1
|
||||||
export HCCL_BUFFSIZE=256
|
export HCCL_BUFFSIZE=256
|
||||||
|
|
||||||
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
|
|
||||||
export VLLM_TORCH_PROFILER_WITH_STACK=0
|
|
||||||
|
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
export ASCEND_AGGREGATE_ENABLE=1
|
||||||
export ASCEND_TRANSPORT_PRINT=1
|
export ASCEND_TRANSPORT_PRINT=1
|
||||||
export ACL_OP_INIT_MODE=1
|
export ACL_OP_INIT_MODE=1
|
||||||
@@ -302,6 +299,10 @@ Before you start, please
|
|||||||
--tensor-parallel-size $7 \
|
--tensor-parallel-size $7 \
|
||||||
--enable-expert-parallel \
|
--enable-expert-parallel \
|
||||||
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
||||||
|
--profiler-config \
|
||||||
|
'{"profiler": "torch",
|
||||||
|
"torch_profiler_dir": "./vllm_profile",
|
||||||
|
"torch_profiler_with_stack": false}' \
|
||||||
--seed 1024 \
|
--seed 1024 \
|
||||||
--served-model-name dsv3 \
|
--served-model-name dsv3 \
|
||||||
--max-model-len 68000 \
|
--max-model-len 68000 \
|
||||||
@@ -351,9 +352,6 @@ Before you start, please
|
|||||||
export VLLM_USE_V1=1
|
export VLLM_USE_V1=1
|
||||||
export HCCL_BUFFSIZE=256
|
export HCCL_BUFFSIZE=256
|
||||||
|
|
||||||
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
|
|
||||||
export VLLM_TORCH_PROFILER_WITH_STACK=0
|
|
||||||
|
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
export ASCEND_AGGREGATE_ENABLE=1
|
||||||
export ASCEND_TRANSPORT_PRINT=1
|
export ASCEND_TRANSPORT_PRINT=1
|
||||||
export ACL_OP_INIT_MODE=1
|
export ACL_OP_INIT_MODE=1
|
||||||
@@ -376,6 +374,10 @@ Before you start, please
|
|||||||
--tensor-parallel-size $7 \
|
--tensor-parallel-size $7 \
|
||||||
--enable-expert-parallel \
|
--enable-expert-parallel \
|
||||||
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
||||||
|
--profiler-config \
|
||||||
|
'{"profiler": "torch",
|
||||||
|
"torch_profiler_dir": "./vllm_profile",
|
||||||
|
"torch_profiler_with_stack": false}' \
|
||||||
--seed 1024 \
|
--seed 1024 \
|
||||||
--served-model-name dsv3 \
|
--served-model-name dsv3 \
|
||||||
--max-model-len 68000 \
|
--max-model-len 68000 \
|
||||||
@@ -426,8 +428,6 @@ Before you start, please
|
|||||||
export VLLM_USE_V1=1
|
export VLLM_USE_V1=1
|
||||||
export HCCL_BUFFSIZE=256
|
export HCCL_BUFFSIZE=256
|
||||||
|
|
||||||
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
|
|
||||||
export VLLM_TORCH_PROFILER_WITH_STACK=0
|
|
||||||
|
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
export ASCEND_AGGREGATE_ENABLE=1
|
||||||
export ASCEND_TRANSPORT_PRINT=1
|
export ASCEND_TRANSPORT_PRINT=1
|
||||||
@@ -452,6 +452,10 @@ Before you start, please
|
|||||||
--tensor-parallel-size $7 \
|
--tensor-parallel-size $7 \
|
||||||
--enable-expert-parallel \
|
--enable-expert-parallel \
|
||||||
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
||||||
|
--profiler-config \
|
||||||
|
'{"profiler": "torch",
|
||||||
|
"torch_profiler_dir": "./vllm_profile",
|
||||||
|
"torch_profiler_with_stack": false}' \
|
||||||
--seed 1024 \
|
--seed 1024 \
|
||||||
--served-model-name dsv3 \
|
--served-model-name dsv3 \
|
||||||
--max-model-len 68000 \
|
--max-model-len 68000 \
|
||||||
@@ -504,9 +508,6 @@ Before you start, please
|
|||||||
export VLLM_USE_V1=1
|
export VLLM_USE_V1=1
|
||||||
export HCCL_BUFFSIZE=256
|
export HCCL_BUFFSIZE=256
|
||||||
|
|
||||||
export VLLM_TORCH_PROFILER_DIR="./vllm_profile"
|
|
||||||
export VLLM_TORCH_PROFILER_WITH_STACK=0
|
|
||||||
|
|
||||||
export ASCEND_AGGREGATE_ENABLE=1
|
export ASCEND_AGGREGATE_ENABLE=1
|
||||||
export ASCEND_TRANSPORT_PRINT=1
|
export ASCEND_TRANSPORT_PRINT=1
|
||||||
export ACL_OP_INIT_MODE=1
|
export ACL_OP_INIT_MODE=1
|
||||||
@@ -530,6 +531,10 @@ Before you start, please
|
|||||||
--tensor-parallel-size $7 \
|
--tensor-parallel-size $7 \
|
||||||
--enable-expert-parallel \
|
--enable-expert-parallel \
|
||||||
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
--speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \
|
||||||
|
--profiler-config \
|
||||||
|
'{"profiler": "torch",
|
||||||
|
"torch_profiler_dir": "./vllm_profile",
|
||||||
|
"torch_profiler_with_stack": false}' \
|
||||||
--seed 1024 \
|
--seed 1024 \
|
||||||
--served-model-name dsv3 \
|
--served-model-name dsv3 \
|
||||||
--max-model-len 68000 \
|
--max-model-len 68000 \
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import unittest
|
|||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
|
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig
|
||||||
|
|
||||||
from tests.ut.base import TestBase
|
from tests.ut.base import TestBase
|
||||||
from vllm_ascend.utils import vllm_version_is
|
from vllm_ascend.utils import vllm_version_is
|
||||||
@@ -311,26 +311,34 @@ class TestNPUWorker(TestBase):
|
|||||||
|
|
||||||
self.assertIn("Profiler is not enabled", str(cm.exception))
|
self.assertIn("Profiler is not enabled", str(cm.exception))
|
||||||
|
|
||||||
@patch("vllm_ascend.worker.worker.envs_vllm")
|
|
||||||
@patch("vllm_ascend.worker.worker.envs_ascend")
|
@patch("vllm_ascend.worker.worker.envs_ascend")
|
||||||
def test_profile_and_msmonitor_both_enabled_raises_error(
|
def test_profile_and_msmonitor_both_enabled_raises_error(
|
||||||
self, mock_envs_vllm, mock_envs_ascend):
|
self, mock_envs_ascend):
|
||||||
"""Test profile method raises exception when both profiler and msmonitor are enabled"""
|
"""Test profile method raises exception when both profiler and msmonitor are enabled"""
|
||||||
from vllm_ascend.worker.worker import NPUWorker
|
from vllm_ascend.worker.worker import NPUWorker
|
||||||
|
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
|
|
||||||
mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
|
mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
|
||||||
|
|
||||||
|
# Create profiler config object
|
||||||
|
profiler_config = ProfilerConfig(
|
||||||
|
profiler="torch",
|
||||||
|
torch_profiler_dir="/path/to/traces"
|
||||||
|
)
|
||||||
|
|
||||||
|
vllm_config_mock = MagicMock()
|
||||||
|
vllm_config_mock.profiler_config = profiler_config
|
||||||
|
|
||||||
# Create worker mock
|
# Create worker mock
|
||||||
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
||||||
worker = NPUWorker()
|
worker = NPUWorker()
|
||||||
|
worker.vllm_config = vllm_config_mock
|
||||||
|
|
||||||
# Test should raise exception
|
# Test should raise exception
|
||||||
with self.assertRaises(RuntimeError) as cm:
|
with self.assertRaises(RuntimeError) as cm:
|
||||||
_ = worker._init_profiler()
|
_ = worker._init_profiler()
|
||||||
|
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.",
|
"MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time.",
|
||||||
str(cm.exception))
|
str(cm.exception))
|
||||||
|
|
||||||
def test_lora_methods(self):
|
def test_lora_methods(self):
|
||||||
@@ -413,7 +421,6 @@ class TestNPUWorker(TestBase):
|
|||||||
mock_model_runner._dummy_run.assert_called_once_with(
|
mock_model_runner._dummy_run.assert_called_once_with(
|
||||||
num_tokens=mock_decode_token_per_req, uniform_decode=True)
|
num_tokens=mock_decode_token_per_req, uniform_decode=True)
|
||||||
|
|
||||||
@patch("vllm_ascend.worker.worker.envs_vllm")
|
|
||||||
@patch("vllm_ascend.worker.worker.logger")
|
@patch("vllm_ascend.worker.worker.logger")
|
||||||
@patch("torch_npu.profiler._ExperimentalConfig")
|
@patch("torch_npu.profiler._ExperimentalConfig")
|
||||||
@patch("torch_npu.profiler.profile")
|
@patch("torch_npu.profiler.profile")
|
||||||
@@ -432,15 +439,20 @@ class TestNPUWorker(TestBase):
|
|||||||
mock_profile,
|
mock_profile,
|
||||||
mock_experimental_config,
|
mock_experimental_config,
|
||||||
mock_logger,
|
mock_logger,
|
||||||
mock_envs_vllm,
|
|
||||||
):
|
):
|
||||||
"""Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
|
"""Test _init_profiler method - profiler enabled case with stack and memory profiling enabled"""
|
||||||
from vllm_ascend.worker.worker import NPUWorker
|
from vllm_ascend.worker.worker import NPUWorker
|
||||||
|
|
||||||
# Set environment variables to enable profiler
|
# Create profiler config object
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
|
profiler_config = ProfilerConfig(
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True
|
profiler="torch",
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True
|
torch_profiler_dir="/path/to/traces",
|
||||||
|
torch_profiler_with_stack=True,
|
||||||
|
torch_profiler_with_memory=True
|
||||||
|
)
|
||||||
|
|
||||||
|
vllm_config_mock = MagicMock()
|
||||||
|
vllm_config_mock.profiler_config = profiler_config
|
||||||
|
|
||||||
# Set enum mocks
|
# Set enum mocks
|
||||||
mock_export_type.Text = "Text"
|
mock_export_type.Text = "Text"
|
||||||
@@ -460,6 +472,7 @@ class TestNPUWorker(TestBase):
|
|||||||
# Create worker mock
|
# Create worker mock
|
||||||
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
||||||
worker = NPUWorker()
|
worker = NPUWorker()
|
||||||
|
worker.vllm_config = vllm_config_mock
|
||||||
|
|
||||||
# Test _init_profiler
|
# Test _init_profiler
|
||||||
result = worker._init_profiler()
|
result = worker._init_profiler()
|
||||||
@@ -511,17 +524,23 @@ class TestNPUWorker(TestBase):
|
|||||||
# Verify return value
|
# Verify return value
|
||||||
self.assertEqual(result, mock_profiler_instance)
|
self.assertEqual(result, mock_profiler_instance)
|
||||||
|
|
||||||
@patch("vllm_ascend.worker.worker.envs_vllm")
|
def test_init_profiler_disabled(self):
|
||||||
def test_init_profiler_disabled(self, mock_envs_vllm):
|
|
||||||
"""Test _init_profiler method - profiler disabled case"""
|
"""Test _init_profiler method - profiler disabled case"""
|
||||||
from vllm_ascend.worker.worker import NPUWorker
|
from vllm_ascend.worker.worker import NPUWorker
|
||||||
|
|
||||||
# Set environment variable to disable profiler
|
# Create profiler config object with profiler disabled
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = None
|
profiler_config = ProfilerConfig(
|
||||||
|
profiler=None,
|
||||||
|
torch_profiler_dir=""
|
||||||
|
)
|
||||||
|
|
||||||
|
vllm_config_mock = MagicMock()
|
||||||
|
vllm_config_mock.profiler_config = profiler_config
|
||||||
|
|
||||||
# Create worker mock
|
# Create worker mock
|
||||||
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
||||||
worker = NPUWorker()
|
worker = NPUWorker()
|
||||||
|
worker.vllm_config = vllm_config_mock
|
||||||
|
|
||||||
# Test _init_profiler
|
# Test _init_profiler
|
||||||
result = worker._init_profiler()
|
result = worker._init_profiler()
|
||||||
@@ -529,17 +548,23 @@ class TestNPUWorker(TestBase):
|
|||||||
# Verify returns None
|
# Verify returns None
|
||||||
self.assertIsNone(result)
|
self.assertIsNone(result)
|
||||||
|
|
||||||
@patch("vllm_ascend.worker.worker.envs_vllm")
|
def test_init_profiler_empty_dir(self):
|
||||||
def test_init_profiler_empty_dir(self, mock_envs_vllm):
|
|
||||||
"""Test _init_profiler method - empty directory string case"""
|
"""Test _init_profiler method - empty directory string case"""
|
||||||
from vllm_ascend.worker.worker import NPUWorker
|
from vllm_ascend.worker.worker import NPUWorker
|
||||||
|
|
||||||
# Set environment variable to empty string
|
# Create profiler config object with empty dir
|
||||||
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = ""
|
profiler_config = ProfilerConfig(
|
||||||
|
profiler="torch",
|
||||||
|
torch_profiler_dir=""
|
||||||
|
)
|
||||||
|
|
||||||
|
vllm_config_mock = MagicMock()
|
||||||
|
vllm_config_mock.profiler_config = profiler_config
|
||||||
|
|
||||||
# Create worker mock
|
# Create worker mock
|
||||||
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
||||||
worker = NPUWorker()
|
worker = NPUWorker()
|
||||||
|
worker.vllm_config = vllm_config_mock
|
||||||
|
|
||||||
# Test _init_profiler
|
# Test _init_profiler
|
||||||
result = worker._init_profiler()
|
result = worker._init_profiler()
|
||||||
|
|||||||
@@ -489,14 +489,15 @@ class NPUWorker(WorkerBase):
|
|||||||
ensure_ec_transfer_initialized(self.vllm_config)
|
ensure_ec_transfer_initialized(self.vllm_config)
|
||||||
|
|
||||||
def _init_profiler(self):
|
def _init_profiler(self):
|
||||||
# Torch profiler. Enabled and configured through env vars:
|
# Torch profiler. Enabled through profiler_config:
|
||||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
# --profiler-config.profiler=torch --profiler-config.torch_profiler_dir=/path/to/save/trace
|
||||||
if envs_vllm.VLLM_TORCH_PROFILER_DIR:
|
profiler_config = self.vllm_config.profiler_config
|
||||||
|
if profiler_config.profiler == "torch" and profiler_config.torch_profiler_dir:
|
||||||
if envs_ascend.MSMONITOR_USE_DAEMON:
|
if envs_ascend.MSMONITOR_USE_DAEMON:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time."
|
"MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time."
|
||||||
)
|
)
|
||||||
torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
|
torch_profiler_trace_dir = profiler_config.torch_profiler_dir
|
||||||
logger.info("Profiling enabled. Traces will be saved to: %s",
|
logger.info("Profiling enabled. Traces will be saved to: %s",
|
||||||
torch_profiler_trace_dir)
|
torch_profiler_trace_dir)
|
||||||
|
|
||||||
@@ -517,9 +518,8 @@ class NPUWorker(WorkerBase):
|
|||||||
torch_npu.profiler.ProfilerActivity.CPU,
|
torch_npu.profiler.ProfilerActivity.CPU,
|
||||||
torch_npu.profiler.ProfilerActivity.NPU,
|
torch_npu.profiler.ProfilerActivity.NPU,
|
||||||
],
|
],
|
||||||
with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK,
|
with_stack=profiler_config.torch_profiler_with_stack,
|
||||||
profile_memory=envs_vllm.\
|
profile_memory=profiler_config.torch_profiler_with_memory,
|
||||||
VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
|
|
||||||
with_modules=False,
|
with_modules=False,
|
||||||
experimental_config=experimental_config,
|
experimental_config=experimental_config,
|
||||||
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
|
on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(
|
||||||
|
|||||||
Reference in New Issue
Block a user