diff --git a/docs/source/tutorials/DeepSeek-V3.2.md b/docs/source/tutorials/DeepSeek-V3.2.md index fcb1349d..e9ebd2ab 100644 --- a/docs/source/tutorials/DeepSeek-V3.2.md +++ b/docs/source/tutorials/DeepSeek-V3.2.md @@ -278,9 +278,6 @@ Before you start, please export VLLM_USE_V1=1 export HCCL_BUFFSIZE=256 - export VLLM_TORCH_PROFILER_DIR="./vllm_profile" - export VLLM_TORCH_PROFILER_WITH_STACK=0 - export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 @@ -302,6 +299,10 @@ Before you start, please --tensor-parallel-size $7 \ --enable-expert-parallel \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ + --profiler-config \ + '{"profiler": "torch", + "torch_profiler_dir": "./vllm_profile", + "torch_profiler_with_stack": false}' \ --seed 1024 \ --served-model-name dsv3 \ --max-model-len 68000 \ @@ -351,9 +352,6 @@ Before you start, please export VLLM_USE_V1=1 export HCCL_BUFFSIZE=256 - export VLLM_TORCH_PROFILER_DIR="./vllm_profile" - export VLLM_TORCH_PROFILER_WITH_STACK=0 - export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 @@ -376,6 +374,10 @@ Before you start, please --tensor-parallel-size $7 \ --enable-expert-parallel \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ + --profiler-config \ + '{"profiler": "torch", + "torch_profiler_dir": "./vllm_profile", + "torch_profiler_with_stack": false}' \ --seed 1024 \ --served-model-name dsv3 \ --max-model-len 68000 \ @@ -426,8 +428,6 @@ Before you start, please export VLLM_USE_V1=1 export HCCL_BUFFSIZE=256 - export VLLM_TORCH_PROFILER_DIR="./vllm_profile" - export VLLM_TORCH_PROFILER_WITH_STACK=0 export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_TRANSPORT_PRINT=1 @@ -452,6 +452,10 @@ Before you start, please --tensor-parallel-size $7 \ --enable-expert-parallel \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ + --profiler-config \ + '{"profiler": "torch", + "torch_profiler_dir": "./vllm_profile", + "torch_profiler_with_stack": false}' \ --seed 1024 \ --served-model-name dsv3 \ --max-model-len 68000 \ @@ -504,9 +508,6 @@ Before you start, please export VLLM_USE_V1=1 export HCCL_BUFFSIZE=256 - export VLLM_TORCH_PROFILER_DIR="./vllm_profile" - export VLLM_TORCH_PROFILER_WITH_STACK=0 - export ASCEND_AGGREGATE_ENABLE=1 export ASCEND_TRANSPORT_PRINT=1 export ACL_OP_INIT_MODE=1 @@ -530,6 +531,10 @@ Before you start, please --tensor-parallel-size $7 \ --enable-expert-parallel \ --speculative-config '{"num_speculative_tokens": 2, "method":"deepseek_mtp"}' \ + --profiler-config \ + '{"profiler": "torch", + "torch_profiler_dir": "./vllm_profile", + "torch_profiler_with_stack": false}' \ --seed 1024 \ --served-model-name dsv3 \ --max-model-len 68000 \ diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 3b9fd9a8..9114764e 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -2,7 +2,7 @@ import unittest from unittest.mock import MagicMock, patch import torch -from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig +from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig from tests.ut.base import TestBase from vllm_ascend.utils import vllm_version_is @@ -311,26 +311,34 @@ class TestNPUWorker(TestBase): self.assertIn("Profiler is not enabled", str(cm.exception)) - @patch("vllm_ascend.worker.worker.envs_vllm") @patch("vllm_ascend.worker.worker.envs_ascend") def test_profile_and_msmonitor_both_enabled_raises_error( - self, mock_envs_vllm, mock_envs_ascend): + self, mock_envs_ascend): """Test profile method raises exception when both profiler and msmonitor are enabled""" from vllm_ascend.worker.worker import NPUWorker - mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces" mock_envs_ascend.MSMONITOR_USE_DAEMON = 1 + # Create profiler config object + profiler_config = ProfilerConfig( + profiler="torch", + torch_profiler_dir="/path/to/traces" + ) + + vllm_config_mock = MagicMock() + vllm_config_mock.profiler_config = profiler_config + # Create worker mock with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): worker = NPUWorker() + worker.vllm_config = vllm_config_mock # Test should raise exception with self.assertRaises(RuntimeError) as cm: _ = worker._init_profiler() self.assertIn( - "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.", + "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time.", str(cm.exception)) def test_lora_methods(self): @@ -413,7 +421,6 @@ class TestNPUWorker(TestBase): mock_model_runner._dummy_run.assert_called_once_with( num_tokens=mock_decode_token_per_req, uniform_decode=True) - @patch("vllm_ascend.worker.worker.envs_vllm") @patch("vllm_ascend.worker.worker.logger") @patch("torch_npu.profiler._ExperimentalConfig") @patch("torch_npu.profiler.profile") @@ -432,15 +439,20 @@ class TestNPUWorker(TestBase): mock_profile, mock_experimental_config, mock_logger, - mock_envs_vllm, ): """Test _init_profiler method - profiler enabled case with stack and memory profiling enabled""" from vllm_ascend.worker.worker import NPUWorker - # Set environment variables to enable profiler - mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces" - mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK = True - mock_envs_vllm.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY = True + # Create profiler config object + profiler_config = ProfilerConfig( + profiler="torch", + torch_profiler_dir="/path/to/traces", + torch_profiler_with_stack=True, + torch_profiler_with_memory=True + ) + + vllm_config_mock = MagicMock() + vllm_config_mock.profiler_config = profiler_config # Set enum mocks mock_export_type.Text = "Text" @@ -460,6 +472,7 @@ class TestNPUWorker(TestBase): # Create worker mock with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): worker = NPUWorker() + worker.vllm_config = vllm_config_mock # Test _init_profiler result = worker._init_profiler() @@ -511,17 +524,23 @@ class TestNPUWorker(TestBase): # Verify return value self.assertEqual(result, mock_profiler_instance) - @patch("vllm_ascend.worker.worker.envs_vllm") - def test_init_profiler_disabled(self, mock_envs_vllm): + def test_init_profiler_disabled(self): """Test _init_profiler method - profiler disabled case""" from vllm_ascend.worker.worker import NPUWorker - # Set environment variable to disable profiler - mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = None + # Create profiler config object with profiler disabled + profiler_config = ProfilerConfig( + profiler=None, + torch_profiler_dir="" + ) + + vllm_config_mock = MagicMock() + vllm_config_mock.profiler_config = profiler_config # Create worker mock with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): worker = NPUWorker() + worker.vllm_config = vllm_config_mock # Test _init_profiler result = worker._init_profiler() @@ -529,17 +548,23 @@ class TestNPUWorker(TestBase): # Verify returns None self.assertIsNone(result) - @patch("vllm_ascend.worker.worker.envs_vllm") - def test_init_profiler_empty_dir(self, mock_envs_vllm): + def test_init_profiler_empty_dir(self): """Test _init_profiler method - empty directory string case""" from vllm_ascend.worker.worker import NPUWorker - # Set environment variable to empty string - mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "" + # Create profiler config object with empty dir + profiler_config = ProfilerConfig( + profiler="torch", + torch_profiler_dir="" + ) + + vllm_config_mock = MagicMock() + vllm_config_mock.profiler_config = profiler_config # Create worker mock with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): worker = NPUWorker() + worker.vllm_config = vllm_config_mock # Test _init_profiler result = worker._init_profiler() diff --git a/vllm_ascend/worker/worker.py b/vllm_ascend/worker/worker.py index 2120e015..546d849e 100644 --- a/vllm_ascend/worker/worker.py +++ b/vllm_ascend/worker/worker.py @@ -489,14 +489,15 @@ class NPUWorker(WorkerBase): ensure_ec_transfer_initialized(self.vllm_config) def _init_profiler(self): - # Torch profiler. Enabled and configured through env vars: - # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace - if envs_vllm.VLLM_TORCH_PROFILER_DIR: + # Torch profiler. Enabled through profiler_config: + # --profiler-config.profiler=torch --profiler-config.torch_profiler_dir=/path/to/save/trace + profiler_config = self.vllm_config.profiler_config + if profiler_config.profiler == "torch" and profiler_config.torch_profiler_dir: if envs_ascend.MSMONITOR_USE_DAEMON: raise RuntimeError( - "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time." + "MSMONITOR_USE_DAEMON and torch profiler cannot be both enabled at the same time." ) - torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR + torch_profiler_trace_dir = profiler_config.torch_profiler_dir logger.info("Profiling enabled. Traces will be saved to: %s", torch_profiler_trace_dir) @@ -517,9 +518,8 @@ class NPUWorker(WorkerBase): torch_npu.profiler.ProfilerActivity.CPU, torch_npu.profiler.ProfilerActivity.NPU, ], - with_stack=envs_vllm.VLLM_TORCH_PROFILER_WITH_STACK, - profile_memory=envs_vllm.\ - VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY, + with_stack=profiler_config.torch_profiler_with_stack, + profile_memory=profiler_config.torch_profiler_with_memory, with_modules=False, experimental_config=experimental_config, on_trace_ready=torch_npu.profiler.tensorboard_trace_handler(