From 33c118c80e70cec64c9369b7ba4088c61c44bd31 Mon Sep 17 00:00:00 2001 From: mfyCn-1204 <1332490378@qq.com> Date: Thu, 25 Sep 2025 14:15:02 +0800 Subject: [PATCH] [core]vllm-ascend support msMonitor tool (#3123) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? vllm-ascend supports the [msMonitor ](https://gitcode.com/Ascend/mstt/tree/master/msmonitor)tool to collect performance data of vllm-ascend ### Does this PR introduce _any_ user-facing change? 1.add env MSMONITOR_USE_DAEMON; 2.user can enable the msMonitor tool by setting MSMONITOR_USE_DAEMON=1 before running the vllm-ascend model; 3.MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be set ### How was this patch tested? 1.run the vllm-ascend model without setting MSMONITOR_USE_DAEMON=1 (or with MSMONITOR_USE_DAEMON=0); the model will run successfully; 2.run the vllm-ascend model with MSMONITOR_USE_DAEMON=1 set, then run the msMonitor tool to collect profile data; 3.run the vllm-ascend model with both MSMONITOR_USE_DAEMON=1 and VLLM_TORCH_PROFILER_DIR set, which will raise an error - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/f225ea7dd98e9f29752e5c032cd4a8ee1d712f16 Signed-off-by: mei-feiyao <1332490378@qq.com> --- tests/ut/worker/test_worker_v1.py | 22 ++++++++++++++++++++++ vllm_ascend/envs.py | 3 +++ vllm_ascend/worker/worker_v1.py | 10 ++++++++++ 3 files changed, 35 insertions(+) diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py index 7ae9aa3..eb05a7a 100644 --- a/tests/ut/worker/test_worker_v1.py +++ b/tests/ut/worker/test_worker_v1.py @@ -355,6 +355,28 @@ class TestNPUWorker(TestBase): self.assertIn("Profiler is not enabled", str(cm.exception)) + @patch("vllm_ascend.worker.worker_v1.envs_vllm") + @patch("vllm_ascend.worker.worker_v1.envs_ascend") + def test_profile_and_msmonitor_both_enabled_raises_error( + self, mock_envs_ascend, mock_envs_vllm): + """Test profile method raises exception when both profiler and 
msmonitor are enabled""" + from vllm_ascend.worker.worker_v1 import NPUWorker + + mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces" + mock_envs_ascend.MSMONITOR_USE_DAEMON = 1 + + # Create worker mock + with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None): + worker = NPUWorker() + + # Test should raise exception + with self.assertRaises(RuntimeError) as cm: + _ = worker._init_profiler() + + self.assertIn( + "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.", + str(cm.exception)) + def test_lora_methods(self): """Test LoRA related methods""" from vllm_ascend.worker.worker_v1 import NPUWorker diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 61df5e1..dec0a12 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -159,6 +159,9 @@ env_variables: Dict[str, Callable[[], Any]] = { # caused by the initialization of the Mooncake connector. "PHYSICAL_DEVICES": lambda: os.getenv("PHYSICAL_DEVICES", None), + # Whether to enable msMonitor tool to monitor the performance of vllm-ascend. 
+ "MSMONITOR_USE_DAEMON": + lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))), } # end-env-vars-definition diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py index 820ec63..fedec87 100644 --- a/vllm_ascend/worker/worker_v1.py +++ b/vllm_ascend/worker/worker_v1.py @@ -25,6 +25,7 @@ import torch.nn as nn import torch_npu import vllm.envs as envs_vllm from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions +from torch_npu.profiler import dynamic_profile as dp from vllm.config import VllmConfig from vllm.distributed import (ensure_model_parallel_initialized, init_distributed_environment) @@ -41,6 +42,7 @@ from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput) from vllm.v1.worker.worker_base import WorkerBase +import vllm_ascend.envs as envs_ascend from vllm_ascend.ascend_config import init_ascend_config from vllm_ascend.device_allocator.camem import CaMemAllocator from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel @@ -221,6 +223,10 @@ class NPUWorker(WorkerBase): self, scheduler_output: "SchedulerOutput", ) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]: + # enable msMonitor to monitor the performance of vllm-ascend + if envs_ascend.MSMONITOR_USE_DAEMON: + dp.step() + intermediate_tensors = None forward_pass = scheduler_output.total_num_scheduled_tokens > 0 if forward_pass and not get_pp_group().is_first_rank: @@ -350,6 +356,10 @@ class NPUWorker(WorkerBase): # Torch profiler. Enabled and configured through env vars: # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace if envs_vllm.VLLM_TORCH_PROFILER_DIR: + if envs_ascend.MSMONITOR_USE_DAEMON: + raise RuntimeError( + "MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time." + ) torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR logger.info("Profiling enabled. Traces will be saved to: %s", torch_profiler_trace_dir)