[core]vllm-ascend support msMonitor tool (#3123)
### What this PR does / why we need it?
vllm-ascend supports the [msMonitor](https://gitcode.com/Ascend/mstt/tree/master/msmonitor) tool to collect
performance data of vllm-ascend.
### Does this PR introduce _any_ user-facing change?
1. Add the environment variable MSMONITOR_USE_DAEMON;
2. Users can enable the msMonitor tool by setting MSMONITOR_USE_DAEMON=1
before running a vllm-ascend model;
3. MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot both be set
### How was this patch tested?
1. Run a vllm-ascend model without setting MSMONITOR_USE_DAEMON=1 (or with
MSMONITOR_USE_DAEMON=0); the model runs successfully;
2. Run a vllm-ascend model with MSMONITOR_USE_DAEMON=1, then run the msMonitor
tool to collect profiling data;
3. Run a vllm-ascend model with both MSMONITOR_USE_DAEMON=1 and
VLLM_TORCH_PROFILER_DIR set; an error is raised
- vLLM version: v0.10.2
- vLLM main:
f225ea7dd9
Signed-off-by: mei-feiyao <1332490378@qq.com>
This commit is contained in:
@@ -355,6 +355,28 @@ class TestNPUWorker(TestBase):
|
|||||||
|
|
||||||
self.assertIn("Profiler is not enabled", str(cm.exception))
|
self.assertIn("Profiler is not enabled", str(cm.exception))
|
||||||
|
|
||||||
|
@patch("vllm_ascend.worker.worker_v1.envs_vllm")
|
||||||
|
@patch("vllm_ascend.worker.worker_v1.envs_ascend")
|
||||||
|
def test_profile_and_msmonitor_both_enabled_raises_error(
|
||||||
|
self, mock_envs_vllm, mock_envs_ascend):
|
||||||
|
"""Test profile method raises exception when both profiler and msmonitor are enabled"""
|
||||||
|
from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||||
|
|
||||||
|
mock_envs_vllm.VLLM_TORCH_PROFILER_DIR = "/path/to/traces"
|
||||||
|
mock_envs_ascend.MSMONITOR_USE_DAEMON = 1
|
||||||
|
|
||||||
|
# Create worker mock
|
||||||
|
with patch.object(NPUWorker, "__init__", lambda x, **kwargs: None):
|
||||||
|
worker = NPUWorker()
|
||||||
|
|
||||||
|
# Test should raise exception
|
||||||
|
with self.assertRaises(RuntimeError) as cm:
|
||||||
|
_ = worker._init_profiler()
|
||||||
|
|
||||||
|
self.assertIn(
|
||||||
|
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time.",
|
||||||
|
str(cm.exception))
|
||||||
|
|
||||||
def test_lora_methods(self):
|
def test_lora_methods(self):
|
||||||
"""Test LoRA related methods"""
|
"""Test LoRA related methods"""
|
||||||
from vllm_ascend.worker.worker_v1 import NPUWorker
|
from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||||
|
|||||||
@@ -159,6 +159,9 @@ env_variables: Dict[str, Callable[[], Any]] = {
|
|||||||
# caused by the initialization of the Mooncake connector.
|
# caused by the initialization of the Mooncake connector.
|
||||||
"PHYSICAL_DEVICES":
|
"PHYSICAL_DEVICES":
|
||||||
lambda: os.getenv("PHYSICAL_DEVICES", None),
|
lambda: os.getenv("PHYSICAL_DEVICES", None),
|
||||||
|
# Whether to enable msMonitor tool to monitor the performance of vllm-ascend.
|
||||||
|
"MSMONITOR_USE_DAEMON":
|
||||||
|
lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
|
||||||
}
|
}
|
||||||
|
|
||||||
# end-env-vars-definition
|
# end-env-vars-definition
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ import torch.nn as nn
|
|||||||
import torch_npu
|
import torch_npu
|
||||||
import vllm.envs as envs_vllm
|
import vllm.envs as envs_vllm
|
||||||
from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
|
from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
|
||||||
|
from torch_npu.profiler import dynamic_profile as dp
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.distributed import (ensure_model_parallel_initialized,
|
from vllm.distributed import (ensure_model_parallel_initialized,
|
||||||
init_distributed_environment)
|
init_distributed_environment)
|
||||||
@@ -41,6 +42,7 @@ from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput,
|
|||||||
DraftTokenIds, ModelRunnerOutput)
|
DraftTokenIds, ModelRunnerOutput)
|
||||||
from vllm.v1.worker.worker_base import WorkerBase
|
from vllm.v1.worker.worker_base import WorkerBase
|
||||||
|
|
||||||
|
import vllm_ascend.envs as envs_ascend
|
||||||
from vllm_ascend.ascend_config import init_ascend_config
|
from vllm_ascend.ascend_config import init_ascend_config
|
||||||
from vllm_ascend.device_allocator.camem import CaMemAllocator
|
from vllm_ascend.device_allocator.camem import CaMemAllocator
|
||||||
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
from vllm_ascend.distributed.parallel_state import init_ascend_model_parallel
|
||||||
@@ -221,6 +223,10 @@ class NPUWorker(WorkerBase):
|
|||||||
self,
|
self,
|
||||||
scheduler_output: "SchedulerOutput",
|
scheduler_output: "SchedulerOutput",
|
||||||
) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]:
|
) -> Optional[Union[ModelRunnerOutput, AsyncModelRunnerOutput]]:
|
||||||
|
# enable msMonitor to monitor the performance of vllm-ascend
|
||||||
|
if envs_ascend.MSMONITOR_USE_DAEMON:
|
||||||
|
dp.step()
|
||||||
|
|
||||||
intermediate_tensors = None
|
intermediate_tensors = None
|
||||||
forward_pass = scheduler_output.total_num_scheduled_tokens > 0
|
forward_pass = scheduler_output.total_num_scheduled_tokens > 0
|
||||||
if forward_pass and not get_pp_group().is_first_rank:
|
if forward_pass and not get_pp_group().is_first_rank:
|
||||||
@@ -350,6 +356,10 @@ class NPUWorker(WorkerBase):
|
|||||||
# Torch profiler. Enabled and configured through env vars:
|
# Torch profiler. Enabled and configured through env vars:
|
||||||
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
|
||||||
if envs_vllm.VLLM_TORCH_PROFILER_DIR:
|
if envs_vllm.VLLM_TORCH_PROFILER_DIR:
|
||||||
|
if envs_ascend.MSMONITOR_USE_DAEMON:
|
||||||
|
raise RuntimeError(
|
||||||
|
"MSMONITOR_USE_DAEMON and VLLM_TORCH_PROFILER_DIR cannot be both set at the same time."
|
||||||
|
)
|
||||||
torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
|
torch_profiler_trace_dir = envs_vllm.VLLM_TORCH_PROFILER_DIR
|
||||||
logger.info("Profiling enabled. Traces will be saved to: %s",
|
logger.info("Profiling enabled. Traces will be saved to: %s",
|
||||||
torch_profiler_trace_dir)
|
torch_profiler_trace_dir)
|
||||||
|
|||||||
Reference in New Issue
Block a user