diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index a94d1968e..578b39c21 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -20,7 +20,7 @@ import copy
 import uuid
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from sglang.srt.managers.schedule_batch import BaseFinishReason
 from sglang.srt.sampling.sampling_params import SamplingParams
@@ -650,7 +650,7 @@ class ProfileReqInput:
     # If it is set, profiling is automatically stopped after this step, and
     # the caller doesn't need to run stop_profile.
     num_steps: Optional[int] = None
-    activities: Optional[List[str]] = None
+    activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
 
 
 class ProfileReqType(Enum):
diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py
index f66c30eef..e386a5854 100644
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -379,7 +379,7 @@ class Scheduler(
         # Init profiler
         self.torch_profiler = None
         self.torch_profiler_output_dir: Optional[str] = None
-        self.torch_profiler_activities: Optional[List[str]] = None
+        self.profiler_activities: Optional[List[str]] = None
         self.profiler_target_forward_ct: Optional[int] = None
 
         # Init metrics stats
@@ -1818,7 +1818,7 @@ class Scheduler(
         num_steps: Optional[int],
         activities: Optional[List[str]],
     ) -> None:
-        if self.torch_profiler_activities:
+        if self.profiler_activities:
             return ProfileReqOutput(
                 success=False,
                 message="Profiling is already in progress. Call /stop_profile first.",
@@ -1830,7 +1830,7 @@ class Scheduler(
             activities = ["CPU", "GPU"]
 
         self.torch_profiler_output_dir = output_dir
-        self.torch_profiler_activities = activities
+        self.profiler_activities = activities
         logger.info(
             "Profiling starts. Traces will be saved to: %s",
             self.torch_profiler_output_dir,
@@ -1854,6 +1854,9 @@ class Scheduler(
         if "MEM" in activities:
             torch.cuda.memory._record_memory_history(max_entries=100000)
 
+        if "CUDA_PROFILER" in activities:
+            torch.cuda.cudart().cudaProfilerStart()
+
         if num_steps:
             self.profiler_target_forward_ct = self.forward_ct + num_steps
             # The caller will be notified when reaching profiler_target_forward_ct
@@ -1862,7 +1865,7 @@ class Scheduler(
         return ProfileReqOutput(success=True, message="Succeeded")
 
     def stop_profile(self) -> None:
-        if self.torch_profiler_activities is None:
+        if self.profiler_activities is None:
             return
 
         logger.info("Stop profiling...")
@@ -1875,7 +1878,7 @@ class Scheduler(
                 )
             )
 
-        if "MEM" in self.torch_profiler_activities:
+        if "MEM" in self.profiler_activities:
             memory_profile_path = os.path.join(
                 self.torch_profiler_trace_dir,
                 str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
@@ -1883,13 +1886,16 @@ class Scheduler(
             )
             torch.cuda.memory._dump_snapshot(memory_profile_path)
             torch.cuda.memory._record_memory_history(enabled=None)
+        if "CUDA_PROFILER" in self.profiler_activities:
+            torch.cuda.cudart().cudaProfilerStop()
+
         logger.info(
             "Profiling done. Traces are saved to: %s",
             self.torch_profiler_output_dir,
         )
         self.torch_profiler = None
         self.torch_profiler_output_dir = None
-        self.torch_profiler_activities = None
+        self.profiler_activities = None
 
         if self.profiler_target_forward_ct:
             self.send_to_tokenizer.send_pyobj(
@@ -1957,7 +1963,6 @@ def run_scheduler_process(
     dp_rank: Optional[int],
     pipe_writer,
 ):
-    # Generate the prefix
     if dp_rank is None:
         prefix = f" TP{tp_rank}"