Support controlling nsys start and end range programmatically (#4688)

This commit is contained in:
fzyzcjy
2025-03-28 13:21:13 +08:00
committed by GitHub
parent 550586ef42
commit 53a2c3b466
2 changed files with 14 additions and 9 deletions

View File

@@ -20,7 +20,7 @@ import copy
import uuid
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Literal, Optional, Union
from sglang.srt.managers.schedule_batch import BaseFinishReason
from sglang.srt.sampling.sampling_params import SamplingParams
@@ -650,7 +650,7 @@ class ProfileReqInput:
# If set, profiling stops automatically after this many forward steps, and
# the caller doesn't need to call stop_profile.
num_steps: Optional[int] = None
activities: Optional[List[str]] = None
activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
class ProfileReqType(Enum):

View File

@@ -379,7 +379,7 @@ class Scheduler(
# Init profiler
self.torch_profiler = None
self.torch_profiler_output_dir: Optional[str] = None
self.torch_profiler_activities: Optional[List[str]] = None
self.profiler_activities: Optional[List[str]] = None
self.profiler_target_forward_ct: Optional[int] = None
# Init metrics stats
@@ -1818,7 +1818,7 @@ class Scheduler(
num_steps: Optional[int],
activities: Optional[List[str]],
) -> None:
if self.torch_profiler_activities:
if self.profiler_activities:
return ProfileReqOutput(
success=False,
message="Profiling is already in progress. Call /stop_profile first.",
@@ -1830,7 +1830,7 @@ class Scheduler(
activities = ["CPU", "GPU"]
self.torch_profiler_output_dir = output_dir
self.torch_profiler_activities = activities
self.profiler_activities = activities
logger.info(
"Profiling starts. Traces will be saved to: %s",
self.torch_profiler_output_dir,
@@ -1854,6 +1854,9 @@ class Scheduler(
if "MEM" in activities:
torch.cuda.memory._record_memory_history(max_entries=100000)
if "CUDA_PROFILER" in activities:
torch.cuda.cudart().cudaProfilerStart()
if num_steps:
self.profiler_target_forward_ct = self.forward_ct + num_steps
# The caller will be notified when reaching profiler_target_forward_ct
@@ -1862,7 +1865,7 @@ class Scheduler(
return ProfileReqOutput(success=True, message="Succeeded")
def stop_profile(self) -> None:
if self.torch_profiler_activities is None:
if self.profiler_activities is None:
return
logger.info("Stop profiling...")
@@ -1875,7 +1878,7 @@ class Scheduler(
)
)
if "MEM" in self.torch_profiler_activities:
if "MEM" in self.profiler_activities:
memory_profile_path = os.path.join(
self.torch_profiler_trace_dir,
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
@@ -1883,13 +1886,16 @@ class Scheduler(
torch.cuda.memory._dump_snapshot(memory_profile_path)
torch.cuda.memory._record_memory_history(enabled=None)
if "CUDA_PROFILER" in self.profiler_activities:
torch.cuda.cudart().cudaProfilerStop()
logger.info(
"Profiling done. Traces are saved to: %s",
self.torch_profiler_output_dir,
)
self.torch_profiler = None
self.torch_profiler_output_dir = None
self.torch_profiler_activities = None
self.profiler_activities = None
if self.profiler_target_forward_ct:
self.send_to_tokenizer.send_pyobj(
@@ -1957,7 +1963,6 @@ def run_scheduler_process(
dp_rank: Optional[int],
pipe_writer,
):
# Generate the prefix
if dp_rank is None:
prefix = f" TP{tp_rank}"