Support controlling nsys start and end range programmatically (#4688)

This commit is contained in:
fzyzcjy
2025-03-28 13:21:13 +08:00
committed by GitHub
parent 550586ef42
commit 53a2c3b466
2 changed files with 14 additions and 9 deletions

View File

@@ -379,7 +379,7 @@ class Scheduler(
# Init profiler
self.torch_profiler = None
self.torch_profiler_output_dir: Optional[str] = None
-self.torch_profiler_activities: Optional[List[str]] = None
+self.profiler_activities: Optional[List[str]] = None
self.profiler_target_forward_ct: Optional[int] = None
# Init metrics stats
@@ -1818,7 +1818,7 @@ class Scheduler(
num_steps: Optional[int],
activities: Optional[List[str]],
) -> None:
-if self.torch_profiler_activities:
+if self.profiler_activities:
return ProfileReqOutput(
success=False,
message="Profiling is already in progress. Call /stop_profile first.",
@@ -1830,7 +1830,7 @@ class Scheduler(
activities = ["CPU", "GPU"]
self.torch_profiler_output_dir = output_dir
-self.torch_profiler_activities = activities
+self.profiler_activities = activities
logger.info(
"Profiling starts. Traces will be saved to: %s",
self.torch_profiler_output_dir,
@@ -1854,6 +1854,9 @@ class Scheduler(
if "MEM" in activities:
torch.cuda.memory._record_memory_history(max_entries=100000)
+if "CUDA_PROFILER" in activities:
+torch.cuda.cudart().cudaProfilerStart()
if num_steps:
self.profiler_target_forward_ct = self.forward_ct + num_steps
# The caller will be notified when reaching profiler_target_forward_ct
@@ -1862,7 +1865,7 @@ class Scheduler(
return ProfileReqOutput(success=True, message="Succeeded")
def stop_profile(self) -> None:
-if self.torch_profiler_activities is None:
+if self.profiler_activities is None:
return
logger.info("Stop profiling...")
@@ -1875,7 +1878,7 @@ class Scheduler(
)
)
-if "MEM" in self.torch_profiler_activities:
+if "MEM" in self.profiler_activities:
memory_profile_path = os.path.join(
self.torch_profiler_trace_dir,
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
@@ -1883,13 +1886,16 @@ class Scheduler(
torch.cuda.memory._dump_snapshot(memory_profile_path)
torch.cuda.memory._record_memory_history(enabled=None)
+if "CUDA_PROFILER" in self.profiler_activities:
+torch.cuda.cudart().cudaProfilerStop()
logger.info(
"Profiling done. Traces are saved to: %s",
self.torch_profiler_output_dir,
)
self.torch_profiler = None
self.torch_profiler_output_dir = None
-self.torch_profiler_activities = None
+self.profiler_activities = None
if self.profiler_target_forward_ct:
self.send_to_tokenizer.send_pyobj(
@@ -1957,7 +1963,6 @@ def run_scheduler_process(
dp_rank: Optional[int],
pipe_writer,
):
# Generate the prefix
if dp_rank is None:
prefix = f" TP{tp_rank}"