Support controlling nsys start and end range programmatically (#4688)
This commit is contained in:
@@ -20,7 +20,7 @@ import copy
|
|||||||
import uuid
|
import uuid
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Dict, List, Optional, Union
|
from typing import Any, Dict, List, Literal, Optional, Union
|
||||||
|
|
||||||
from sglang.srt.managers.schedule_batch import BaseFinishReason
|
from sglang.srt.managers.schedule_batch import BaseFinishReason
|
||||||
from sglang.srt.sampling.sampling_params import SamplingParams
|
from sglang.srt.sampling.sampling_params import SamplingParams
|
||||||
@@ -650,7 +650,7 @@ class ProfileReqInput:
|
|||||||
# If it is set, profiling is automatically stopped after this step, and
|
# If it is set, profiling is automatically stopped after this step, and
|
||||||
# the caller doesn't need to run stop_profile.
|
# the caller doesn't need to run stop_profile.
|
||||||
num_steps: Optional[int] = None
|
num_steps: Optional[int] = None
|
||||||
activities: Optional[List[str]] = None
|
activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
|
||||||
|
|
||||||
|
|
||||||
class ProfileReqType(Enum):
|
class ProfileReqType(Enum):
|
||||||
|
|||||||
@@ -379,7 +379,7 @@ class Scheduler(
|
|||||||
# Init profiler
|
# Init profiler
|
||||||
self.torch_profiler = None
|
self.torch_profiler = None
|
||||||
self.torch_profiler_output_dir: Optional[str] = None
|
self.torch_profiler_output_dir: Optional[str] = None
|
||||||
self.torch_profiler_activities: Optional[List[str]] = None
|
self.profiler_activities: Optional[List[str]] = None
|
||||||
self.profiler_target_forward_ct: Optional[int] = None
|
self.profiler_target_forward_ct: Optional[int] = None
|
||||||
|
|
||||||
# Init metrics stats
|
# Init metrics stats
|
||||||
@@ -1818,7 +1818,7 @@ class Scheduler(
|
|||||||
num_steps: Optional[int],
|
num_steps: Optional[int],
|
||||||
activities: Optional[List[str]],
|
activities: Optional[List[str]],
|
||||||
) -> None:
|
) -> None:
|
||||||
if self.torch_profiler_activities:
|
if self.profiler_activities:
|
||||||
return ProfileReqOutput(
|
return ProfileReqOutput(
|
||||||
success=False,
|
success=False,
|
||||||
message="Profiling is already in progress. Call /stop_profile first.",
|
message="Profiling is already in progress. Call /stop_profile first.",
|
||||||
@@ -1830,7 +1830,7 @@ class Scheduler(
|
|||||||
activities = ["CPU", "GPU"]
|
activities = ["CPU", "GPU"]
|
||||||
|
|
||||||
self.torch_profiler_output_dir = output_dir
|
self.torch_profiler_output_dir = output_dir
|
||||||
self.torch_profiler_activities = activities
|
self.profiler_activities = activities
|
||||||
logger.info(
|
logger.info(
|
||||||
"Profiling starts. Traces will be saved to: %s",
|
"Profiling starts. Traces will be saved to: %s",
|
||||||
self.torch_profiler_output_dir,
|
self.torch_profiler_output_dir,
|
||||||
@@ -1854,6 +1854,9 @@ class Scheduler(
|
|||||||
if "MEM" in activities:
|
if "MEM" in activities:
|
||||||
torch.cuda.memory._record_memory_history(max_entries=100000)
|
torch.cuda.memory._record_memory_history(max_entries=100000)
|
||||||
|
|
||||||
|
if "CUDA_PROFILER" in activities:
|
||||||
|
torch.cuda.cudart().cudaProfilerStart()
|
||||||
|
|
||||||
if num_steps:
|
if num_steps:
|
||||||
self.profiler_target_forward_ct = self.forward_ct + num_steps
|
self.profiler_target_forward_ct = self.forward_ct + num_steps
|
||||||
# The caller will be notified when reaching profiler_target_forward_ct
|
# The caller will be notified when reaching profiler_target_forward_ct
|
||||||
@@ -1862,7 +1865,7 @@ class Scheduler(
|
|||||||
return ProfileReqOutput(success=True, message="Succeeded")
|
return ProfileReqOutput(success=True, message="Succeeded")
|
||||||
|
|
||||||
def stop_profile(self) -> None:
|
def stop_profile(self) -> None:
|
||||||
if self.torch_profiler_activities is None:
|
if self.profiler_activities is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.info("Stop profiling...")
|
logger.info("Stop profiling...")
|
||||||
@@ -1875,7 +1878,7 @@ class Scheduler(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
if "MEM" in self.torch_profiler_activities:
|
if "MEM" in self.profiler_activities:
|
||||||
memory_profile_path = os.path.join(
|
memory_profile_path = os.path.join(
|
||||||
self.torch_profiler_trace_dir,
|
self.torch_profiler_trace_dir,
|
||||||
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
|
str(time.time()) + f"-TP-{self.tp_rank}-memory" + ".pickle",
|
||||||
@@ -1883,13 +1886,16 @@ class Scheduler(
|
|||||||
torch.cuda.memory._dump_snapshot(memory_profile_path)
|
torch.cuda.memory._dump_snapshot(memory_profile_path)
|
||||||
torch.cuda.memory._record_memory_history(enabled=None)
|
torch.cuda.memory._record_memory_history(enabled=None)
|
||||||
|
|
||||||
|
if "CUDA_PROFILER" in self.profiler_activities:
|
||||||
|
torch.cuda.cudart().cudaProfilerStop()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Profiling done. Traces are saved to: %s",
|
"Profiling done. Traces are saved to: %s",
|
||||||
self.torch_profiler_output_dir,
|
self.torch_profiler_output_dir,
|
||||||
)
|
)
|
||||||
self.torch_profiler = None
|
self.torch_profiler = None
|
||||||
self.torch_profiler_output_dir = None
|
self.torch_profiler_output_dir = None
|
||||||
self.torch_profiler_activities = None
|
self.profiler_activities = None
|
||||||
|
|
||||||
if self.profiler_target_forward_ct:
|
if self.profiler_target_forward_ct:
|
||||||
self.send_to_tokenizer.send_pyobj(
|
self.send_to_tokenizer.send_pyobj(
|
||||||
@@ -1957,7 +1963,6 @@ def run_scheduler_process(
|
|||||||
dp_rank: Optional[int],
|
dp_rank: Optional[int],
|
||||||
pipe_writer,
|
pipe_writer,
|
||||||
):
|
):
|
||||||
|
|
||||||
# Generate the prefix
|
# Generate the prefix
|
||||||
if dp_rank is None:
|
if dp_rank is None:
|
||||||
prefix = f" TP{tp_rank}"
|
prefix = f" TP{tp_rank}"
|
||||||
|
|||||||
Reference in New Issue
Block a user