diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
index 6e4a1f3f5..fb030d02e 100644
--- a/python/sglang/srt/entrypoints/http_server.py
+++ b/python/sglang/srt/entrypoints/http_server.py
@@ -338,7 +338,11 @@ async def start_profile_async(obj: Optional[ProfileReqInput] = None):
         obj = ProfileReqInput()
     await _global_state.tokenizer_manager.start_profile(
-        obj.output_dir, obj.num_steps, obj.activities
+        output_dir=obj.output_dir,
+        num_steps=obj.num_steps,
+        activities=obj.activities,
+        with_stack=obj.with_stack,
+        record_shapes=obj.record_shapes,
     )
     return Response(
         content="Start profiling.\n",
diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index f92bbf044..5390668cf 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -836,6 +836,8 @@ class ProfileReqInput:
     # the caller doesn't need to run stop_profile.
     num_steps: Optional[int] = None
     activities: Optional[List[Literal["CPU", "GPU", "MEM", "CUDA_PROFILER"]]] = None
+    with_stack: Optional[bool] = None
+    record_shapes: Optional[bool] = None


 class ProfileReqType(Enum):
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 306359dda..05d1a54f4 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -747,12 +747,16 @@ class TokenizerManager:
         output_dir: Optional[str] = None,
         num_steps: Optional[int] = None,
         activities: Optional[List[str]] = None,
+        with_stack: Optional[bool] = None,
+        record_shapes: Optional[bool] = None,
     ):
         req = ProfileReq(
             type=ProfileReqType.START_PROFILE,
             output_dir=output_dir,
             num_steps=num_steps,
             activities=activities,
+            with_stack=with_stack,
+            record_shapes=record_shapes,
             profile_id=str(time.time()),
         )
         result = (await self.start_profile_communicator(req))[0]