Fix stop_profile not waiting for profiling to finish (#4741)

This commit is contained in:
fzyzcjy
2025-05-18 08:06:15 +08:00
committed by GitHub
parent e3b8a72291
commit 01d2838c0f
4 changed files with 19 additions and 11 deletions

View File

@@ -1512,7 +1512,7 @@ class Scheduler(
self.profiler_target_forward_ct
and self.profiler_target_forward_ct <= self.forward_ct
):
self.stop_profile()
self.send_to_tokenizer.send_pyobj(self.stop_profile())
if self.forward_sleep_time is not None:
logger.info(f"Scheduler.run_batch sleep {self.forward_sleep_time}s")
@@ -2114,7 +2114,10 @@ class Scheduler(
def stop_profile(self) -> None:
if self.profiler_activities is None:
return
return ProfileReqOutput(
success=False,
message="Profiling is not in progress. Call /start_profile first.",
)
logger.info("Stop profiling...")
if self.torch_profiler is not None:
@@ -2145,10 +2148,7 @@ class Scheduler(
self.torch_profiler_output_dir = None
self.profiler_activities = None
if self.profiler_target_forward_ct:
self.send_to_tokenizer.send_pyobj(
ProfileReqOutput(success=True, message="Succeeded.")
)
return ProfileReqOutput(success=True, message="Succeeded")
def expert_distribution_handle(self, recv_req: ExpertDistributionReq):
if recv_req == ExpertDistributionReq.START_RECORD: