Use CUDA event wait and synchronization instead of busy waiting (#2089)

This commit is contained in:
Lianmin Zheng
2024-11-19 00:21:46 -08:00
committed by GitHub
parent b110453802
commit b7a065eae3
6 changed files with 28 additions and 26 deletions

View File

@@ -1063,7 +1063,7 @@ class ScheduleBatch:
out_cache_loc=self.out_cache_loc,
return_logprob=self.return_logprob,
decoding_reqs=self.decoding_reqs,
-sampling_info=dataclasses.replace(self.sampling_info),
+sampling_info=self.sampling_info,
)
def __str__(self):