Use CUDA event wait and synchronization instead of busy waiting (#2089)

This commit is contained in:
Lianmin Zheng
2024-11-19 00:21:46 -08:00
committed by GitHub
parent b110453802
commit b7a065eae3
6 changed files with 28 additions and 26 deletions

View File

@@ -387,9 +387,6 @@ class Scheduler:
batch = self.get_next_batch_to_run()
self.cur_batch = batch
if batch:
# We need a stream synchronization here. Otherwise, there will be cuda illegal memory access errors.
_ = batch.seq_lens[0].item()
result = self.run_batch(batch)
result_queue.append((batch.copy(), result))