Fix CUDA illegal memory access in overlap mode (#2070)

This commit is contained in:
Lianmin Zheng
2024-11-17 21:29:30 -08:00
committed by GitHub
parent a9e90b4bce
commit 116685337e
3 changed files with 10 additions and 11 deletions

View File

@@ -1055,9 +1055,6 @@ class ScheduleBatch:
)
def copy(self):
# We need a stream synchronization here; otherwise, CUDA illegal memory access errors will occur.
_ = self.seq_lens[0].item()
# Only contain fields that will be used by process_batch_result
return ScheduleBatch(
reqs=self.reqs,

View File

@@ -390,6 +390,9 @@ class Scheduler:
batch = self.get_next_batch_to_run()
self.cur_batch = batch
if batch:
# We need a stream synchronization here; otherwise, CUDA illegal memory access errors will occur.
_ = batch.seq_lens[0].item()
result = self.run_batch(batch)
result_queue.append((batch.copy(), result))