Fix CUDA illegal memory access in overlap mode (#2070)
This commit is contained in:
@@ -390,6 +390,9 @@ class Scheduler:
|
||||
batch = self.get_next_batch_to_run()
|
||||
self.cur_batch = batch
|
||||
if batch:
|
||||
# A stream synchronization is required here (the .item() read below presumably forces one); otherwise, overlap mode hits CUDA illegal memory access errors.
|
||||
_ = batch.seq_lens[0].item()
|
||||
|
||||
result = self.run_batch(batch)
|
||||
result_queue.append((batch.copy(), result))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user