Fix CUDA illegal memory access in overlap mode (#2070)
This commit is contained in:
@@ -1055,9 +1055,6 @@ class ScheduleBatch:
|
||||
)
|
||||
|
||||
def copy(self):
|
||||
# We need a stream synchronization here. Otherwise, there will be CUDA illegal memory access errors.
|
||||
_ = self.seq_lens[0].item()
|
||||
|
||||
# Only contain fields that will be used by process_batch_result
|
||||
return ScheduleBatch(
|
||||
reqs=self.reqs,
|
||||
|
||||
@@ -390,6 +390,9 @@ class Scheduler:
|
||||
batch = self.get_next_batch_to_run()
|
||||
self.cur_batch = batch
|
||||
if batch:
|
||||
# We need a stream synchronization here. Otherwise, there will be CUDA illegal memory access errors.
|
||||
_ = batch.seq_lens[0].item()
|
||||
|
||||
result = self.run_batch(batch)
|
||||
result_queue.append((batch.copy(), result))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user