Enable overlap by default (#2067)

This commit is contained in:
Lianmin Zheng
2024-11-19 22:07:58 -08:00
committed by GitHub
parent 699384cb01
commit 7d671e4ad2
17 changed files with 92 additions and 75 deletions

View File

@@ -1,3 +1,4 @@
import time
import unittest
from types import SimpleNamespace
@@ -56,10 +57,10 @@ class TestTorchCompile(unittest.TestCase):
return response.json()
def test_throughput(self):
import time
# Warmup
res = self.run_decode(16)
max_tokens = 256
tic = time.time()
res = self.run_decode(max_tokens)
tok = time.time()