Use CUDA event wait and synchronization instead of busy waiting (#2089)

This commit is contained in:
Lianmin Zheng
2024-11-19 00:21:46 -08:00
committed by GitHub
parent b110453802
commit b7a065eae3
6 changed files with 28 additions and 26 deletions

View File

@@ -38,7 +38,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
api_key=cls.api_key,
other_args=(
"--max-total-token",
- "1024",
+ "1536",
"--context-len",
"8192",
"--decode-log-interval",