Use cuda event wait and synchronization instead of busy waiting (#2089)
This commit is contained in:
@@ -38,7 +38,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
|
||||
api_key=cls.api_key,
|
||||
other_args=(
|
||||
"--max-total-token",
|
||||
"1024",
|
||||
"1536",
|
||||
"--context-len",
|
||||
"8192",
|
||||
"--decode-log-interval",
|
||||
|
||||
Reference in New Issue
Block a user