Log if cuda graph is used & extend cuda graph capture to cuda-graph-max-bs (#6201)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -492,9 +492,6 @@ class TestSRTEndpoint(CustomTestCase):
|
||||
max_total_num_tokens = response_json["max_total_num_tokens"]
|
||||
self.assertIsInstance(max_total_num_tokens, int)
|
||||
|
||||
attention_backend = response_json["attention_backend"]
|
||||
self.assertIsInstance(attention_backend, str)
|
||||
|
||||
version = response_json["version"]
|
||||
self.assertIsInstance(version, str)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user