Log whether CUDA graph is used & extend CUDA graph capture up to `--cuda-graph-max-bs` (#6201)

Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-05-12 00:17:33 -07:00
committed by GitHub
parent 7d3a3d4510
commit fba8eccd7e
27 changed files with 293 additions and 121 deletions

View File

@@ -492,9 +492,6 @@ class TestSRTEndpoint(CustomTestCase):
max_total_num_tokens = response_json["max_total_num_tokens"]
self.assertIsInstance(max_total_num_tokens, int)
attention_backend = response_json["attention_backend"]
self.assertIsInstance(attention_backend, str)
version = response_json["version"]
self.assertIsInstance(version, str)