Log if cuda graph is used & extend cuda graph capture to cuda-graph-max-bs (#6201)
Co-authored-by: SangBin Cho <rkooo567@gmail.com>
This commit is contained in:
@@ -105,7 +105,9 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
server_info = requests.get(self.base_url + "/get_server_info")
|
||||
avg_spec_accept_length = server_info.json()["avg_spec_accept_length"]
|
||||
avg_spec_accept_length = server_info.json()["internal_states"][0][
|
||||
"avg_spec_accept_length"
|
||||
]
|
||||
print(f"{avg_spec_accept_length=}")
|
||||
self.assertGreater(avg_spec_accept_length, 2.5)
|
||||
|
||||
@@ -199,7 +201,9 @@ class TestDeepseekV3MTPBlockInt8(CustomTestCase):
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
server_info = requests.get(self.base_url + "/get_server_info")
|
||||
avg_spec_accept_length = server_info.json()["avg_spec_accept_length"]
|
||||
avg_spec_accept_length = server_info.json()["internal_states"][0][
|
||||
"avg_spec_accept_length"
|
||||
]
|
||||
print(f"{avg_spec_accept_length=}")
|
||||
self.assertGreater(avg_spec_accept_length, 2.5)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user