Merged three native APIs into one: get_server_info (#2152)
This commit is contained in:
committed by
GitHub
parent
84a1698d67
commit
dbe1729395
@@ -63,12 +63,13 @@ class TestDataParallelism(unittest.TestCase):
|
||||
assert response.status_code == 200
|
||||
|
||||
def test_get_memory_pool_size(self):
    """Check that `/get_server_info` answers with HTTP 200 twice, 5 seconds apart.

    The requests to `/get_memory_pool_size` were dropped: their responses were
    overwritten by the `/get_server_info` responses before being inspected.
    """
    # use `get_server_info` instead since `get_memory_pool_size` is merged into `get_server_info`
    response = requests.get(self.base_url + "/get_server_info")
    assert response.status_code == 200

    time.sleep(5)

    # Query the same endpoint again after the pause.
    response = requests.get(self.base_url + "/get_server_info")
    assert response.status_code == 200
|
||||
|
||||
|
||||
|
||||
@@ -154,9 +154,18 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
self.assertEqual(res["meta_info"]["completion_tokens"], new_tokens)
|
||||
self.assertEqual(len(res["meta_info"]["output_token_logprobs"]), new_tokens)
|
||||
|
||||
def test_get_memory_pool_size(self):
    """POST to `/get_memory_pool_size` and expect the JSON body to be an integer."""
    # NOTE(review): a comment elsewhere in this diff says `get_memory_pool_size`
    # is merged into `get_server_info` -- confirm this endpoint still exists.
    pool_size = requests.post(self.base_url + "/get_memory_pool_size").json()
    self.assertIsInstance(pool_size, int)
|
||||
def test_get_server_info(self):
    """Check that `/get_server_info` returns the expected fields and types.

    The JSON body must contain integer `max_total_num_tokens` and
    `memory_pool_size` values and a string `attention_backend` value.
    """
    response = requests.get(self.base_url + "/get_server_info")
    # Check the HTTP status before decoding the body, so an error response is
    # reported as a status mismatch rather than a JSON decode error or KeyError.
    self.assertEqual(response.status_code, 200)
    response_json = response.json()

    expected_types = {
        "max_total_num_tokens": int,
        "memory_pool_size": int,
        "attention_backend": str,
    }
    for field, expected_type in expected_types.items():
        # A missing field fails with a readable assertion message.
        self.assertIn(field, response_json)
        self.assertIsInstance(response_json[field], expected_type)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user