support torchair mode (#2641)
### What this PR does / why we need it?
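Support torchair mode: add a `mode` option to `torchair_graph_config`. As exercised by the updated tests, `mode` defaults to an empty string, and configuring it (e.g. `'max-autotune'`) while torchair graph mode is disabled raises a `RuntimeError`.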
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.10.1.1
- vLLM main: 5438967fbc
Signed-off-by: zhangdepeng <zhangdepeng2@huawei.com>
Signed-off-by: p00465316 <panchao13@huawei.com>
Co-authored-by: zhangdepeng <zhangdepeng2@huawei.com>
This commit is contained in:
@@ -46,6 +46,7 @@ class TestAscendConfig(TestBase):
|
||||
|
||||
torchair_graph_config = ascend_config.torchair_graph_config
|
||||
self.assertFalse(torchair_graph_config.enabled)
|
||||
self.assertEqual(torchair_graph_config.mode, '')
|
||||
self.assertFalse(torchair_graph_config.use_cached_graph)
|
||||
self.assertEqual(torchair_graph_config.graph_batch_sizes, [])
|
||||
self.assertFalse(torchair_graph_config.graph_batch_sizes_init)
|
||||
@@ -294,6 +295,17 @@ class TestAscendConfig(TestBase):
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
|
||||
# mode should not be configured without torchair graph mode
|
||||
with self.assertRaises(RuntimeError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
"mode": 'max-autotune',
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
init_ascend_config(test_vllm_config)
|
||||
|
||||
# enable_kv_nz should not be enabled without torchair graph mode
|
||||
with self.assertRaises(RuntimeError):
|
||||
test_vllm_config.additional_config = {
|
||||
|
||||
Reference in New Issue
Block a user