[Bugfix] Add quantization param for multi-node CI (#3383)
### What this PR does / why we need it?

Add the `quantization` parameter for the `deepseek-w8a8` multi-node test.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -7,6 +7,7 @@
|
|||||||
"server_parameters": {
|
"server_parameters": {
|
||||||
"leader_config": {
|
"leader_config": {
|
||||||
"model": "vllm-ascend/DeepSeek-V3-W8A8",
|
"model": "vllm-ascend/DeepSeek-V3-W8A8",
|
||||||
|
"quantization": "ascend",
|
||||||
"additional_config": {
|
"additional_config": {
|
||||||
"ascend_scheduler_config": {
|
"ascend_scheduler_config": {
|
||||||
"enabled": true
|
"enabled": true
|
||||||
@@ -18,6 +19,7 @@
|
|||||||
},
|
},
|
||||||
"worker_config": {
|
"worker_config": {
|
||||||
"model": "vllm-ascend/DeepSeek-V3-W8A8",
|
"model": "vllm-ascend/DeepSeek-V3-W8A8",
|
||||||
|
"quantization": "ascend",
|
||||||
"additional_config": {
|
"additional_config": {
|
||||||
"ascend_scheduler_config": {
|
"ascend_scheduler_config": {
|
||||||
"enabled": true
|
"enabled": true
|
||||||
|
|||||||
Reference in New Issue
Block a user