Revert "drop ascend scheduler" (#4580)
Reverts vllm-project/vllm-ascend#4498 - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
@@ -82,6 +82,9 @@ def test_models_distributed_DeepSeek_multistream_moe():
|
||||
"enabled": True,
|
||||
},
|
||||
"enable_multistream_moe": True,
|
||||
"ascend_scheduler_config": {
|
||||
"enabled": True,
|
||||
},
|
||||
"refresh": True,
|
||||
},
|
||||
) as vllm_model:
|
||||
@@ -151,9 +154,14 @@ def test_models_distributed_DeepSeek_W4A8DYNAMIC(model):
|
||||
quantization="ascend",
|
||||
enforce_eager=True,
|
||||
enable_expert_parallel=True,
|
||||
additional_config={"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
}},
|
||||
additional_config={
|
||||
"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
},
|
||||
"ascend_scheduler_config": {
|
||||
"enabled": True,
|
||||
}
|
||||
},
|
||||
) as vllm_model:
|
||||
vllm_model.generate_greedy(prompts, max_tokens)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user