Revert "drop ascend scheduler" (#4580)

Reverts vllm-project/vllm-ascend#4498
- vLLM version: v0.11.2
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
Mengqing Cao
2025-11-29 22:20:48 +08:00
committed by GitHub
parent 4dbe4fd123
commit 517fd9272d
52 changed files with 2948 additions and 85 deletions

View File

@@ -63,6 +63,11 @@ async def test_models(model: str, mode: str) -> None:
"PYTORCH_NPU_ALLOC_CONF": "expandable_segments:True",
"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"
}
additional_config: dict[str, Any] = {
"ascend_scheduler_config": {
"enabled": False
},
}
compilation_config = {"cudagraph_mode": "FULL_DECODE_ONLY"}
server_args = [
"--quantization", "ascend", "--async-scheduling",
@@ -77,6 +82,7 @@ async def test_models(model: str, mode: str) -> None:
server_args.extend(
["--compilation-config",
json.dumps(compilation_config)])
server_args.extend(["--additional-config", json.dumps(additional_config)])
request_keyword_args: dict[str, Any] = {
**api_keyword_args,
}