Revert "drop ascend scheduler" (#4580)
Reverts vllm-project/vllm-ascend#4498 - vLLM version: v0.11.2 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.2
This commit is contained in:
@@ -24,7 +24,6 @@ Run `pytest tests/e2e/multicard/test_qwen3_next.py`.
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from modelscope import snapshot_download # type: ignore
|
||||
|
||||
from tests.e2e.conftest import VllmRunner
|
||||
@@ -64,8 +63,6 @@ def test_models_distributed_Qwen3_NEXT_TP4_FULL_DECODE_ONLY():
|
||||
del vllm_model
|
||||
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="Qwen3-Next + MTP doesn't work with chunked prefill. Fix Me")
|
||||
def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():
|
||||
example_prompts = [
|
||||
"Hello, my name is",
|
||||
@@ -92,6 +89,12 @@ def test_models_distributed_Qwen3_NEXT_MTP_TP4_SIMILARITY():
|
||||
gpu_memory_utilization=0.8,
|
||||
distributed_executor_backend="mp",
|
||||
enforce_eager=True,
|
||||
additional_config={
|
||||
"ascend_scheduler_config": {
|
||||
"enabled": True,
|
||||
"enable_chunked_prefill": False
|
||||
}
|
||||
},
|
||||
speculative_config={
|
||||
"method": "qwen3_next_mtp",
|
||||
"num_speculative_tokens": 1
|
||||
|
||||
Reference in New Issue
Block a user