[Test] Remove VLLM_USE_V1 in example and tests (#1733)
V1 is enabled by default, so there is no need to set it by hand anymore. This PR removes
the now-redundant setting from the examples and tests.
- vLLM version: v0.9.2
- vLLM main:
9ad0a4588b
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
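For context, the change applied throughout the diff below boils down to dropping the explicit V1 opt-in from the environment patches. A minimal before/after sketch (illustrative only; the test names and the exact set of env vars are placeholders, not code copied from this PR):

```python
# Illustrative sketch only -- test names and env var sets are placeholders,
# not copied from any file touched by this PR.
import os
from unittest.mock import patch


# Before: tests had to opt into the V1 engine explicitly.
@patch.dict(os.environ, {
    "VLLM_USE_V1": "1",  # redundant once V1 is the default
    "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
})
def test_generate_before():
    ...


# After: only the env vars the test actually needs are patched.
@patch.dict(os.environ, {
    "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
})
def test_generate_after():
    ...
```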
@@ -26,12 +26,11 @@ from unittest.mock import patch
 from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 
 @patch.dict(
     os.environ, {
-        "VLLM_USE_V1": "1",
         "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
         "TASK_QUEUE_ENABLE": "1",
         "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP": "1"
@@ -56,12 +55,10 @@ def test_generate_with_allgather():
         vllm_model.generate(example_prompts, sampling_params)
 
 
-@patch.dict(
-    os.environ, {
-        "VLLM_USE_V1": "1",
-        "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
-        "TASK_QUEUE_ENABLE": "1"
-    })
+@patch.dict(os.environ, {
+    "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
+    "TASK_QUEUE_ENABLE": "1"
+})
 def test_generate_with_alltoall():
     example_prompts = ["Hello, my name is"]
     sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
@@ -79,4 +76,4 @@ def test_generate_with_alltoall():
             },
             "expert_tensor_parallel_size": 1
         }) as vllm_model:
-        vllm_model.generate(example_prompts, sampling_params)
+        vllm_model.generate(example_prompts, sampling_params)
@@ -1,7 +1,7 @@
 import pytest
 from modelscope import snapshot_download  # type: ignore
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 from tests.e2e.singlecard.test_ilama_lora import (EXPECTED_LORA_OUTPUT,
                                                    MODEL_PATH, do_sample)
 
@@ -27,7 +27,7 @@ from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
 from vllm.model_executor.models.registry import ModelRegistry
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
@@ -16,7 +16,7 @@
 #
 import pytest
 
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 MODELS = [
     "Qwen/Qwen3-0.6B",
@@ -2,12 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Compare the with and without prefix caching on V1 scheduler or AscendScheduler."""
 
-import os
-
 import pytest
 
-from tests.conftest import VllmRunner
-from tests.model_utils import check_outputs_equal
+from tests.e2e.conftest import VllmRunner
+from tests.e2e.model_utils import check_outputs_equal
 
 MODELS = [
     # for MHA
@@ -60,8 +58,6 @@ INPUT_PROMPTS = [
 ]
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="mtp is not supported on v1")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [50])
 def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
@@ -89,8 +85,6 @@ def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:
     )
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="mtp is not supported on v1")
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("max_tokens", [50])
 def test_prefix_cache_with_ascend_scheduler(model: str,
@@ -22,9 +22,7 @@ Run `pytest tests/multicard/test_torchair_graph_mode.py`.
 import os
 from typing import Dict
 
-import pytest
-
-from tests.conftest import VllmRunner
+from tests.e2e.conftest import VllmRunner
 
 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"
 
@@ -78,8 +76,6 @@ def _deepseek_torchair_test_fixture(
         print(f"Generated text: {vllm_output[i][1]!r}")
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="torchair graph is not supported on v0")
 def test_e2e_deepseekv3_with_torchair():
     additional_config = {
         "torchair_graph_config": {
@@ -89,8 +85,6 @@ def test_e2e_deepseekv3_with_torchair():
     _deepseek_torchair_test_fixture(additional_config)
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="torchair graph is not supported on v0")
 def test_e2e_deepseekv3_with_torchair_ms_mla():
     additional_config = {
         "torchair_graph_config": {
@@ -150,8 +144,6 @@ def _pangu_torchair_test_fixture(
         print(f"Generated text: {vllm_output[i][1]!r}")
 
 
-@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
-                    reason="torchair graph is not supported on v0")
 def test_e2e_pangu_with_torchair():
     additional_config = {
         "torchair_graph_config": {