Remove VLLM_USE_V1 (#4086)

Drop all VLLM_USE_V1 usage. This environment variable has already been removed from vLLM itself (the V1 engine is now the only engine), so every branch guarded by it is dead code.

- vLLM version: v0.11.0
- vLLM main: 83f478bb19
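For out-of-tree code that still has to run against older vLLM releases where the variable exists, a tolerant lookup is one option. A minimal sketch, not part of this commit; the fallback logic is an assumption:

```python
import vllm.envs as envs

# VLLM_USE_V1 no longer exists in vllm.envs; on current vLLM the V1
# engine is the only engine, so treat a missing attribute as "V1 on".
use_v1 = getattr(envs, "VLLM_USE_V1", True)
```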

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Committed: 2025-11-11 15:43:39 +08:00 (by GitHub)
Parent: d5567680a2
Commit: f811a24bf0
21 changed files with 7 additions and 63 deletions


@@ -9,7 +9,6 @@ import torch
 from einops import rearrange
 from torch import nn
 from transformers.activations import ACT2FN
-from vllm import envs
 from vllm.attention import AttentionBackend, AttentionMetadata
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import (CacheConfig, ModelConfig, SpeculativeConfig,
@@ -668,7 +667,6 @@ class CustomQwen3NextForCausalLM(Qwen3NextForCausalLM):
         scheduler_config = vllm_config.scheduler_config
         assert not cache_config.enable_prefix_caching, \
             "Qwen3Next currently does not support prefix caching"
-        assert envs.VLLM_USE_V1, "Qwen3Next requires VLLM_USE_V1"
         self.quant_config = vllm_config.quant_config
         self.config = config
         self.scheduler_config = scheduler_config


@@ -1,6 +1,5 @@
 import ast
-import vllm.envs as envs
 from vllm.config.speculative import SpeculativeConfig
 from vllm.logger import logger
@@ -163,11 +162,6 @@ def __post_init__(self):
         # Replace hf_config for EAGLE draft_model
         if self.method in ("eagle", "eagle3"):
-            if self.enable_chunked_prefill and not envs.VLLM_USE_V1:
-                raise ValueError(
-                    "Chunked prefill and EAGLE are not compatible "
-                    "when using V0.")
             from vllm.transformers_utils.configs import SpeculatorsConfig
             from vllm.transformers_utils.configs.eagle import EAGLEConfig
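The deleted guard only applied to the V0 engine; on V1, EAGLE speculative decoding and chunked prefill work together, which is why the check is simply dropped rather than rewritten. A hedged sketch of enabling both (the model names and the exact `speculative_config` keys are illustrative assumptions, not taken from this commit):

```python
from vllm import LLM

# Illustrative only: on the V1 engine, chunked prefill no longer has
# to be disabled when EAGLE speculative decoding is configured.
llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed target model
    speculative_config={
        "method": "eagle",
        "model": "yuhuili/EAGLE-LLaMA3.1-Instruct-8B",  # assumed draft
        "num_speculative_tokens": 2,
    },
    enable_chunked_prefill=True,
)
```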


@@ -20,7 +20,6 @@ import os
 from typing import TYPE_CHECKING, Optional, Tuple
 import torch
-import vllm.envs as envs_vllm
 from vllm.logger import logger
 from vllm.platforms import Platform, PlatformEnum
@@ -117,8 +116,6 @@ class NPUPlatform(Platform):
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
-        if not envs_vllm.VLLM_USE_V1:
-            raise ValueError("vLLM Ascend does not support V0 engine.")
         # initialize ascend config from vllm additional_config
         ascend_config = init_ascend_config(vllm_config)


@@ -21,7 +21,6 @@ from typing import Any, List, Optional, Union
 import torch
 import torch.nn.functional as F
 import vllm
-import vllm.envs as envs
 from torch import nn
 from transformers import Qwen2Config
 from vllm.attention import AttentionMetadata, AttentionType
@@ -112,12 +111,9 @@ class CustomQwen2Attention(Qwen2Attention):
                                                   is_prefill=False,
                                                   is_qwen_torchair=True)
         forward_kwargs = {}
-        if envs.VLLM_USE_V1:
-            output_shape = q.shape
-            output = torch.empty(output_shape,
-                                 dtype=q.dtype,
-                                 device=q.device)
-            forward_kwargs['output'] = output
+        output_shape = q.shape
+        output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
+        forward_kwargs['output'] = output
         attn_output = self.attn.impl.forward(self.attn,
                                              q,
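Both this hunk and the matching one in CustomQwen3MoeAttention below collapse to the same V1-only pattern: always pre-allocate the output tensor and hand it to the attention backend so results are written in place, with no engine branch. A minimal sketch of that pattern; the `impl.forward` signature here is an assumption based on the truncated call above:

```python
import torch

def attention_forward(attn, q, k, v):
    # V1-only path: unconditionally pre-allocate the output buffer and
    # let the backend fill it, instead of branching on VLLM_USE_V1.
    output = torch.empty(q.shape, dtype=q.dtype, device=q.device)
    attn.impl.forward(attn, q, k, v, output=output)  # assumed signature
    return output
```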


@@ -19,7 +19,6 @@
 from typing import Any, List, Optional, Union
 import torch
-import vllm.envs as envs
 from torch import nn
 from transformers import PretrainedConfig
 from vllm.attention import Attention, AttentionMetadata
@@ -244,12 +243,9 @@ class CustomQwen3MoeAttention(Qwen3MoeAttention):
                                                   is_prefill=False,
                                                   is_qwen_torchair=True)
         forward_kwargs = {}
-        if envs.VLLM_USE_V1:
-            output_shape = q.shape
-            output = torch.empty(output_shape,
-                                 dtype=q.dtype,
-                                 device=q.device)
-            forward_kwargs['output'] = output
+        output_shape = q.shape
+        output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
+        forward_kwargs['output'] = output
         attn_output = self.attn.impl.forward(self.attn,
                                              q,