Remove VLLM_USE_V1 (#4086)
Drop all usage of VLLM_USE_V1. This environment variable has already been removed from vLLM.
- vLLM version: v0.11.0
- vLLM main: 83f478bb19
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -21,7 +21,6 @@ from typing import Any, List, Optional, Union
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import vllm
|
||||
import vllm.envs as envs
|
||||
from torch import nn
|
||||
from transformers import Qwen2Config
|
||||
from vllm.attention import AttentionMetadata, AttentionType
|
||||
@@ -112,12 +111,9 @@ class CustomQwen2Attention(Qwen2Attention):
|
||||
is_prefill=False,
|
||||
is_qwen_torchair=True)
|
||||
forward_kwargs = {}
|
||||
if envs.VLLM_USE_V1:
|
||||
output_shape = q.shape
|
||||
output = torch.empty(output_shape,
|
||||
dtype=q.dtype,
|
||||
device=q.device)
|
||||
forward_kwargs['output'] = output
|
||||
output_shape = q.shape
|
||||
output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
|
||||
forward_kwargs['output'] = output
|
||||
|
||||
attn_output = self.attn.impl.forward(self.attn,
|
||||
q,
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import torch
|
||||
import vllm.envs as envs
|
||||
from torch import nn
|
||||
from transformers import PretrainedConfig
|
||||
from vllm.attention import Attention, AttentionMetadata
|
||||
@@ -244,12 +243,9 @@ class CustomQwen3MoeAttention(Qwen3MoeAttention):
|
||||
is_prefill=False,
|
||||
is_qwen_torchair=True)
|
||||
forward_kwargs = {}
|
||||
if envs.VLLM_USE_V1:
|
||||
output_shape = q.shape
|
||||
output = torch.empty(output_shape,
|
||||
dtype=q.dtype,
|
||||
device=q.device)
|
||||
forward_kwargs['output'] = output
|
||||
output_shape = q.shape
|
||||
output = torch.empty(output_shape, dtype=q.dtype, device=q.device)
|
||||
forward_kwargs['output'] = output
|
||||
|
||||
attn_output = self.attn.impl.forward(self.attn,
|
||||
q,
|
||||
|
||||
Reference in New Issue
Block a user