[Misc] Remove VLLM_USE_V1 usage in code (#1764)

We plan to remove V0 code from this version. The first step is to delete
v0 usage.

Related: https://github.com/vllm-project/vllm-ascend/issues/1620

- vLLM version: v0.9.2
- vLLM main:
61e20828da

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-15 11:52:16 +08:00
committed by GitHub
parent 494b0f474f
commit 7bdada58eb
6 changed files with 100 additions and 217 deletions

View File

@@ -29,7 +29,6 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
import torch
import torch_npu
import vllm.envs as envs
from torch import nn
from transformers import PretrainedConfig
from vllm.attention import Attention, AttentionMetadata
@@ -579,20 +578,17 @@ class CustomDeepseekV2MLAAttention(DeepseekV2MLAAttention):
else:
hidden_states_or_q_c = hidden_states
if self.torchair_graph_enabled:
if envs.VLLM_USE_V1:
output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output
output_shape = hidden_states.shape
output = torch.empty(output_shape,
dtype=hidden_states_or_q_c.dtype,
device=hidden_states_or_q_c.device)
forward_kwargs['output'] = output
output = self.mla_attn.impl.forward(self.mla_attn,
hidden_states_or_q_c,
hidden_states, None, kv_cache,
attn_metadata,
**forward_kwargs)
if envs.VLLM_USE_V1:
output = output.view(-1, output_shape[-1])
output = output.view(-1, output_shape[-1])
return output
else:
kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
@@ -660,7 +656,7 @@ class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
prefix=f"{prefix}.mlp",
)
self.mla_moe_communication = ascend_config.torchair_graph_config.enable_multistream_moe \
and model_config.use_mla and envs.VLLM_USE_V1 and self.tp_size > 1
and model_config.use_mla and self.tp_size > 1
else:
self.mlp = CustomDeepseekV2MLP(
hidden_size=config.hidden_size,