[Misc] Remove VLLM_USE_V1 usage in code (#1764)
We plan to remove V0 code from this version. The first step is to delete
v0 usage.
Related: https://github.com/vllm-project/vllm-ascend/issues/1620
- vLLM version: v0.9.2
- vLLM main:
61e20828da
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -29,7 +29,6 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
import torch_npu
|
||||
import vllm.envs as envs
|
||||
from torch import nn
|
||||
from transformers import PretrainedConfig
|
||||
from vllm.attention import Attention, AttentionMetadata
|
||||
@@ -579,20 +578,17 @@ class CustomDeepseekV2MLAAttention(DeepseekV2MLAAttention):
|
||||
else:
|
||||
hidden_states_or_q_c = hidden_states
|
||||
if self.torchair_graph_enabled:
|
||||
if envs.VLLM_USE_V1:
|
||||
output_shape = hidden_states.shape
|
||||
output = torch.empty(output_shape,
|
||||
dtype=hidden_states_or_q_c.dtype,
|
||||
device=hidden_states_or_q_c.device)
|
||||
forward_kwargs['output'] = output
|
||||
|
||||
output_shape = hidden_states.shape
|
||||
output = torch.empty(output_shape,
|
||||
dtype=hidden_states_or_q_c.dtype,
|
||||
device=hidden_states_or_q_c.device)
|
||||
forward_kwargs['output'] = output
|
||||
output = self.mla_attn.impl.forward(self.mla_attn,
|
||||
hidden_states_or_q_c,
|
||||
hidden_states, None, kv_cache,
|
||||
attn_metadata,
|
||||
**forward_kwargs)
|
||||
if envs.VLLM_USE_V1:
|
||||
output = output.view(-1, output_shape[-1])
|
||||
output = output.view(-1, output_shape[-1])
|
||||
return output
|
||||
else:
|
||||
kv_c, k_pe = self.kv_a_proj_with_mqa(hidden_states)[0].split(
|
||||
@@ -660,7 +656,7 @@ class CustomDeepseekV2DecoderLayer(DeepseekV2DecoderLayer):
|
||||
prefix=f"{prefix}.mlp",
|
||||
)
|
||||
self.mla_moe_communication = ascend_config.torchair_graph_config.enable_multistream_moe \
|
||||
and model_config.use_mla and envs.VLLM_USE_V1 and self.tp_size > 1
|
||||
and model_config.use_mla and self.tp_size > 1
|
||||
else:
|
||||
self.mlp = CustomDeepseekV2MLP(
|
||||
hidden_size=config.hidden_size,
|
||||
|
||||
Reference in New Issue
Block a user