[Model] Support DeepSeek-V4
This commit is contained in:
50
vllm_mlu/lora/layers/base_linear.py
Normal file
50
vllm_mlu/lora/layers/base_linear.py
Normal file
@@ -0,0 +1,50 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
|
||||
|
||||
import torch
|
||||
|
||||
from vllm.lora.layers.base_linear import BaseLinearLayerWithLoRA
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from vllm_mlu.mlu_hijack_utils import MluHijackObject
|
||||
|
||||
|
||||
def vllm__lora__layers__row_parallel_linear__BaseLinearLayerWithLoRA__apply(
    self,
    x: torch.Tensor,
    bias: torch.Tensor | None,
    residual: torch.Tensor | None = None,
) -> torch.Tensor:
    '''
    =============================
    Modify by vllm_mlu
    =============================
    @brief: add residual in matmul
    '''
    # Hand `residual` to the base layer's quant method so the MLU kernel can
    # fold it into the matmul instead of adding it afterwards.
    output = self.base_layer.quant_method.apply(self.base_layer, x, bias, residual)
    '''
    ==================
    End of MLU Hijack
    ==================
    '''
    # The transformers backend produces batched tensors shaped like
    # (1, seq_len, hidden_dim), whereas punica works on (seq_len, hidden_dim);
    # collapse the leading batch dimension before applying LoRA.
    if x.ndim == 3 and output.ndim == 3:
        output, x = output.flatten(0, 1), x.flatten(0, 1)

    lora_output: torch.Tensor | None = self.punica_wrapper.add_lora_linear(
        output, x, self.lora_a_stacked, self.lora_b_stacked, 1.0, self.output_slices
    )
    # On platforms that update in place the LoRA delta is already folded into
    # `output`; otherwise the wrapper's return value carries the combined result.
    if current_platform.can_update_inplace():
        return output
    return lora_output
|
||||
|
||||
|
||||
# Swap the upstream vLLM BaseLinearLayerWithLoRA.apply for the MLU variant
# defined in this module, which additionally forwards `residual` into the
# quant-method matmul.
# NOTE(review): apply_hijack presumably patches the method on the class at
# import time — confirm against MluHijackObject's implementation.
MluHijackObject.apply_hijack(
    BaseLinearLayerWithLoRA,
    BaseLinearLayerWithLoRA.apply,
    vllm__lora__layers__row_parallel_linear__BaseLinearLayerWithLoRA__apply,
)
|
||||
Reference in New Issue
Block a user