# File: enginex-mlu370-vllm/vllm-v0.6.2/tests/kernels/test_feed_forward.py
# Captured: 2026-02-04 17:22:39 +08:00 — 94 lines, 3.3 KiB, Python
import pytest
import numpy
import torch
from vllm.config import ParallelConfig
from vllm.model_executor.model_loader.weight_utils import default_weight_loader, initialize_dummy_weights
from vllm_mlu.model_executor.layers.feed_forward import FeedForward
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
from vllm.model_executor.utils import set_random_seed
from ..utils import init_test_distributed_environment
def compute_diff(baseline: numpy.ndarray, compare: numpy.ndarray):
    """Return two relative-error metrics of *compare* against *baseline*.

    Args:
        baseline: reference values (denominator of both metrics).
        compare: values under test.

    Returns:
        A ``(diff1, diff2)`` tuple where ``diff1`` is the L1 relative error
        ``sum(|b - c|) / sum(|b|)`` and ``diff2`` is the L2 relative error
        ``sqrt(sum((b - c)^2) / sum(b^2))``.
    """
    abs_err = numpy.abs(baseline - compare)
    # L1-style relative error.
    rel_l1 = abs_err.sum() / numpy.abs(baseline).sum()
    # L2-style (RMS) relative error.
    rel_l2 = numpy.sqrt((abs_err ** 2).sum() / (baseline ** 2).sum())
    return rel_l1, rel_l2
# Parameter sweeps consumed by the @pytest.mark.parametrize decorators below.
BATCH_SIZE = [1]                      # batch sizes to test
SEQ_LENS = [128]                      # sequence lengths to test
HIDDEN_SIZE = [32]                    # model hidden dimension
INTERMEDIATE_SIZE = [64]              # FFN intermediate dimension
HIDDEN_ACT = ['silu', 'gelu']         # activation functions to cover
IS_GATED = [True, False]              # gated (e.g. SwiGLU-style) vs plain FFN
BIAS = [True, False]                  # with and without linear-layer bias
UP_PROJ_NAME = ['up_proj']            # weight-name prefix for the up projection
DOWN_PROJ_NAME = ['down_proj']        # weight-name prefix for the down projection
DTYPE = [torch.float16]               # fp16 only; diff threshold below is tuned for it
SEED = [0]                            # RNG seed for reproducible weights/inputs
@pytest.mark.parametrize("batch_size", BATCH_SIZE)
@pytest.mark.parametrize("seq_len", SEQ_LENS)
@pytest.mark.parametrize("hidden_size", HIDDEN_SIZE)
@pytest.mark.parametrize("intermediate_size", INTERMEDIATE_SIZE)
@pytest.mark.parametrize("hidden_act", HIDDEN_ACT)
@pytest.mark.parametrize("is_gated", IS_GATED)
@pytest.mark.parametrize("bias", BIAS)
@pytest.mark.parametrize("up_proj_name", UP_PROJ_NAME)
@pytest.mark.parametrize("down_proj_name", DOWN_PROJ_NAME)
@pytest.mark.parametrize("dtype", DTYPE)
@pytest.mark.parametrize("seed", SEED)
@torch.inference_mode()
def test_feed_forward(
    batch_size: int,
    seq_len: int,
    hidden_size: int,
    intermediate_size: int,
    hidden_act: str,
    is_gated: bool,
    bias: bool,
    up_proj_name: str,
    down_proj_name: str,
    dtype: torch.dtype,
    seed: int,
) -> None:
    """Compare the fused FeedForward forward pass against its reference path.

    Builds a FeedForward layer with randomly initialized weights on MLU
    device 0, runs both ``ffn(...)`` (optimized path) and ``ffn._forward(...)``
    (reference path) on the same random input, and asserts the relative
    errors stay within the fp16 tolerance of 5e-3.
    """
    device = torch.device("mlu:0")
    set_random_seed(seed)

    # Bring up the distributed environment once per process.
    # Only tensor_parallel_size=1 / pipeline_parallel_size=1 is supported here.
    if not torch.distributed.is_initialized():
        init_test_distributed_environment(
            pp_size=1,
            tp_size=1,
            rank=0,
            distributed_init_port="3000",
            local_rank=0,
        )

    with set_default_torch_dtype(dtype):
        # Build the layer under test and fill it with small random weights.
        ffn = FeedForward(
            hidden_size=hidden_size,
            intermediate_size=intermediate_size,
            hidden_act=hidden_act,
            up_proj_name=up_proj_name,
            is_gated=is_gated,
            down_proj_name=down_proj_name,
            bias=bias,
        ).to(device)
        initialize_dummy_weights(ffn, low=-1e-1, high=1e-1)

        # Random activations shaped (batch, seq, hidden).
        hidden_states = torch.randn(
            batch_size, seq_len, hidden_size, dtype=dtype, device=device
        )

        # Optimized forward, then the naive reference forward.
        actual = ffn(hidden_states)
        expected = ffn._forward(hidden_states)

        # For fp16 the accepted relative-error threshold is 5e-3.
        diff1, diff2 = compute_diff(
            baseline=expected.cpu().float().detach().numpy(),
            compare=actual.cpu().float().detach().numpy(),
        )
        del ffn, hidden_states, actual, expected
        assert max(diff1, diff2) <= 5e-3