[Fix] Fixes attribute error in MLA implementation (#3618)
### What this PR does / why we need it? Corrects the attribute access for retrieving the device from `q_a_proj` to `q_proj`. This prevents an `AttributeError` as `q_a_proj` does not exist on the class instance. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? Need MLAPO tests. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
@@ -1,8 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from vllm import SamplingParams
|
||||
from vllm.config import CompilationConfig, CUDAGraphMode
|
||||
@@ -111,19 +108,3 @@ def test_mtp2_correctness_full_graph(
|
||||
model_name: str,
|
||||
):
|
||||
mtp_correctness(sampling_config, model_name, 2, CUDAGraphMode.FULL)
|
||||
|
||||
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_MLAPO": "1"})
|
||||
def test_mtp_correctness_piecewise_graph_with_mlapo_kernel(
|
||||
sampling_config: SamplingParams,
|
||||
model_name: str,
|
||||
):
|
||||
mtp_correctness(sampling_config, model_name, 1)
|
||||
|
||||
|
||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_MLAPO": "1"})
|
||||
def test_mtp_correctness_full_graph_with_mlapo_kernel(
|
||||
sampling_config: SamplingParams,
|
||||
model_name: str,
|
||||
):
|
||||
mtp_correctness(sampling_config, model_name, 1, CUDAGraphMode.FULL)
|
||||
|
||||
Reference in New Issue
Block a user