[BugFix] Fix mlapo accuracy problem related with weight processing. (#3857)

This PR fixes a mlapo accuracy problem related with weight processing.
Furthermore, modify mlapo related e2e test with quantized deepseek model
to make it effective.

Signed-off-by: whx-sjtu <2952154980@qq.com>
This commit is contained in:
whx
2025-10-30 00:35:50 +08:00
committed by GitHub
parent d9249c968e
commit 211d4b9da4
2 changed files with 2 additions and 18 deletions

View File

@@ -111,19 +111,3 @@ def test_mtp2_correctness_full_graph(
model_name: str,
):
mtp_correctness(sampling_config, model_name, 2, CUDAGraphMode.FULL)
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_MLAPO": "1"})
def test_mtp_correctness_piecewise_graph_with_mlapo_kernel(
sampling_config: SamplingParams,
model_name: str,
):
mtp_correctness(sampling_config, model_name, 1)
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_MLAPO": "1"})
def test_mtp_correctness_full_graph_with_mlapo_kernel(
sampling_config: SamplingParams,
model_name: str,
):
mtp_correctness(sampling_config, model_name, 1, CUDAGraphMode.FULL)