[3/N][Refactor][Quantization]remove packed_modules_mapping from models (#3021)
### What this PR does / why we need it?
Some custom models in vllm-ascend define packed_modules_mapping, which
prevents them from sharing the same model class as the vLLM community.
This PR moves these custom packed_modules_mapping definitions to the
quantization utils.py. After this PR, some custom models can be removed.
### Does this PR introduce _any_ user-facing change?
tested by CI
### How was this patch tested?
tested by CI
- vLLM version: v0.10.2
- vLLM main:
5089fd749c
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
@@ -15,41 +15,11 @@
|
||||
import math
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from vllm.model_executor.models.qwen3_moe import Qwen3MoeForCausalLM
|
||||
|
||||
from vllm_ascend.models.qwen3_moe import CustomQwen3MoeForCausalLM
|
||||
from vllm_ascend.torchair.models.qwen3_moe import CustomQwen3MoeAttention
|
||||
|
||||
|
||||
class TestCustomQwen3MoeForCausalLM:
    """Checks on CustomQwen3MoeForCausalLM's base class and packed mapping."""

    def test_class_inheritance(self):
        """The custom model class must derive from Qwen3MoeForCausalLM."""
        is_derived = issubclass(CustomQwen3MoeForCausalLM,
                                Qwen3MoeForCausalLM)
        assert is_derived

    @pytest.mark.parametrize(
        "key, expected",
        [
            ("qkv_proj", ["q_proj", "k_proj", "v_proj"]),
            ("gate_up_proj", ["gate_proj", "up_proj"]),
            (
                "experts",
                [
                    "experts.0.gate_proj",
                    "experts.0.up_proj",
                    "experts.0.down_proj",
                ],
            ),
        ],
    )
    def test_packed_modules_mapping(self, key, expected):
        """Each packed module name maps to its expected list of sub-modules."""
        mapping = CustomQwen3MoeForCausalLM.packed_modules_mapping
        sub_modules = mapping[key]
        assert sub_modules == expected

    def test_packed_modules_mapping_structure(self):
        """The whole mapping equals the expected dict, with no extra keys."""
        expert_parts = [
            "experts.0.gate_proj",
            "experts.0.up_proj",
            "experts.0.down_proj",
        ]
        expected_mapping = dict(
            qkv_proj=["q_proj", "k_proj", "v_proj"],
            gate_up_proj=["gate_proj", "up_proj"],
            experts=expert_parts,
        )
        actual_mapping = CustomQwen3MoeForCausalLM.packed_modules_mapping
        assert actual_mapping == expected_mapping
|
||||
|
||||
|
||||
class DummyRMSNorm:
|
||||
|
||||
def __init__(self, dim: int, eps: float = 1e-6):
|
||||
|
||||
Reference in New Issue
Block a user