[2/N][Pangu][MoE] Remove Pangu Related Code (#5130)

### What this PR does / why we need it?
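Remove Pangu-related code, including the `AscendW8A8FusedMoEMethod` and `AscendC8KVCacheMethod` registrations (and the `C8` quantization type) and the `kv_quant_type` attention branch in the quantization method lookup.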

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
End-to-end (e2e) tests and unit tests (UT).

- vLLM version: v0.12.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: weichen <calvin_zhu0210@outlook.com>
This commit is contained in:
weichen
2025-12-19 09:00:07 +08:00
committed by GitHub
parent 1b47fca0e8
commit ca6f631cba
11 changed files with 8 additions and 1444 deletions

View File

@@ -9,8 +9,7 @@ from .w4a4_flatquant_dynamic import AscendW4A4FlatQuantDynamicLinearMethod
from .w4a8_dynamic import (AscendW4A8DynamicFusedMoEMethod,
AscendW4A8DynamicLinearMethod)
from .w4a16 import AscendW4A16FusedMoEMethod
from .w8a8 import (AscendC8KVCacheMethod, AscendW8A8FusedMoEMethod,
AscendW8A8LinearMethod)
from .w8a8 import AscendW8A8LinearMethod
from .w8a8_dynamic import (AscendW8A8DynamicFusedMoEMethod,
AscendW8A8DynamicLinearMethod)
from .w8a8_pdmix import (AscendW8A8PDMixFusedMoeMethod,
@@ -29,8 +28,6 @@ ASCEND_QUANTIZATION_METHOD_MAP: Dict[str, Dict[str, Type[Any]]] = {
},
"W8A8": {
"linear": AscendW8A8LinearMethod,
"moe": AscendW8A8FusedMoEMethod,
"attention": AscendC8KVCacheMethod,
},
"W8A8_DYNAMIC": {
"linear": AscendW8A8DynamicLinearMethod,
@@ -39,10 +36,7 @@ ASCEND_QUANTIZATION_METHOD_MAP: Dict[str, Dict[str, Type[Any]]] = {
"W8A8_MIX": {
"linear": AscendW8A8PDMixLinearMethod,
"moe": AscendW8A8PDMixFusedMoeMethod,
},
"C8": {
"attention": AscendC8KVCacheMethod,
},
}
}
@@ -100,9 +94,6 @@ def get_quant_method_modelslim(
# Attention
if '.attn' in prefix and 'fa_quant_type' in quant_description.keys():
quant_type = quant_description['fa_quant_type']
# Use KVCache int8
elif '.attn' in prefix and 'kv_quant_type' in quant_description.keys():
quant_type = quant_description['kv_quant_type']
# Linear
else:
quant_type = get_linear_quant_type(quant_description, prefix,