[Refactor]refactor 310p ops and add ut (#6591)

### What this PR does / why we need it?

This pull request focuses on a significant refactoring effort within the vllm-ascend project, specifically targeting operations optimized for the Ascend 310P hardware. The changes aim to streamline the implementation of core components like quantization and multi-head attention, making the codebase more maintainable and robust. Concurrently, new unit tests have been introduced to ensure the correctness and reliability of these refactored modules.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

E2E test with qwen3-32b w8a8

- vLLM version: v0.15.0
- vLLM main: d7e17aaacd

---------

Signed-off-by: pu-zhe <zpuaa@outlook.com>
2026-02-07 09:25:17 +08:00
parent 6c49f95da2
commit 23524f2ca4
6 changed files with 173 additions and 28 deletions
--- a/vllm_ascend/_310p/quantization/methods/w8a8_static.py
+++ b/vllm_ascend/_310p/quantization/methods/w8a8_static.py
@@ -26,7 +26,7 @@ from .registry import register_scheme


@register_scheme("W8A8", "linear")
-class AscendW8A8LinearMethod310P(AscendLinearScheme):
+class AscendW8A8LinearMethod310(AscendLinearScheme):
    """310P-only W8A8 static linear scheme.

    Notes:
--- a/vllm_ascend/_310p/quantization/modelslim_config.py
+++ b/vllm_ascend/_310p/quantization/modelslim_config.py
@@ -46,7 +46,7 @@ from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
 logger = init_logger(__name__)


-def create_scheme_for_layer_310p(
+def create_scheme_for_layer(
    cfg: AscendModelSlimConfig,
    quant_description: dict[str, Any],
    prefix: str,
@@ -140,7 +140,7 @@ class AscendModelSlimConfig310(AscendModelSlimConfig):

                return AscendUnquantizedLinearMethod()

-            scheme = create_scheme_for_layer_310p(
+            scheme = create_scheme_for_layer(
                cfg=self,
                quant_description=self.quant_description,
                prefix=prefix,