[Lint]Style: Convert vllm-ascend/ to ruff format(Batch #7) (#6023)

### What this PR does / why we need it?
**Scope of Changes**:
| File Path |
| :--- |
| `vllm_ascend/quantization/compressed_tensors/compressed_tensors.py` |
| `vllm_ascend/quantization/quant_config.py` |
| `vllm_ascend/quantization/utils.py` |
| `vllm_ascend/quantization/w4a16.py` |
| `vllm_ascend/quantization/w4a4_flatquant_dynamic.py` |
| `vllm_ascend/quantization/w4a8_dynamic.py` |
| `vllm_ascend/quantization/w8a16.py` |
| `vllm_ascend/quantization/w8a8.py` |
| `vllm_ascend/quantization/w8a8_dynamic.py` |
| `vllm_ascend/quantization/w8a8_pdmix.py` |
| `vllm_ascend/quantization/w8a8mxfp8.py` |
| `vllm_ascend/sample/rejection_sampler.py` |
| `vllm_ascend/sample/sampler.py` |
| `vllm_ascend/worker/block_table.py` |

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main: 2c24bc6996

Signed-off-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
SILONG ZENG
2026-02-06 14:56:53 +08:00
committed by GitHub
parent d0bc16859c
commit 99aedaff63
20 changed files with 997 additions and 1307 deletions

View File

@@ -21,7 +21,7 @@ Schemes are automatically registered via the @register_scheme decorator.
Usage:
from vllm_ascend.quantization.methods import get_scheme_class
# Get a scheme class by quant_type and layer_type
scheme_cls = get_scheme_class("W8A8_DYNAMIC", "linear")
scheme = scheme_cls()
@@ -30,28 +30,26 @@ Usage:
from typing import Any
# Import base classes
from .base import (AscendAttentionScheme, AscendLinearScheme, AscendMoEScheme,
QuantType)
from .base import AscendAttentionScheme, AscendLinearScheme, AscendMoEScheme, QuantType
# Import registry functions
from .registry import get_scheme_class, register_scheme
# Import all scheme classes for external access
from .w4a4_flatquant import AscendW4A4FlatQuantDynamicLinearMethod
from .w4a4_laos_dynamic import AscendW4A4LaosDynamicLinearMethod
from .w4a8 import (AscendW4A8DynamicFusedMoEMethod,
AscendW4A8DynamicLinearMethod)
from .w4a8 import AscendW4A8DynamicFusedMoEMethod, AscendW4A8DynamicLinearMethod
from .w4a16 import AscendW4A16FusedMoEMethod
from .w8a8_dynamic import (AscendW8A8DynamicFusedMoEMethod,
AscendW8A8DynamicLinearMethod)
from .w8a8_dynamic import AscendW8A8DynamicFusedMoEMethod, AscendW8A8DynamicLinearMethod
from .w8a8_mxfp8 import AscendW8A8MXFP8DynamicLinearMethod
from .w8a8_pdmix import (AscendW8A8PDMixFusedMoeMethod,
AscendW8A8PDMixLinearMethod)
from .w8a8_pdmix import AscendW8A8PDMixFusedMoeMethod, AscendW8A8PDMixLinearMethod
from .w8a8_static import AscendW8A8LinearMethod
from .w8a16 import AscendW8A16LinearMethod
def is_mx_quant_type(instance: Any) -> bool:
    """Report whether ``instance`` is a microscaling (MX) quantization method.

    Args:
        instance: Object to classify; any value is accepted.

    Returns:
        True when ``instance`` is an ``AscendW8A8MXFP8DynamicLinearMethod``
        (or an instance of a subclass), False otherwise.
    """
    # Tuple of every scheme class treated as MX; currently a single entry.
    mx_scheme_classes = (AscendW8A8MXFP8DynamicLinearMethod,)
    return isinstance(instance, mx_scheme_classes)