### What this PR does / why we need it?
This PR migrates the quantization, sampling, and worker modules to the modern Python typing style: `typing.Dict`, `typing.Optional`, and `typing.Callable` are replaced with built-in generics (PEP 585), `X | None` unions (PEP 604), and `collections.abc.Callable`; signatures are reflowed accordingly; and no-op default hooks end with an explicit `return` instead of `pass`. A minimal before/after sketch follows the table below, and the representative diff further down shows the pattern applied to the quantization scheme base classes.

**Scope of Changes**:
| File Path |
| :--- |
| `vllm_ascend/quantization/compressed_tensors/compressed_tensors.py` |
| `vllm_ascend/quantization/quant_config.py` |
| `vllm_ascend/quantization/utils.py` |
| `vllm_ascend/quantization/w4a16.py` |
| `vllm_ascend/quantization/w4a4_flatquant_dynamic.py` |
| `vllm_ascend/quantization/w4a8_dynamic.py` |
| `vllm_ascend/quantization/w8a16.py` |
| `vllm_ascend/quantization/w8a8.py` |
| `vllm_ascend/quantization/w8a8_dynamic.py` |
| `vllm_ascend/quantization/w8a8_pdmix.py` |
| `vllm_ascend/quantization/w8a8mxfp8.py` |
| `vllm_ascend/sample/rejection_sampler.py` |
| `vllm_ascend/sample/sampler.py` |
| `vllm_ascend/worker/block_table.py` |
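A minimal before/after sketch of the typing migration; `make_scale` is a hypothetical helper used only for illustration, not from the PR:

```python
from collections.abc import Callable  # was: from typing import Callable
from typing import Any  # the Dict and Optional imports are dropped

import torch


# Before: def make_scale(dtype: torch.dtype, fn: Optional[Callable] = None) -> Dict[str, Any]:
# After, in this PR's style (PEP 585 built-in generics, PEP 604 unions):
def make_scale(dtype: torch.dtype, fn: Callable | None = None) -> dict[str, Any]:
    """Return a parameter-spec dict mapping a name to an empty tensor."""
    return {"input_scale": torch.empty(1, dtype=dtype)}
```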
### Does this PR introduce _any_ user-facing change?
No. This is an internal typing and style refactor; runtime behavior is unchanged.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 2c24bc6996
Signed-off-by: MrZ20 <2609716663@qq.com>
The representative diff below (abstract base classes for Ascend quantization schemes) illustrates the migration:

```diff
@@ -17,14 +17,16 @@
 """Abstract base classes for Ascend quantization schemes."""
 
 from abc import ABC, abstractmethod
+from collections.abc import Callable
 from enum import Enum
-from typing import Any, Callable, Dict, Optional
+from typing import Any
 
 import torch
 
 
 class QuantType(Enum):
     """Quantization type enum for MoE schemes."""
 
     NONE = 0
     W8A8 = 1
     W4A8 = 2
@@ -32,84 +34,78 @@ class QuantType(Enum):
 
 
 class AscendLinearScheme(ABC):
     """Base class for all linear quantization schemes.
 
     Subclasses must implement get_weight() and apply() methods.
     Other methods have default implementations that return empty dicts
     or do nothing.
     """
 
     @abstractmethod
-    def get_weight(self, input_size: int, output_size: int,
-                   params_dtype: torch.dtype) -> Dict[str, Any]:
+    def get_weight(self, input_size: int, output_size: int, params_dtype: torch.dtype) -> dict[str, Any]:
         """Return weight tensor specifications.
 
         Args:
             input_size: Input dimension of the linear layer.
             output_size: Output dimension of the linear layer.
             params_dtype: Data type for parameters.
 
         Returns:
             Dictionary mapping parameter names to empty tensors with
             the correct shape and dtype.
         """
         ...
 
-    def get_pertensor_param(self, params_dtype: torch.dtype) -> Dict[str, Any]:
+    def get_pertensor_param(self, params_dtype: torch.dtype) -> dict[str, Any]:
         """Return per-tensor parameter specifications (e.g., input_scale).
 
         Args:
             params_dtype: Data type for parameters.
 
         Returns:
             Dictionary mapping parameter names to empty tensors.
         """
         return {}
 
-    def get_perchannel_param(self, output_size: int,
-                             params_dtype: torch.dtype) -> Dict[str, Any]:
+    def get_perchannel_param(self, output_size: int, params_dtype: torch.dtype) -> dict[str, Any]:
         """Return per-channel parameter specifications (e.g., weight_scale).
 
         Args:
             output_size: Output dimension of the linear layer.
             params_dtype: Data type for parameters.
 
         Returns:
             Dictionary mapping parameter names to empty tensors.
         """
         return {}
 
-    def get_pergroup_param(self,
-                           input_size: int,
-                           output_size: int,
-                           params_dtype: torch.dtype,
-                           layer_type: Optional[str] = None) -> Dict[str, Any]:
+    def get_pergroup_param(
+        self, input_size: int, output_size: int, params_dtype: torch.dtype, layer_type: str | None = None
+    ) -> dict[str, Any]:
         """Return per-group parameter specifications.
 
         Args:
             input_size: Input dimension of the linear layer.
             output_size: Output dimension of the linear layer.
             params_dtype: Data type for parameters.
             layer_type: Type of layer (e.g., "row" for RowParallelLinear).
 
         Returns:
             Dictionary mapping parameter names to empty tensors.
         """
         return {}
 
     @abstractmethod
-    def apply(self,
-              layer: torch.nn.Module,
-              x: torch.Tensor,
-              bias: Optional[torch.Tensor] = None,
-              tp_rank: Optional[int] = 0) -> torch.Tensor:
+    def apply(
+        self, layer: torch.nn.Module, x: torch.Tensor, bias: torch.Tensor | None = None, tp_rank: int | None = 0
+    ) -> torch.Tensor:
         """Forward computation.
 
         Args:
             layer: The linear layer module.
             x: Input tensor.
             bias: Optional bias tensor.
             tp_rank: Tensor parallel rank.
 
         Returns:
             Output tensor after quantized linear operation.
         """
@@ -117,42 +113,51 @@ class AscendLinearScheme(ABC):
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         """Post-loading weight processing (transpose, format conversion, etc.).
 
         Args:
             layer: The linear layer module.
         """
-        pass
+        return
 
 
 class AscendAttentionScheme(ABC):
     """Base class for all attention quantization schemes.
 
     Subclasses must implement apply() method.
     Other methods have default implementations.
     """
 
     def create_weights(self, layer: torch.nn.Module) -> None:
         """Create weights for attention quantization.
 
         Args:
             layer: The attention layer module.
         """
-        pass
+        return
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         """Post-loading weight processing for attention layer.
 
         Args:
             layer: The attention layer module.
         """
-        pass
+        return
 
     @abstractmethod
-    def apply(self, layer: torch.nn.Module, query: torch.Tensor,
-              key: torch.Tensor, value: torch.Tensor, kv_cache, attn_metadata,
-              attn_type, scale, output) -> torch.Tensor:
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        value: torch.Tensor,
+        kv_cache,
+        attn_metadata,
+        attn_type,
+        scale,
+        output,
+    ) -> torch.Tensor:
         """Forward computation for attention layer.
 
         Args:
             layer: The attention layer module.
             query: Query tensor.
@@ -163,7 +168,7 @@ class AscendAttentionScheme(ABC):
             attn_type: Attention type.
             scale: Scale factor.
             output: Output tensor.
 
         Returns:
             Output tensor after attention computation.
         """
@@ -172,10 +177,10 @@
 
 class AscendMoEScheme(ABC):
     """Base class for all MoE quantization schemes.
 
     Subclasses must implement get_weight(), get_dynamic_quant_param(),
     and apply() methods.
 
     Attributes:
         quant_type: The quantization type for this scheme. Subclasses should
             override this class attribute to declare their quant type.
@@ -185,35 +190,34 @@ class AscendMoEScheme(ABC):
     quant_type: QuantType = QuantType.NONE
 
     @abstractmethod
-    def get_weight(self, num_experts: int,
-                   intermediate_size_per_partition: int, hidden_sizes: int,
-                   params_dtype: torch.dtype) -> Dict[str, Any]:
+    def get_weight(
+        self, num_experts: int, intermediate_size_per_partition: int, hidden_sizes: int, params_dtype: torch.dtype
+    ) -> dict[str, Any]:
         """Return weight tensor specifications for MoE layer.
 
         Args:
             num_experts: Number of experts.
             intermediate_size_per_partition: Intermediate size per partition.
             hidden_sizes: Hidden dimension size.
             params_dtype: Data type for parameters.
 
         Returns:
             Dictionary mapping parameter names to empty tensors.
         """
         ...
 
     @abstractmethod
-    def get_dynamic_quant_param(self, num_experts: int,
-                                intermediate_size_per_partition: int,
-                                hidden_sizes: int,
-                                params_dtype: torch.dtype) -> Dict[str, Any]:
+    def get_dynamic_quant_param(
+        self, num_experts: int, intermediate_size_per_partition: int, hidden_sizes: int, params_dtype: torch.dtype
+    ) -> dict[str, Any]:
         """Return dynamic quantization parameters for MoE layer.
 
         Args:
             num_experts: Number of experts.
             intermediate_size_per_partition: Intermediate size per partition.
             hidden_sizes: Hidden dimension size.
             params_dtype: Data type for parameters.
 
         Returns:
             Dictionary mapping parameter names to empty tensors.
         """
@@ -229,21 +233,21 @@ class AscendMoEScheme(ABC):
         renormalize: bool,
         use_grouped_topk: bool = False,
         global_num_experts: int = -1,
-        expert_map: Optional[torch.Tensor] = None,
-        topk_group: Optional[int] = None,
-        num_expert_group: Optional[int] = None,
-        custom_routing_function: Optional[Callable] = None,
+        expert_map: torch.Tensor | None = None,
+        topk_group: int | None = None,
+        num_expert_group: int | None = None,
+        custom_routing_function: Callable | None = None,
         scoring_func: str = "softmax",
         routed_scaling_factor: float = 1.0,
-        e_score_correction_bias: Optional[torch.Tensor] = None,
+        e_score_correction_bias: torch.Tensor | None = None,
         is_prefill: bool = True,
         enable_force_load_balance: bool = False,
-        log2phy: Optional[torch.Tensor] = None,
+        log2phy: torch.Tensor | None = None,
         global_redundant_expert_num: int = 0,
         **kwargs,
     ) -> torch.Tensor:
         """Forward computation for MoE layer.
 
         Args:
             layer: The MoE layer module.
             x: Input hidden states.
@@ -264,7 +268,7 @@ class AscendMoEScheme(ABC):
             log2phy: Logical to physical expert mapping.
             global_redundant_expert_num: Number of redundant experts.
             **kwargs: Additional keyword arguments.
 
         Returns:
             Output tensor after MoE computation.
         """
@@ -272,8 +276,8 @@ class AscendMoEScheme(ABC):
 
     def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
         """Post-loading weight processing for MoE layer.
 
         Args:
             layer: The MoE layer module.
         """
-        pass
+        return
```
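For orientation, here is a minimal sketch of a concrete scheme implementing the two abstract methods of `AscendLinearScheme` under the new signatures. `IdentityLinearScheme` is hypothetical and the import path is an assumption; a real scheme would quantize and dispatch to Ascend kernels in `apply()`.

```python
from typing import Any

import torch

from vllm_ascend.quantization.quant_config import AscendLinearScheme  # import path is an assumption


class IdentityLinearScheme(AscendLinearScheme):
    """Hypothetical no-op scheme: keeps the weight in params_dtype and runs a plain matmul."""

    def get_weight(self, input_size: int, output_size: int, params_dtype: torch.dtype) -> dict[str, Any]:
        # One entry per parameter; the framework materializes these tensors
        # and registers them on the layer.
        return {"weight": torch.empty(output_size, input_size, dtype=params_dtype)}

    def apply(
        self, layer: torch.nn.Module, x: torch.Tensor, bias: torch.Tensor | None = None, tp_rank: int | None = 0
    ) -> torch.Tensor:
        # Unquantized passthrough; a real scheme would quantize x and/or
        # call an Ascend kernel here.
        return torch.nn.functional.linear(x, layer.weight, bias)


# A MoE scheme would likewise subclass AscendMoEScheme and declare, e.g.,
# quant_type = QuantType.W8A8 as a class attribute.
```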