[Lint]Style: Convert vllm-ascend/ to ruff format(Batch #12) (#6177)

### What this PR does / why we need it? **Scope of Changes**: | File Path | | :--- | | `vllm_ascend/ops/triton/activation/swiglu_quant.py` | | `vllm_ascend/ops/triton/batch_invariant/matmul.py` | | `vllm_ascend/ops/triton/batch_invariant/mean.py` | | `vllm_ascend/ops/triton/batch_invariant/rmsnorm.py` | | `vllm_ascend/ops/triton/fla/chunk.py` | | `vllm_ascend/ops/triton/fla/chunk_delta_h.py` | | `vllm_ascend/ops/triton/fla/chunk_o.py` | | `vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py` | | `vllm_ascend/ops/triton/fla/cumsum.py` | | `vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py` | | `vllm_ascend/ops/triton/fla/l2norm.py` | | `vllm_ascend/ops/triton/fla/layernorm_guard.py` | | `vllm_ascend/ops/triton/fla/sigmoid_gating.py` | | `vllm_ascend/ops/triton/fla/solve_tril.py` | | `vllm_ascend/ops/triton/fla/utils.py` | | `vllm_ascend/ops/triton/fla/wy_fast.py` | | `vllm_ascend/ops/triton/fused_gdn_gating.py` | | `vllm_ascend/ops/triton/layernorm_gated.py` | | `vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py` | | `vllm_ascend/ops/triton/mamba/causal_conv1d.py` | | `vllm_ascend/ops/triton/reject_sample.py` | | `vllm_ascend/ops/triton/rope.py` | | `vllm_ascend/ops/triton/spec_decode/utils.py` | | `vllm_ascend/ops/triton/triton_utils.py` | ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.14.0 - vLLM main: d68209402d Signed-off-by: MrZ20 <2609716663@qq.com>
2026-01-23 14:59:19 +08:00
parent 193acc2c19
commit 78af0c30a3
25 changed files with 760 additions and 996 deletions
--- a/vllm_ascend/ops/triton/fla/layernorm_guard.py
+++ b/vllm_ascend/ops/triton/fla/layernorm_guard.py
@@ -12,10 +12,12 @@ from vllm.triton_utils import tl, triton
 MAX_CORES = 65535


-@triton.heuristics({
-    "HAS_BIAS": lambda args: args["B"] is not None,
-    "HAS_Z": lambda args: args["Z"] is not None,
-})
+@triton.heuristics(
+    {
+        "HAS_BIAS": lambda args: args["B"] is not None,
+        "HAS_Z": lambda args: args["Z"] is not None,
+    }
+)
@triton.jit
 def layer_norm_fwd_kernel(
    X,  # pointer to the input
@@ -49,13 +51,10 @@ def layer_norm_fwd_kernel(
        n_iters = n_iters + 1

    for i in tl.range(n_iters):
-        X_base = X + (i * BLOCK_ROWS *
-                      stride_x_row) + row * stride_x_row + group * N
-        Y_base = Y + (i * BLOCK_ROWS *
-                      stride_y_row) + row * stride_y_row + group * N
+        X_base = X + (i * BLOCK_ROWS * stride_x_row) + row * stride_x_row + group * N
+        Y_base = Y + (i * BLOCK_ROWS * stride_y_row) + row * stride_y_row + group * N
        if HAS_Z:
-            Z_base = Z + (i * BLOCK_ROWS *
-                          stride_z_row) + row * stride_z_row + group * N
+            Z_base = Z + (i * BLOCK_ROWS * stride_z_row) + row * stride_z_row + group * N
        if not IS_RMS_NORM:
            Mean_base = Mean + (i * BLOCK_ROWS) + group * M
        Rstd_base = Rstd + (i * BLOCK_ROWS) + group * M
@@ -64,17 +63,17 @@ def layer_norm_fwd_kernel(
            B_base = B + group * N
        # Compute mean and variance
        cols = tl.arange(0, BLOCK_N)
-        x = tl.load(X_base + cols, mask=cols < N, other=0.).to(tl.float32)
+        x = tl.load(X_base + cols, mask=cols < N, other=0.0).to(tl.float32)
        if HAS_Z and not NORM_BEFORE_GATE:
            z = tl.load(Z_base + cols, mask=cols < N).to(tl.float32)
            x *= z * tl.sigmoid(z)
        if not IS_RMS_NORM:
            mean = tl.sum(x, axis=0) / N
            tl.store(Mean_base + row, mean)
-            xbar = tl.where(cols < N, x - mean, 0.)
+            xbar = tl.where(cols < N, x - mean, 0.0)
            var = tl.sum(xbar * xbar, axis=0) / N
        else:
-            xbar = tl.where(cols < N, x, 0.)
+            xbar = tl.where(cols < N, x, 0.0)
            var = tl.sum(xbar * xbar, axis=0) / N
        rstd = 1 / tl.sqrt(var + eps)
        tl.store(Rstd_base + row, rstd)
@@ -112,26 +111,24 @@ def _layer_norm_fwd(
    if z is not None:
        assert z.stride(-1) == 1
        assert z.shape == (M, N)
-    assert weight.shape == (N, )
+    assert weight.shape == (N,)
    assert weight.stride(-1) == 1
    if bias is not None:
        assert bias.stride(-1) == 1
-        assert bias.shape == (N, )
+        assert bias.shape == (N,)
    # allocate output
    if out is not None:
        assert out.shape == x.shape
    else:
        out = torch.empty_like(x)
    assert out.stride(-1) == 1
-    mean = (torch.empty((ngroups * M, ), dtype=torch.float32, device=x.device)
-            if not is_rms_norm else None)
-    rstd = torch.empty((ngroups * M, ), dtype=torch.float32, device=x.device)
+    mean = torch.empty((ngroups * M,), dtype=torch.float32, device=x.device) if not is_rms_norm else None
+    rstd = torch.empty((ngroups * M,), dtype=torch.float32, device=x.device)
    # Less than 64KB per feature: enqueue fused kernel
    MAX_FUSED_SIZE = 65536 // x.element_size()
    BLOCK_N = min(MAX_FUSED_SIZE, triton.next_power_of_2(group_size))
    if group_size > BLOCK_N:
-        raise RuntimeError(
-            "This layer norm doesn't support feature dim >= 64KB.")
+        raise RuntimeError("This layer norm doesn't support feature dim >= 64KB.")
    # heuristics for number of warps
    num_warps = min(max(BLOCK_N // 256, 1), 8)
    grid = (M if M < MAX_CORES else MAX_CORES, ngroups)
@@ -160,7 +157,6 @@ def _layer_norm_fwd(


 class LayerNormFn(torch.autograd.Function):
-
    @staticmethod
    def forward(
        ctx,