Fix some ci issue and refactor modelrunner (#2445)

### What this PR does / why we need it? Fix some ci issue and refactor modelrunner ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with existing test. - vLLM version: v0.10.0 - vLLM main: 4d9c61993a --------- Signed-off-by: wangli <wangli858794774@gmail.com> Signed-off-by: MengqingCao <cmq0113@163.com> Signed-off-by: weiguihua2 <weiguihua2@huawei.com> Co-authored-by: wangli <wangli858794774@gmail.com> Co-authored-by: weiguihua2 <weiguihua2@huawei.com>
2025-08-20 09:01:04 +08:00
parent 955411611c
commit 1327f9be1c
28 changed files with 1612 additions and 1020 deletions
--- a/vllm_ascend/lora/punica_wrapper/lora_ops.py
+++ b/vllm_ascend/lora/punica_wrapper/lora_ops.py
@@ -52,14 +52,9 @@ def bgmv_expand_slice(inputs: torch.Tensor,
                      slice_offset: int,
                      slice_size: int,
                      add_inputs: bool = True):
-    return torch.ops._C.bgmv_expand(
-        inputs,
-        lora_b_weights,
-        lora_indices_tensor,
-        output_tensor,
-        slice_offset,
-        slice_size
-    )
+    return torch.ops._C.bgmv_expand(inputs, lora_b_weights,
+                                    lora_indices_tensor, output_tensor,
+                                    slice_offset, slice_size)


 def sgmv_shrink(
@@ -74,8 +69,9 @@ def sgmv_shrink(
    token_nums: int,
    scaling: float,
 ):
-    return torch.ops._C.sgmv_shrink(inputs, lora_a_weights, lora_indices_tensor,
-                                    seq_len_tensor, output_tensor, scaling)
+    return torch.ops._C.sgmv_shrink(inputs, lora_a_weights,
+                                    lora_indices_tensor, seq_len_tensor,
+                                    output_tensor, scaling)


 def sgmv_expand(inputs: torch.Tensor,
@@ -111,12 +107,6 @@ def sgmv_expand_slice(inputs: torch.Tensor,
                      slice_offset: int,
                      slice_size: int,
                      add_inputs: bool = False):
-    return torch.ops._C.sgmv_expand(
-            inputs,
-            lora_b_weights,
-            lora_indices_tensor,
-            seq_len_tensor,
-            output_tensor,
-            slice_offset,
-            slice_size
-    )
+    return torch.ops._C.sgmv_expand(inputs, lora_b_weights,
+                                    lora_indices_tensor, seq_len_tensor,
+                                    output_tensor, slice_offset, slice_size)