Set csgmv as default lora backend. (#11488)

2025-10-15 21:53:24 -07:00
parent cbac499750
commit b0d20cdec7
11 changed files with 11 additions and 23 deletions
--- a/test/srt/lora/test_lora.py
+++ b/test/srt/lora/test_lora.py
@@ -81,13 +81,12 @@ class TestLoRA(CustomTestCase):
        for model_case in model_cases:
            for torch_dtype in TORCH_DTYPES:
                max_new_tokens = 32
-                backend = "triton"
                base_path = model_case.base
                lora_adapter_paths = [a.name for a in model_case.adaptors]
                assert len(lora_adapter_paths) >= 2

                print(
-                    f"\n========== Testing multiple batches on base '{base_path}' with backend={backend}, dtype={torch_dtype} ---"
+                    f"\n========== Testing multiple batches on base '{base_path}', dtype={torch_dtype} ---"
                )

                # Initialize runners
@@ -97,7 +96,6 @@ class TestLoRA(CustomTestCase):
                    model_type="generation",
                    lora_paths=[lora_adapter_paths[0], lora_adapter_paths[1]],
                    max_loras_per_batch=len(lora_adapter_paths) + 1,
-                    lora_backend=backend,
                    sleep_on_idle=True,  # Eliminate non-determinism by forcing all requests to be processed in one batch.
                    attention_backend="torch_native",
                )
@@ -142,7 +140,7 @@ class TestLoRA(CustomTestCase):
                            if rouge_score < rouge_tol:
                                raise AssertionError(
                                    f"ROUGE-L score {rouge_score} below tolerance {rouge_tol} "
-                                    f"for base '{base_path}', adaptor '{lora_paths}', backend '{backend}', prompt: '{prompts}...'"
+                                    f"for base '{base_path}', adaptor '{lora_paths}', prompt: '{prompts}...'"
                                )

                        print(f"--- Batch {i} Comparison Passed --- ")