Set csgmv as default lora backend. (#11488)

This commit is contained in:
Lifu Huang
2025-10-15 21:53:24 -07:00
committed by GitHub
parent cbac499750
commit b0d20cdec7
11 changed files with 11 additions and 23 deletions

View File

@@ -81,13 +81,12 @@ class TestLoRA(CustomTestCase):
for model_case in model_cases:
for torch_dtype in TORCH_DTYPES:
max_new_tokens = 32
-backend = "triton"
base_path = model_case.base
lora_adapter_paths = [a.name for a in model_case.adaptors]
assert len(lora_adapter_paths) >= 2
print(
-f"\n========== Testing multiple batches on base '{base_path}' with backend={backend}, dtype={torch_dtype} ---"
+f"\n========== Testing multiple batches on base '{base_path}', dtype={torch_dtype} ---"
)
# Initialize runners
@@ -97,7 +96,6 @@ class TestLoRA(CustomTestCase):
model_type="generation",
lora_paths=[lora_adapter_paths[0], lora_adapter_paths[1]],
max_loras_per_batch=len(lora_adapter_paths) + 1,
-lora_backend=backend,
sleep_on_idle=True, # Eliminate non-determinism by forcing all requests to be processed in one batch.
attention_backend="torch_native",
)
@@ -142,7 +140,7 @@ class TestLoRA(CustomTestCase):
if rouge_score < rouge_tol:
raise AssertionError(
f"ROUGE-L score {rouge_score} below tolerance {rouge_tol} "
-f"for base '{base_path}', adaptor '{lora_paths}', backend '{backend}', prompt: '{prompts}...'"
+f"for base '{base_path}', adaptor '{lora_paths}', prompt: '{prompts}...'"
)
print(f"--- Batch {i} Comparison Passed --- ")