From e903f695c86fb47709f0e5cd88cc774a64d63854 Mon Sep 17 00:00:00 2001 From: Lifu Huang Date: Wed, 10 Sep 2025 01:04:39 -0700 Subject: [PATCH] Fix potential flakiness in test_lora_qwen3 (#10250) --- test/srt/lora/test_lora.py | 12 +++--------- test/srt/lora/test_lora_qwen3.py | 14 +++++++++----- test/srt/lora/utils.py | 9 +++++++++ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/test/srt/lora/test_lora.py b/test/srt/lora/test_lora.py index 536cec71a..ab1c630fc 100644 --- a/test/srt/lora/test_lora.py +++ b/test/srt/lora/test_lora.py @@ -24,6 +24,7 @@ from utils import ( CI_MULTI_LORA_MODELS, TORCH_DTYPES, LoRAModelCase, + ensure_reproducibility, ) from sglang.test.runners import HFRunner, SRTRunner @@ -76,13 +77,6 @@ class TestLoRA(CustomTestCase): return batches - def ensure_reproducibility(self): - seed = 42 - random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - torch.use_deterministic_algorithms(True) - def _run_lora_multiple_batch_on_model_cases(self, model_cases: List[LoRAModelCase]): for model_case in model_cases: for torch_dtype in TORCH_DTYPES: @@ -121,14 +115,14 @@ class TestLoRA(CustomTestCase): f"\n--- Running Batch {i} --- prompts: {prompts}, lora_paths: {lora_paths}" ) - self.ensure_reproducibility() + ensure_reproducibility() srt_outputs = srt_runner.batch_forward( prompts, max_new_tokens=max_new_tokens, lora_paths=lora_paths, ) - self.ensure_reproducibility() + ensure_reproducibility() hf_outputs = hf_runner.forward( prompts, max_new_tokens=max_new_tokens, diff --git a/test/srt/lora/test_lora_qwen3.py b/test/srt/lora/test_lora_qwen3.py index d114e1ee8..f77156707 100644 --- a/test/srt/lora/test_lora_qwen3.py +++ b/test/srt/lora/test_lora_qwen3.py @@ -18,7 +18,7 @@ import random import unittest from typing import List -from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase +from utils import TORCH_DTYPES, LoRAAdaptor, LoRAModelCase, ensure_reproducibility from sglang.test.runners import HFRunner, SRTRunner from sglang.test.test_utils import CustomTestCase, calculate_rouge_l, is_in_ci @@ -59,19 +59,18 @@ TEST_MULTIPLE_BATCH_PROMPTS = [ The Transformers are large language models, They're used to make predictions on text. """, - # "AI is a field of computer science focused on", TODO: Add it back after fixing its bug + "AI is a field of computer science focused on", "Computer science is the study of", "Write a short story.", "What are the main components of a computer?", ] -class TestLoRA(CustomTestCase): - +class TestLoRAQwen3(CustomTestCase): def _run_lora_multiple_batch_on_model_cases(self, model_cases: List[LoRAModelCase]): for model_case in model_cases: for torch_dtype in TORCH_DTYPES: - max_new_tokens = 10 + max_new_tokens = 32 backend = "triton" base_path = model_case.base lora_adapter_paths = [a.name for a in model_case.adaptors] @@ -133,6 +132,7 @@ class TestLoRA(CustomTestCase): ) # Initialize runners + ensure_reproducibility() srt_runner = SRTRunner( base_path, torch_dtype=torch_dtype, @@ -140,7 +140,11 @@ class TestLoRA(CustomTestCase): lora_paths=[lora_adapter_paths[0], lora_adapter_paths[1]], max_loras_per_batch=len(lora_adapter_paths) + 1, lora_backend=backend, + sleep_on_idle=True, # Eliminate non-determinism by forcing all requests to be processed in one batch. + attention_backend="torch_native", ) + + ensure_reproducibility() hf_runner = HFRunner( base_path, torch_dtype=torch_dtype, diff --git a/test/srt/lora/utils.py b/test/srt/lora/utils.py index 705231965..94ce8ab60 100644 --- a/test/srt/lora/utils.py +++ b/test/srt/lora/utils.py @@ -13,6 +13,7 @@ # ============================================================================== import dataclasses +import random from typing import List import torch @@ -386,3 +387,11 @@ def run_lora_test_by_batch( srt_no_lora_outputs.output_strs[i].strip(" "), hf_no_lora_outputs.output_strs[i].strip(" "), ) + + +def ensure_reproducibility(): + seed = 42 + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + torch.use_deterministic_algorithms(True)