Fix the LoRA adapter when the LoRA path is None (#4799)

Co-authored-by: Beichen Ma <mabeichen12@gmail.com>
This commit is contained in:
Qiaolin Yu
2025-03-28 00:03:08 -04:00
committed by GitHub
parent 42a45df043
commit 9fdc6d6abc
3 changed files with 17 additions and 14 deletions

View File

@@ -96,6 +96,11 @@ class TestLoRA(CustomTestCase):
srt_outputs = srt_runner.forward(
prompts, max_new_tokens=max_new_tokens, lora_paths=batch_lora_paths
)
srt_outputs_lora_path_none = srt_runner.forward(
prompts,
max_new_tokens=max_new_tokens,
lora_paths=[None] * len(prompts),
)
with HFRunner(
base_path, torch_dtype=torch_dtype, model_type="generation"
@@ -169,18 +174,20 @@ class TestLoRA(CustomTestCase):
print(f"{srt_outputs.output_strs=}")
print(f"{hf_no_lora_outputs.output_strs=}")
print(f"{srt_no_lora_outputs.output_strs=}")
print(f"{srt_outputs_lora_path_none.output_strs=}")
for i in range(len(prompts)):
assert srt_outputs.output_strs[i].strip(" ") == hf_outputs.output_strs[i], (
srt_outputs.output_strs[i].strip(" "),
hf_outputs.output_strs[i],
)
# assert (
# srt_no_lora_outputs.output_strs[i].strip(" ")
# == hf_no_lora_outputs.output_strs[i]
# ), (
# srt_no_lora_outputs.output_strs[i].strip(" "),
# hf_no_lora_outputs.output_strs[i],
# )
assert (
srt_no_lora_outputs.output_strs[i].strip(" ")
== hf_no_lora_outputs.output_strs[i]
), (
srt_no_lora_outputs.output_strs[i].strip(" "),
hf_no_lora_outputs.output_strs[i],
)
assert srt_outputs_lora_path_none == srt_no_lora_outputs
def serving(self, prompts, lora_set, tp_size, torch_dtype, max_new_tokens):
print("=================== testing serving =======================")
@@ -257,7 +264,7 @@ class TestLoRA(CustomTestCase):
srt_no_lora_logprobs = torch.Tensor(
srt_no_lora_outputs.top_input_logprobs[i]
)
srt_logprobs = torch.uensor(srt_outputs.top_input_logprobs[i])
srt_logprobs = torch.Tensor(srt_outputs.top_input_logprobs[i])
print("max_diff", torch.max(abs(srt_no_lora_logprobs - srt_logprobs)))
print(f"{srt_no_lora_outputs.output_strs=}")
@@ -280,7 +287,7 @@ class TestLoRA(CustomTestCase):
tp_size = 1
max_new_tokens = 32
self.inference(PROMPTS, lora_set, tp_size, torch_dtype, max_new_tokens)
# self.serving(PROMPTS, lora_set, tp_size, torch_dtype, max_new_tokens)
self.serving(PROMPTS, lora_set, tp_size, torch_dtype, max_new_tokens)
# self.base_inference(
# PROMPTS, lora_set, tp_size, torch_dtype, max_new_tokens
# )