"""
|
|
|
|
|
Usage:
|
|
|
|
|
|
|
|
|
|
To test a specific model:
|
|
|
|
|
1. Add it to ALL_OTHER_MODELS
|
|
|
|
|
2. Run `ONLY_RUN=Qwen/Qwen2-1.5B python3 -m unittest test_generation_models.TestGenerationModels.test_others`
|
|
|
|
|
"""
|
|
|
|
|
|
2024-08-03 18:20:50 -07:00
|
|
|
"""
|
|
|
|
|
Copyright 2023-2024 SGLang Team
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
|
You may obtain a copy of the License at
|
2024-08-08 16:31:19 -07:00
|
|
|
|
2024-08-03 18:20:50 -07:00
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
2024-08-08 16:31:19 -07:00
|
|
|
|
2024-08-03 18:20:50 -07:00
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
|
limitations under the License.
|
|
|
|
|
"""
|
|
|
|
|
|
2024-09-22 01:50:37 -07:00
|
|
|
import dataclasses
|
2024-08-25 16:21:37 -07:00
|
|
|
import multiprocessing as mp
|
2024-09-22 01:50:37 -07:00
|
|
|
import os
|
2024-08-03 18:20:50 -07:00
|
|
|
import unittest
|
2024-09-22 01:50:37 -07:00
|
|
|
from typing import List
|
2024-08-03 18:20:50 -07:00
|
|
|
|
|
|
|
|
import torch
|
|
|
|
|
|
|
|
|
|
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
|
2024-09-24 21:37:33 -07:00
|
|
|
from sglang.test.test_utils import calculate_rouge_l, is_in_ci
|
2024-08-03 18:20:50 -07:00
|
|
|
|
2024-08-26 01:29:12 +08:00
|
|
|
|
2024-09-22 01:50:37 -07:00
|
|
|
@dataclasses.dataclass
|
|
|
|
|
class ModelCase:
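    # Per-model test configuration: tensor parallel size, numerical tolerances,
    # and whether to skip the long prompts in DEFAULT_PROMPTS.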
    model_path: str
    tp_size: int = 1
    prefill_tolerance: float = 5e-2
    decode_tolerance: float = 5e-2
    rouge_l_tolerance: float = 1
    skip_long_prompt: bool = False


# Popular models that run on the CI
CI_MODELS = [
    ModelCase("meta-llama/Llama-3.1-8B-Instruct"),
    ModelCase("google/gemma-2-2b"),
]

# All other models that do not run on the CI
ALL_OTHER_MODELS = [
    ModelCase("Qwen/Qwen2-1.5B"),
    ModelCase("Qwen/Qwen2.5-14B-Instruct"),
    ModelCase("HuggingFaceTB/SmolLM-135M-Instruct", skip_long_prompt=True),
    ModelCase("allenai/OLMo-1B-0724-hf", decode_tolerance=8e-2, skip_long_prompt=True),
    ModelCase("THUDM/glm-4-9b-chat"),
    ModelCase("openai-community/gpt2"),
]

TORCH_DTYPES = [torch.float16]


class TestGenerationModels(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
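        # Use the "spawn" start method so that CUDA can be initialized safely
        # in the subprocesses launched by the runners below.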
        mp.set_start_method("spawn", force=True)

    def assert_close_logits_and_output_strs(
        self,
        prompts: List[str],
        model_case: ModelCase,
        torch_dtype: torch.dtype,
    ) -> None:
        model_path = model_case.model_path
        prefill_tolerance, decode_tolerance, rouge_l_tolerance = (
            model_case.prefill_tolerance,
            model_case.decode_tolerance,
            model_case.rouge_l_tolerance,
        )
        max_new_tokens = 32
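        # Run the prompts through the HuggingFace implementation as the reference.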
        with HFRunner(
            model_path,
            torch_dtype=torch_dtype,
            model_type="generation",
        ) as hf_runner:
            hf_outputs = hf_runner.forward(prompts, max_new_tokens=max_new_tokens)
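        # Run the same prompts through the SGLang runtime (SRT) under test.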
        with SRTRunner(
            model_path,
            tp_size=model_case.tp_size,
            torch_dtype=torch_dtype,
            model_type="generation",
        ) as srt_runner:
            srt_outputs = srt_runner.forward(prompts, max_new_tokens=max_new_tokens)

        for i in range(len(prompts)):
            # Compare input logprobs
            hf_logprobs = torch.Tensor(hf_outputs.top_input_logprobs[i])
            srt_logprobs = torch.Tensor(srt_outputs.top_input_logprobs[i])
            input_len = hf_logprobs.shape[0]
            print(
                "prefill logprobs max_diff", torch.max(abs(hf_logprobs - srt_logprobs))
            )
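            # Only enforce the element-wise tolerance for short prompts; numerical
            # differences between the two implementations can grow with prompt length.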
            if input_len <= 100:
                assert torch.all(abs(hf_logprobs - srt_logprobs) < prefill_tolerance), (
                    f"prefill logprobs are not all close with model_path={model_path} prompts={prompts} "
                    f"prefill_tolerance={prefill_tolerance}. "
                    f"{hf_logprobs=}, {srt_logprobs=}"
                )

            # Compare output logprobs
            hf_logprobs = torch.Tensor(hf_outputs.top_output_logprobs[i])
            srt_logprobs = torch.Tensor(srt_outputs.top_output_logprobs[i])

            print(
                "decode logprobs max_diff", torch.max(abs(hf_logprobs - srt_logprobs))
            )
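            # Same short-prompt gate as above, applied to the decode logprobs.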
            if input_len <= 100:
                assert torch.all(abs(hf_logprobs - srt_logprobs) < decode_tolerance), (
                    f"decode logprobs are not all close with model_path={model_path} prompts={prompts} "
                    f"decode_tolerance={decode_tolerance}. "
                    f"{hf_logprobs=}, {srt_logprobs=}"
                )

        # Compare output strings
        print(f"{hf_outputs.output_strs=}")
        print(f"{srt_outputs.output_strs=}")
        rouge_l_scores = calculate_rouge_l(
            hf_outputs.output_strs, srt_outputs.output_strs
        )
        print(f"{rouge_l_scores=}")
        assert all(
            score >= rouge_l_tolerance for score in rouge_l_scores
        ), f"Not all ROUGE-L scores are greater than rouge_l_tolerance={rouge_l_tolerance}"

    def test_ci_models(self):
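        # Sweep the small CI model list over every dtype in TORCH_DTYPES.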
        for model_case in CI_MODELS:
            for torch_dtype in TORCH_DTYPES:
                # Skip long prompts for models that do not have a long context
                prompts = DEFAULT_PROMPTS
                if model_case.skip_long_prompt:
                    prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]

                # Assert the logits and output strs are close
                self.assert_close_logits_and_output_strs(
                    prompts, model_case, torch_dtype
                )

    def test_others(self):
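        # The remaining models are only exercised locally; see the usage note at
        # the top of this file for running a single model with ONLY_RUN.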
        if is_in_ci():
            return

        for model_case in ALL_OTHER_MODELS:
            # Only run a specified model
            if (
                "ONLY_RUN" in os.environ
                and os.environ["ONLY_RUN"] != model_case.model_path
            ):
                continue

            # Skip long prompts for models that do not have a long context
            prompts = DEFAULT_PROMPTS
            if model_case.skip_long_prompt:
                prompts = [p for p in DEFAULT_PROMPTS if len(p) < 1000]

            # Assert the logits and output strs are close
            self.assert_close_logits_and_output_strs(prompts, model_case, torch.float16)


if __name__ == "__main__":
    unittest.main()