2024-11-30 22:14:48 -08:00
|
|
|
import unittest
|
|
|
|
|
|
|
|
|
|
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
|
import sglang as sgl
|
2025-03-26 07:53:12 +08:00
|
|
|
from sglang.test.test_utils import CustomTestCase
|
2024-11-30 22:14:48 -08:00
|
|
|
|
|
|
|
|
|
2025-03-26 07:53:12 +08:00
|
|
|
class TestGGUF(CustomTestCase):
    """End-to-end check that the offline engine can serve a GGUF checkpoint."""

    def test_models(self):
        """Generate from a quantized Qwen2 GGUF file and compare the output.

        Downloads ``qwen2-1_5b-instruct-q4_k_m.gguf`` from the
        ``Qwen/Qwen2-1.5B-Instruct-GGUF`` Hugging Face repository, starts an
        ``sgl.Engine`` on it, generates 8 new tokens at temperature 0 with a
        fixed random seed, and asserts the exact continuation text.
        """
        prompt = "Today is a sunny day and I like"
        sampling_params = {"temperature": 0, "max_new_tokens": 8}

        model_path = hf_hub_download(
            "Qwen/Qwen2-1.5B-Instruct-GGUF",
            filename="qwen2-1_5b-instruct-q4_k_m.gguf",
        )

        engine = sgl.Engine(model_path=model_path, random_seed=42, cuda_graph_max_bs=2)
        try:
            outputs = engine.generate(prompt, sampling_params)["text"]
        finally:
            # Always shut the engine down, even when generation raises, so a
            # failing test does not leave the engine running for later tests.
            engine.shutdown()

        self.assertEqual(outputs, " it. I have a lot of work")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test cases in this module when the file is executed as a script.
    unittest.main()
|