Simplify tokenizer manager (#1904)

This commit is contained in:
Lianmin Zheng
2024-11-03 08:38:26 -08:00
committed by GitHub
parent 916b3cdddc
commit c17c578108
11 changed files with 261 additions and 443 deletions

View File

@@ -8,7 +8,7 @@ suites = {
"models/test_embedding_models.py",
"models/test_generation_models.py",
"models/test_lora.py",
"models/test_reward_models.py",
# "models/test_reward_models.py",
"sampling/penaltylib",
"test_chunked_prefill.py",
"test_double_sparsity.py",

View File

@@ -1,3 +1,8 @@
"""
python3 -m unittest test_openai_server.TestOpenAIServer.test_batch
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion
"""
import json
import time
import unittest

View File

@@ -1,3 +1,6 @@
"""
python3 -m unittest test_skip_tokenizer_init.TestSkipTokenizerInit.test_parallel_sample
"""
import json
import unittest

View File

@@ -1,5 +1,6 @@
"""
python3 -m unittest test_srt_endpoint.TestSRTEndpoint.test_simple_decode
python3 -m unittest test_srt_endpoint.TestSRTEndpoint.test_parallel_sample
"""
import json
@@ -36,11 +37,17 @@ class TestSRTEndpoint(unittest.TestCase):
return_text=False,
n=1,
stream=False,
batch=False,
):
if batch:
text = ["The capital of France is"]
else:
text = "The capital of France is"
response = requests.post(
self.base_url + "/generate",
json={
"text": "The capital of France is",
"text": text,
"sampling_params": {
"temperature": 0 if n == 1 else 0.5,
"max_new_tokens": 16,
@@ -67,6 +74,9 @@ class TestSRTEndpoint(unittest.TestCase):
def test_simple_decode(self):
self.run_decode()
def test_simple_decode_batch(self):
self.run_decode(batch=True)
def test_parallel_sample(self):
self.run_decode(n=3)

View File

@@ -1,6 +1,7 @@
"""
Usage:
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
"""
import base64