chore: update vllm to 0.5.4 (#966)
This commit is contained in:
@@ -18,9 +18,7 @@ import torch
|
||||
from sglang.test.runners import DEFAULT_PROMPTS, HFRunner, SRTRunner
|
||||
|
||||
MODELS = [
|
||||
# (model_name, tp_size)
|
||||
("meta-llama/Meta-Llama-3.1-8B-Instruct", 1),
|
||||
# ("meta-llama/Meta-Llama-3.1-8B-Instruct", 2),
|
||||
]
|
||||
TORCH_DTYPES = [torch.float16]
|
||||
|
||||
@@ -51,7 +49,7 @@ class TestCausalModels(unittest.TestCase):
|
||||
hf_logprobs = torch.Tensor(hf_outputs.top_input_logprobs[i])
|
||||
srt_logprobs = torch.Tensor(srt_outputs.top_input_logprobs[i])
|
||||
|
||||
tolerance = 2e-2
|
||||
tolerance = 3e-2
|
||||
assert torch.all(
|
||||
abs(hf_logprobs - srt_logprobs) < tolerance
|
||||
), f"prefill logprobs not all close"
|
||||
|
||||
@@ -20,7 +20,7 @@ if __name__ == "__main__":
|
||||
arg_parser.add_argument(
|
||||
"--timeout-per-file",
|
||||
type=int,
|
||||
default=1000,
|
||||
default=2000,
|
||||
help="The time limit for running one file in seconds.",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
|
||||
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.base_url = f"http://localhost:8157"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
|
||||
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.base_url = f"http://localhost:8157"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -14,7 +14,7 @@ class TestOpenAIServer(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.base_url = f"http://localhost:8157"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model, cls.base_url, timeout=300, api_key=cls.api_key
|
||||
|
||||
@@ -13,7 +13,7 @@ class TestSRTEndpoint(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.base_url = f"http://localhost:{8157}"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.process = popen_launch_server(cls.model, cls.base_url, timeout=300)
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -11,7 +11,7 @@ class TestAccuracy(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||
cls.base_url = f"http://localhost:8157"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model, cls.base_url, timeout=300, other_args=["--enable-torch-compile"]
|
||||
)
|
||||
|
||||
@@ -13,7 +13,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "liuhaotian/llava-v1.6-vicuna-7b"
|
||||
cls.base_url = "http://localhost:8157"
|
||||
cls.base_url = "http://127.0.0.1:8157"
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
|
||||
Reference in New Issue
Block a user