Provide an argument to set the maximum batch size for CUDA graph (#1809)

This commit is contained in:
Lianmin Zheng
2024-10-26 15:09:33 -07:00
committed by GitHub
parent 9d6fb08457
commit 2b80978859
4 changed files with 25 additions and 10 deletions

View File

@@ -34,7 +34,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
api_key=cls.api_key,
other_args=("--max-total-token", "1024", "--context-len", "8192"),
env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},
env={"SGLANG_CLIP_MAX_NEW_TOKENS_ESTIMATION": "256", **os.environ},
return_stdout_stderr=(cls.stdout, cls.stderr),
)
cls.base_url += "/v1"