Provide an argument to set the maximum batch size for CUDA graph (#1809)
This commit is contained in:
@@ -34,7 +34,7 @@ class TestLargeMaxNewTokens(unittest.TestCase):
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
api_key=cls.api_key,
|
||||
other_args=("--max-total-token", "1024", "--context-len", "8192"),
|
||||
env={"SGLANG_CLIP_MAX_NEW_TOKENS": "256", **os.environ},
|
||||
env={"SGLANG_CLIP_MAX_NEW_TOKENS_ESTIMATION": "256", **os.environ},
|
||||
return_stdout_stderr=(cls.stdout, cls.stderr),
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
|
||||
Reference in New Issue
Block a user