CUDA-graph-compatible releasing and resuming KV cache and model weight memory (#2630)
This commit is contained in:
@@ -29,6 +29,7 @@ suites = {
|
||||
"test_openai_server.py",
|
||||
"test_pytorch_sampling_backend.py",
|
||||
"test_radix_attention.py",
|
||||
"test_release_memory_occupation.py",
|
||||
"test_retract_decode.py",
|
||||
"test_server_args.py",
|
||||
"test_session_control.py",
|
||||
|
||||
Reference in New Issue
Block a user