CUDA-graph-compatible releasing and resuming KV cache and model weight memory (#2630)
This commit is contained in:
@@ -44,6 +44,7 @@ srt_hpu = ["sglang[runtime_common]"]
|
||||
openai = ["openai>=1.0", "tiktoken"]
|
||||
anthropic = ["anthropic>=0.20.0"]
|
||||
litellm = ["litellm>=1.0.0"]
|
||||
torch_memory_saver = ["torch_memory_saver"]
|
||||
test = [
|
||||
"jsonlines",
|
||||
"matplotlib",
|
||||
|
||||
Reference in New Issue
Block a user