Move sampler into CUDA graph (#1201)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
This commit is contained in:
Liangsheng Yin
2024-08-26 07:02:50 -07:00
committed by GitHub
parent 97589a60a2
commit 75ce37f401
28 changed files with 336 additions and 110 deletions

View File

@@ -180,7 +180,7 @@ class SRTRunner:
tp_size=tp_size,
dtype=get_dtype_str(torch_dtype),
port=port,
-mem_fraction_static=0.7,
+mem_fraction_static=0.69,
trust_remote_code=False,
is_embedding=not self.is_generation,
)