port fp8 mixtral (#460)

This commit is contained in:
Lianmin Zheng
2024-05-21 11:46:35 -07:00
committed by GitHub
parent 19d2135cb8
commit 0fafc5606b
6 changed files with 633 additions and 118 deletions

View File

@@ -106,6 +106,7 @@ def get_available_gpu_memory(gpu_id, distributed=True):
"which may cause useless memory allocation for torch CUDA context.",
)
torch.cuda.empty_cache()
free_gpu_memory, _ = torch.cuda.mem_get_info(gpu_id)
if distributed: