Improve tensor parallel performance (#625)

Co-authored-by: Mingyi <wisclmy0611@gmail.com>
This commit is contained in:
Ying Sheng
2024-07-15 07:10:51 -07:00
committed by GitHub
parent 5ac8b80677
commit 6a2941f4d0
10 changed files with 171 additions and 81 deletions

View File

@@ -67,10 +67,12 @@ class ServerArgs:
 if self.tokenizer_path is None:
     self.tokenizer_path = self.model_path
 if self.mem_fraction_static is None:
-    if self.tp_size >= 8:
+    if self.tp_size >= 16:
+        self.mem_fraction_static = 0.74
+    elif self.tp_size >= 8:
         self.mem_fraction_static = 0.78
     elif self.tp_size >= 4:
-        self.mem_fraction_static = 0.80
+        self.mem_fraction_static = 0.82
     elif self.tp_size >= 2:
         self.mem_fraction_static = 0.85
     else: