From dc67d9769382cf83b3e2644a4366d6473445a6c6 Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Wed, 4 Sep 2024 04:29:53 +1000
Subject: [PATCH] misc: speedup load safetensors (#1319)

Co-authored-by: ispobock
---
 python/sglang/srt/model_executor/model_runner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index 09b3c7127..3d3e0cde9 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -162,6 +162,7 @@ class ModelRunner:
         return min_per_gpu_memory
 
     def load_model(self):
+        torch.set_num_threads(1)
         logger.info(
             f"Load weight begin. avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
         )