[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)

This commit is contained in:
Lianmin Zheng
2024-08-25 14:46:34 -07:00
committed by GitHub
parent 30b4f771b0
commit 902278008a
12 changed files with 137 additions and 134 deletions

View File

@@ -109,7 +109,7 @@ class ModelRunner:
def init_torch_distributed(self):
# Init torch distributed
torch.cuda.set_device(self.gpu_id)
-        logger.info(f"[gpu={self.gpu_id}] Init nccl begin.")
+        logger.info("Init nccl begin.")
if not self.server_args.enable_p2p_check:
monkey_patch_vllm_p2p_access_check(self.gpu_id)
@@ -152,8 +152,7 @@ class ModelRunner:
def load_model(self):
logger.info(
-            f"[gpu={self.gpu_id}] Load weight begin. "
-            f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
+            f"Load weight begin. avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
)
if torch.cuda.get_device_capability()[0] < 8:
logger.info(
@@ -208,7 +207,7 @@ class ModelRunner:
)
logger.info(
-            f"[gpu={self.gpu_id}] Load weight end. "
+            f"Load weight end. "
f"type={type(self.model).__name__}, "
f"dtype={self.dtype}, "
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
@@ -224,7 +223,7 @@ class ModelRunner:
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
logger.info(
-            f"[gpu={self.gpu_id}] Update weights begin. "
+            f"Update weights begin. "
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
)
@@ -298,7 +297,7 @@ class ModelRunner:
self.load_config = load_config
self.model_config.path = model_path
-        logger.info(f"[gpu={self.gpu_id}] Update weights end.")
+        logger.info("Update weights end.")
return True, "Succeeded to update model weights"
def profile_max_num_token(self, total_gpu_memory: int):
@@ -387,7 +386,7 @@ class ModelRunner:
layer_num=self.model_config.num_hidden_layers,
)
logger.info(
-            f"[gpu={self.gpu_id}] Memory pool end. "
+            f"Memory pool end. "
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
)
@@ -473,9 +472,7 @@ class ModelRunner:
self.cuda_graph_runner = None
return
-        logger.info(
-            f"[gpu={self.gpu_id}] Capture cuda graph begin. This can take up to several minutes."
-        )
+        logger.info("Capture cuda graph begin. This can take up to several minutes.")
if self.server_args.disable_cuda_graph_padding:
batch_size_list = list(range(1, 32)) + [64, 128]