[Minor] Improve the function organization in TokenizerManager & improve loggers (#1208)
This commit is contained in:
@@ -109,7 +109,7 @@ class ModelRunner:
|
||||
def init_torch_distributed(self):
|
||||
# Init torch distributed
|
||||
torch.cuda.set_device(self.gpu_id)
|
||||
logger.info(f"[gpu={self.gpu_id}] Init nccl begin.")
|
||||
logger.info("Init nccl begin.")
|
||||
|
||||
if not self.server_args.enable_p2p_check:
|
||||
monkey_patch_vllm_p2p_access_check(self.gpu_id)
|
||||
@@ -152,8 +152,7 @@ class ModelRunner:
|
||||
|
||||
def load_model(self):
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Load weight begin. "
|
||||
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
|
||||
f"Load weight begin. avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
|
||||
)
|
||||
if torch.cuda.get_device_capability()[0] < 8:
|
||||
logger.info(
|
||||
@@ -208,7 +207,7 @@ class ModelRunner:
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Load weight end. "
|
||||
f"Load weight end. "
|
||||
f"type={type(self.model).__name__}, "
|
||||
f"dtype={self.dtype}, "
|
||||
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
|
||||
@@ -224,7 +223,7 @@ class ModelRunner:
|
||||
from vllm.model_executor.model_loader.utils import set_default_torch_dtype
|
||||
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Update weights begin. "
|
||||
f"Update weights begin. "
|
||||
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
|
||||
)
|
||||
|
||||
@@ -298,7 +297,7 @@ class ModelRunner:
|
||||
self.load_config = load_config
|
||||
self.model_config.path = model_path
|
||||
|
||||
logger.info(f"[gpu={self.gpu_id}] Update weights end.")
|
||||
logger.info("Update weights end.")
|
||||
return True, "Succeeded to update model weights"
|
||||
|
||||
def profile_max_num_token(self, total_gpu_memory: int):
|
||||
@@ -387,7 +386,7 @@ class ModelRunner:
|
||||
layer_num=self.model_config.num_hidden_layers,
|
||||
)
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Memory pool end. "
|
||||
f"Memory pool end. "
|
||||
f"avail mem={get_available_gpu_memory(self.gpu_id):.2f} GB"
|
||||
)
|
||||
|
||||
@@ -473,9 +472,7 @@ class ModelRunner:
|
||||
self.cuda_graph_runner = None
|
||||
return
|
||||
|
||||
logger.info(
|
||||
f"[gpu={self.gpu_id}] Capture cuda graph begin. This can take up to several minutes."
|
||||
)
|
||||
logger.info("Capture cuda graph begin. This can take up to several minutes.")
|
||||
|
||||
if self.server_args.disable_cuda_graph_padding:
|
||||
batch_size_list = list(range(1, 32)) + [64, 128]
|
||||
|
||||
Reference in New Issue
Block a user