# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Sequence
from typing import Optional, Union

import torch

from vllm.logger import init_logger
from vllm.lora.request import LoRARequest
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import BeamSearchParams, SamplingParams

logger = init_logger(__name__)


class RequestLogger:
    """Logs incoming requests and generated outputs at INFO level.

    Long prompts, token-id lists, and outputs are truncated to
    ``max_log_len`` entries/characters before logging so that very large
    requests do not flood the logs.
    """

    def __init__(self, *, max_log_len: Optional[int]) -> None:
        # None disables truncation entirely; otherwise prompts/outputs are
        # clipped to at most this many characters (or tokens, for id lists).
        self.max_log_len = max_log_len

    def log_inputs(
        self,
        request_id: str,
        prompt: Optional[str],
        prompt_token_ids: Optional[list[int]],
        prompt_embeds: Optional[torch.Tensor],
        params: Optional[Union[SamplingParams, PoolingParams,
                               BeamSearchParams]],
        lora_request: Optional[LoRARequest],
    ) -> None:
        """Log the inputs of a newly received request.

        Args:
            request_id: Unique identifier of the request.
            prompt: Text prompt, if any (truncated to ``max_log_len``).
            prompt_token_ids: Tokenized prompt, if any (truncated to
                ``max_log_len`` tokens).
            prompt_embeds: Optional prompt embeddings; only the tensor
                shape is logged, never the contents.
            params: Sampling/pooling/beam-search parameters, if any.
            lora_request: LoRA adapter associated with the request, if any.
        """
        max_log_len = self.max_log_len
        if max_log_len is not None:
            # Truncate potentially huge payloads before they hit the log.
            if prompt is not None:
                prompt = prompt[:max_log_len]

            if prompt_token_ids is not None:
                prompt_token_ids = prompt_token_ids[:max_log_len]

        logger.info(
            "Received request %s: prompt: %r, "
            "params: %s, prompt_token_ids: %s, "
            "prompt_embeds shape: %s, "
            "lora_request: %s.", request_id, prompt, params,
            prompt_token_ids,
            prompt_embeds.shape if prompt_embeds is not None else None,
            lora_request)

    def log_outputs(
        self,
        request_id: str,
        # Fixed annotation: the body explicitly guards against None, so the
        # parameter is Optional (was annotated as plain `str`).
        outputs: Optional[str],
        output_token_ids: Optional[Sequence[int]],
        finish_reason: Optional[str] = None,
        is_streaming: bool = False,
        delta: bool = False,
    ) -> None:
        """Log the outputs generated for a request.

        Args:
            request_id: Unique identifier of the request.
            outputs: Generated text, if any (truncated to ``max_log_len``).
            output_token_ids: Generated token ids, if any (truncated to
                ``max_log_len`` tokens).
            finish_reason: Why generation stopped, if known.
            is_streaming: Whether this log entry comes from a streaming
                response.
            delta: For streaming, whether ``outputs`` is an incremental
                delta rather than the complete response so far.
        """
        max_log_len = self.max_log_len
        if max_log_len is not None:
            if outputs is not None:
                outputs = outputs[:max_log_len]

            if output_token_ids is not None:
                # Convert to list and apply truncation
                output_token_ids = list(output_token_ids)[:max_log_len]

        stream_info = ""
        if is_streaming:
            stream_info = (" (streaming delta)"
                           if delta else " (streaming complete)")

        logger.info(
            "Generated response %s%s: output: %r, "
            "output_token_ids: %s, finish_reason: %s",
            request_id,
            stream_info,
            outputs,
            output_token_ids,
            finish_reason,
        )