bi_150-vllm/vllm/tracing/utils.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Mapping

from vllm.logger import init_logger
from vllm.utils.func_utils import run_once

# Logger for this module, created through vLLM's `init_logger` helper.
logger = init_logger(__name__)

# Standard W3C Trace Context header names used for context propagation.
# The names are spelled in lowercase; the helpers in this module look them up
# with plain key membership tests, so matching follows the lookup rules of the
# mapping that is passed in.
TRACE_HEADERS = ["traceparent", "tracestate"]


class SpanAttributes:
    """
    Standard attribute names for request spans.

    These are largely based on OpenTelemetry Semantic Conventions but are defined
    here as plain string constants so they can be used by any backend or logger
    without importing an OpenTelemetry package.

    The class only groups constants; it defines no methods or instance state.
    """

    # Attribute names copied from OTel semantic conventions to avoid version
    # conflicts with whichever OTel package version is installed.
    GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"
    GEN_AI_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"
    GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
    GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"
    GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
    GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"

    # Custom attributes added until they are standardized.
    # NOTE(review): the units of the latency values are not defined in this
    # file; confirm them where the attributes are set.
    GEN_AI_REQUEST_ID = "gen_ai.request.id"
    GEN_AI_REQUEST_N = "gen_ai.request.n"
    GEN_AI_USAGE_NUM_SEQUENCES = "gen_ai.usage.num_sequences"
    GEN_AI_LATENCY_TIME_IN_QUEUE = "gen_ai.latency.time_in_queue"
    GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN = "gen_ai.latency.time_to_first_token"
    GEN_AI_LATENCY_E2E = "gen_ai.latency.e2e"
    GEN_AI_LATENCY_TIME_IN_SCHEDULER = "gen_ai.latency.time_in_scheduler"

    # Latency breakdowns (also custom, under the same `gen_ai.latency.` prefix).
    GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD = "gen_ai.latency.time_in_model_forward"
    GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE = "gen_ai.latency.time_in_model_execute"
    GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL = "gen_ai.latency.time_in_model_prefill"
    GEN_AI_LATENCY_TIME_IN_MODEL_DECODE = "gen_ai.latency.time_in_model_decode"
    GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCE = "gen_ai.latency.time_in_model_inference"


class LoadingSpanAttributes:
    """
    Custom attribute names for code-level tracing.

    Each constant is the span attribute key for one part of a code location:
    namespace, function, file path and line number. The class only groups
    constants; it defines no methods or instance state.
    """

    CODE_NAMESPACE = "code.namespace"
    CODE_FUNCTION = "code.function"
    CODE_FILEPATH = "code.filepath"
    CODE_LINENO = "code.lineno"


def contains_trace_headers(headers: Mapping[str, str]) -> bool:
    """
    Report whether ``headers`` carries any trace-context header.

    Every name in ``TRACE_HEADERS`` is tested with ``in``, so matching follows
    the key-lookup rules of the mapping that is passed in.

    Returns:
        True as soon as one trace header name is found, otherwise False.
    """
    for header_name in TRACE_HEADERS:
        if header_name in headers:
            return True
    return False


def extract_trace_headers(headers: Mapping[str, str]) -> Mapping[str, str]:
    """
    Build a new dict holding only the trace-related entries of ``headers``.

    Useful for logging or passing context to a non-OTel client. The result is
    keyed by the names in ``TRACE_HEADERS``, in that order, and omits any name
    that is not present in ``headers``. The input mapping is not modified.
    """
    trace_only: dict[str, str] = {}
    for header_name in TRACE_HEADERS:
        if header_name in headers:
            trace_only[header_name] = headers[header_name]
    return trace_only


@run_once
def log_tracing_disabled_warning() -> None:
    """
    Log a warning that a request carried trace context while tracing is off.

    NOTE(review): the ``run_once`` decorator presumably limits this to the
    first call so the warning is not repeated per request; confirm in
    ``vllm.utils.func_utils``.
    """
    message = "Received a request with trace context but tracing is disabled"
    logger.warning(message)
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`

			`from collections.abc import Mapping`

			`from vllm.logger import init_logger`
			`from vllm.utils.func_utils import run_once`

			`logger = init_logger(__name__)`

			`# Standard W3C headers used for context propagation`
			`TRACE_HEADERS = ["traceparent", "tracestate"]`


			`class SpanAttributes:`
			`"""`
			`Standard attributes for spans.`

			`These are largely based on OpenTelemetry Semantic Conventions but are defined`
			`here as constants so they can be used by any backend or logger.`
			`"""`

			`# Attribute names copied from OTel semantic conventions to avoid version conflicts`
			`GEN_AI_USAGE_COMPLETION_TOKENS = "gen_ai.usage.completion_tokens"`
			`GEN_AI_USAGE_PROMPT_TOKENS = "gen_ai.usage.prompt_tokens"`
			`GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"`
			`GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"`
			`GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"`
			`GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"`

			`# Custom attributes added until they are standardized`
			`GEN_AI_REQUEST_ID = "gen_ai.request.id"`
			`GEN_AI_REQUEST_N = "gen_ai.request.n"`
			`GEN_AI_USAGE_NUM_SEQUENCES = "gen_ai.usage.num_sequences"`
			`GEN_AI_LATENCY_TIME_IN_QUEUE = "gen_ai.latency.time_in_queue"`
			`GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN = "gen_ai.latency.time_to_first_token"`
			`GEN_AI_LATENCY_E2E = "gen_ai.latency.e2e"`
			`GEN_AI_LATENCY_TIME_IN_SCHEDULER = "gen_ai.latency.time_in_scheduler"`

			`# Latency breakdowns`
			`GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD = "gen_ai.latency.time_in_model_forward"`
			`GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE = "gen_ai.latency.time_in_model_execute"`
			`GEN_AI_LATENCY_TIME_IN_MODEL_PREFILL = "gen_ai.latency.time_in_model_prefill"`
			`GEN_AI_LATENCY_TIME_IN_MODEL_DECODE = "gen_ai.latency.time_in_model_decode"`
			`GEN_AI_LATENCY_TIME_IN_MODEL_INFERENCE = "gen_ai.latency.time_in_model_inference"`


			`class LoadingSpanAttributes:`
			`"""Custom attributes for code-level tracing (file, line number)."""`

			`CODE_NAMESPACE = "code.namespace"`
			`CODE_FUNCTION = "code.function"`
			`CODE_FILEPATH = "code.filepath"`
			`CODE_LINENO = "code.lineno"`


			`def contains_trace_headers(headers: Mapping[str, str]) -> bool:`
			`"""Check if the provided headers dictionary contains trace context."""`
			`return any(h in headers for h in TRACE_HEADERS)`


			`def extract_trace_headers(headers: Mapping[str, str]) -> Mapping[str, str]:`
			`"""`
			`Extract only trace-related headers from a larger header dictionary.`
			`Useful for logging or passing context to a non-OTel client.`
			`"""`
			`return {h: headers[h] for h in TRACE_HEADERS if h in headers}`


			`@run_once`
			`def log_tracing_disabled_warning() -> None:`
			`logger.warning("Received a request with trace context but tracing is disabled")`