bi_150-vllm/vllm/entrypoints/pooling/classify/serving.py

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from typing import TypeAlias

import numpy as np

from vllm import ClassificationOutput
from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import ChatTemplateConfig
from vllm.entrypoints.openai.engine.protocol import UsageInfo
from vllm.entrypoints.pooling.base.serving import PoolingServeContext, PoolingServing
from vllm.logger import init_logger
from vllm.renderers import BaseRenderer

from .io_processor import ClassifyIOProcessor
from .protocol import (
    ClassificationData,
    ClassificationRequest,
    ClassificationResponse,
)

# Module-level logger, created through vLLM's init_logger with this module's name.
logger = init_logger(__name__)


# PoolingServeContext parameterized with ClassificationRequest; used as the
# type of the `ctx` argument in ServingClassification._build_response.
ClassificationServeContext: TypeAlias = PoolingServeContext[ClassificationRequest]


class ServingClassification(PoolingServing):
    """PoolingServing subclass that assembles ClassificationResponse objects.

    Request ids for this handler use the "classify" prefix.
    """

    request_id_prefix = "classify"

    def init_io_processor(
        self,
        model_config: ModelConfig,
        renderer: BaseRenderer,
        chat_template_config: ChatTemplateConfig,
    ) -> ClassifyIOProcessor:
        """Create the ClassifyIOProcessor used by this serving instance.

        Args:
            model_config: Forwarded unchanged to ClassifyIOProcessor.
            renderer: Forwarded unchanged to ClassifyIOProcessor.
            chat_template_config: Forwarded unchanged to ClassifyIOProcessor.

        Returns:
            The newly constructed ClassifyIOProcessor.
        """
        processor = ClassifyIOProcessor(
            model_config=model_config,
            renderer=renderer,
            chat_template_config=chat_template_config,
        )
        return processor

    async def _build_response(
        self,
        ctx: ClassificationServeContext,
    ) -> ClassificationResponse:
        """Convert the batch results held by ``ctx`` into a response.

        The batch is first passed through the IO processor's
        ``post_process_async``. Each resulting entry then yields one
        ClassificationData item, indexed by its position in the batch.

        Args:
            ctx: Serve context supplying ``final_res_batch``, ``request_id``,
                ``created_time`` and ``model_name``.

        Returns:
            A ClassificationResponse carrying the per-input items and a
            UsageInfo whose prompt and total token counts are both the
            summed prompt-token lengths.
        """
        checked_batch = await self.io_processor.post_process_async(ctx.final_res_batch)

        # Falls back to an empty mapping when hf_config has no `id2label`
        # attribute, in which case every label lookup below yields None.
        label_map = getattr(self.model_config.hf_config, "id2label", {})

        data: list[ClassificationData] = []
        prompt_token_total = 0
        for position, result in enumerate(checked_batch):
            scores = ClassificationOutput.from_base(result.outputs).probs
            # The reported label is the one mapped to the highest-scoring index.
            top_class = int(np.argmax(scores))

            data.append(
                ClassificationData(
                    index=position,
                    label=label_map.get(top_class),
                    probs=scores,
                    num_classes=len(scores),
                )
            )
            prompt_token_total += len(result.prompt_token_ids)

        # Only prompt tokens are counted, so the total equals the prompt count.
        usage = UsageInfo(
            prompt_tokens=prompt_token_total,
            total_tokens=prompt_token_total,
        )

        return ClassificationResponse(
            id=ctx.request_id,
            created=ctx.created_time,
            model=ctx.model_name,
            data=data,
            usage=usage,
        )
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`# SPDX-License-Identifier: Apache-2.0`
			`# SPDX-FileCopyrightText: Copyright contributors to the vLLM project`

Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`from typing import TypeAlias`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00
			`import numpy as np`
Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00
			`from vllm import ClassificationOutput`
			`from vllm.config import ModelConfig`
			`from vllm.entrypoints.chat_utils import ChatTemplateConfig`
			`from vllm.entrypoints.openai.engine.protocol import UsageInfo`
			`from vllm.entrypoints.pooling.base.serving import PoolingServeContext, PoolingServing`
			`from vllm.logger import init_logger`
			`from vllm.renderers import BaseRenderer`

			`from .io_processor import ClassifyIOProcessor`
			`from .protocol import (`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`ClassificationData,`
			`ClassificationRequest,`
			`ClassificationResponse,`
			`)`

			`logger = init_logger(__name__)`


Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`ClassificationServeContext: TypeAlias = PoolingServeContext[ClassificationRequest]`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00

Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`class ServingClassification(PoolingServing):`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`request_id_prefix = "classify"`

Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`def init_io_processor(`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`self,`
Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`model_config: ModelConfig,`
			`renderer: BaseRenderer,`
			`chat_template_config: ChatTemplateConfig,`
			`) -> ClassifyIOProcessor:`
			`return ClassifyIOProcessor(`
			`model_config=model_config,`
			`renderer=renderer,`
			`chat_template_config=chat_template_config,`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`)`

Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`async def _build_response(`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`self,`
			`ctx: ClassificationServeContext,`
Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`) -> ClassificationResponse:`
			`final_res_batch_checked = await self.io_processor.post_process_async(`
			`ctx.final_res_batch`
			`)`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00
Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`id2label = getattr(self.model_config.hf_config, "id2label", {})`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`num_prompt_tokens = 0`
Upgrade to vllm 0.17.0 corex v4.1 overlay 2026-04-29 19:38:22 +08:00			`items: list[ClassificationData] = []`
Add minimal vLLM 0.16.1 build repo for BI-V150 2026-04-18 10:56:22 +08:00			`for idx, final_res in enumerate(final_res_batch_checked):`
			`classify_res = ClassificationOutput.from_base(final_res.outputs)`

			`probs = classify_res.probs`
			`predicted_index = int(np.argmax(probs))`
			`label = id2label.get(predicted_index)`

			`item = ClassificationData(`
			`index=idx,`
			`label=label,`
			`probs=probs,`
			`num_classes=len(probs),`
			`)`

			`items.append(item)`
			`prompt_token_ids = final_res.prompt_token_ids`
			`num_prompt_tokens += len(prompt_token_ids)`

			`usage = UsageInfo(`
			`prompt_tokens=num_prompt_tokens,`
			`total_tokens=num_prompt_tokens,`
			`)`

			`return ClassificationResponse(`
			`id=ctx.request_id,`
			`created=ctx.created_time,`
			`model=ctx.model_name,`
			`data=items,`
			`usage=usage,`
			`)`