# enginex-vastai-va16-vllm/vllm_vacc/vllm/executor/executor_base.py

import asyncio
from typing import List

from vllm.v1.outputs import PoolerOutput, SamplerOutput
from vllm.sequence import ExecuteModelRequest

# class DistributedExecutorBase():
#     """Abstract superclass of distributed executor implementations."""

async def execute_model_async(
        self,
        execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]:
    """Execute one request through the driver worker and return its output.

    When ``self.parallel_worker_tasks`` is ``None``, the coroutine returned
    by ``self._start_worker_execution_loop()`` is scheduled as an asyncio
    task and stored on ``self.parallel_worker_tasks``; control is then
    handed back to the event loop once via ``asyncio.sleep(0)``.

    Regardless of that branch, the event loop is yielded to one more time
    before the driver call is awaited.

    Args:
        execute_model_req: Request passed unchanged to
            ``self._driver_execute_model_async``.

    Returns:
        The value produced by ``self._driver_execute_model_async``.
    """
    loop_not_started = self.parallel_worker_tasks is None
    if loop_not_started:
        # Launch the execution loop that runs in the parallel workers and
        # keep a handle to the task on the instance.
        worker_loop = self._start_worker_execution_loop()
        self.parallel_worker_tasks = asyncio.create_task(worker_loop)
        # Zero-length sleep: give the event loop a turn right after
        # scheduling the task.
        await asyncio.sleep(0)

    # Give the event loop another turn before driving the model.
    await asyncio.sleep(0)

    # Sampling results are returned by the driver worker only.
    driver_output = await self._driver_execute_model_async(execute_model_req)
    return driver_output