init

108  vllm_vacc/vllm/v1/engine/async_llm.py  Normal file
@@ -0,0 +1,108 @@
from typing import Any, Optional, Union

import vllm.envs as envs
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.usage.usage_lib import UsageContext
from vllm.v1.executor.abstract import Executor
from vllm.v1.metrics.loggers import StatLoggerFactory

from vllm.engine.protocol import EngineClient
from vllm_vacc.vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.output_processor import (OutputProcessor,
                                             RequestOutputCollector)
from vllm.v1.engine.parallel_sampling import ParentRequest
from vllm.v1.engine.async_llm import logger


class AsyncLLM(EngineClient):

    @classmethod
    def from_vllm_config(
        cls,
        vllm_config: VllmConfig,
        start_engine_loop: bool = True,
        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
        stat_loggers: Optional[list[StatLoggerFactory]] = None,
        enable_log_requests: bool = False,
        disable_log_stats: bool = False,
        client_addresses: Optional[dict[str, str]] = None,
        client_count: int = 1,
        client_index: int = 0,
        disable_log_requests: bool = True,  # Deprecated, will be removed.
    ) -> "AsyncLLM":
        # vacc only supports spec_num = 1; validate the speculative config
        # up front (a sketch of the checker follows this method).
        from .vllm_config_checker import check_spec_model
        check_spec_model(vllm_config)

        if not envs.VLLM_USE_V1:
            raise ValueError(
                "Using V1 AsyncLLMEngine, but envs.VLLM_USE_V1=False. "
                "This should not happen. As a workaround, try using "
                "AsyncLLMEngine.from_vllm_config(...) or explicitly set "
                "VLLM_USE_V1=0 or 1 and report this issue on Github.")

        # Create the AsyncLLM (delegating to the stock v1 implementation).
        from vllm.v1.engine.async_llm import AsyncLLM as DefaultAsyncLLM
        async_cls = DefaultAsyncLLM
        return async_cls(
            vllm_config=vllm_config,
            executor_class=Executor.get_class(vllm_config),
            start_engine_loop=start_engine_loop,
            stat_loggers=stat_loggers,
            log_requests=enable_log_requests,
            log_stats=not disable_log_stats,
            usage_context=usage_context,
            client_addresses=client_addresses,
            client_count=client_count,
            client_index=client_index,
        )
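
    # ``check_spec_model`` comes from the local ``vllm_config_checker``
    # module, which is not part of this file. A minimal sketch of what it
    # might look like, assuming the checker only enforces a single
    # speculative token (``num_speculative_tokens`` follows vLLM's
    # SpeculativeConfig); treat the body as an illustration, not the
    # shipped implementation:
    #
    #     def check_spec_model(vllm_config: VllmConfig) -> None:
    #         spec = vllm_config.speculative_config
    #         if spec is not None and spec.num_speculative_tokens != 1:
    #             raise ValueError(
    #                 "vacc only supports num_speculative_tokens=1, "
    #                 f"got {spec.num_speculative_tokens}")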

    @classmethod
    def from_engine_args(
        cls,
        engine_args: AsyncEngineArgs,
        start_engine_loop: bool = True,
        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
        stat_loggers: Optional[list[StatLoggerFactory]] = None,
    ) -> "AsyncLLM":
        """Create an AsyncLLM from the EngineArgs."""
        # Create the engine configs.
        vllm_config = engine_args.create_engine_config(usage_context)
        executor_class = Executor.get_class(vllm_config)

        # vacc only supports spec_num = 1; validate the speculative config.
        from .vllm_config_checker import check_spec_model
        check_spec_model(vllm_config)

        # Create the AsyncLLM.
        from vllm.v1.engine.async_llm import AsyncLLM as DefaultAsyncLLM
        async_cls = DefaultAsyncLLM
        return async_cls(
            vllm_config=vllm_config,
            executor_class=executor_class,
            log_requests=not engine_args.disable_log_requests,
            log_stats=not engine_args.disable_log_stats,
            start_engine_loop=start_engine_loop,
            usage_context=usage_context,
            stat_loggers=stat_loggers,
        )

    async def _add_request(self, request: EngineCoreRequest,
                           prompt: Optional[str],
                           parent_req: Optional[ParentRequest], index: int,
                           queue: RequestOutputCollector):

        # Add the request to OutputProcessor (this process).
        self.output_processor.add_request(request, prompt, parent_req, index,
                                          queue)

        # Add the EngineCoreRequest to EngineCore (separate process).
        await self.engine_core.add_request_async(request)

        if self.log_requests:
            if request.prompt_token_ids is not None:
                logger.info("Added request: %s, prompt length: %s",
                            request.request_id,
                            len(request.prompt_token_ids))
            else:
                logger.info("Added request %s.", request.request_id)