from typing import Dict, Optional

from vllm.engine.arg_utils import EngineArgs
from vllm.engine.metrics_types import StatLoggerBase
from vllm.usage.usage_lib import UsageContext


class LLMEngine:
    """Facade that builds either the V1 or the default (V0) vLLM engine.

    Dispatch between the two engine stacks is decided at construction time
    via the ``VLLM_USE_V1`` environment flag.
    """

    @classmethod
    def from_engine_args(
        cls,
        engine_args: EngineArgs,
        usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
        stat_loggers: Optional[Dict[str, StatLoggerBase]] = None,
    ) -> "LLMEngine":
        """Creates an LLM engine from the engine arguments.

        Args:
            engine_args: Arguments used to build the engine configuration.
            usage_context: Context the engine is created from (usage/telemetry).
            stat_loggers: Optional named stat loggers forwarded to the engine.

        Returns:
            The constructed engine (V1 or default, per ``VLLM_USE_V1``).

        Raises:
            ValueError: If speculative decoding is configured with
                ``num_speculative_tokens != 1``, which this backend does not
                support.
        """
        # Create the engine configs.
        vllm_config = engine_args.create_engine_config(usage_context)

        # `speculative_config` may be absent or None when speculative decoding
        # is disabled; treat both cases the same instead of relying on
        # hasattr(), which is True even when the attribute holds None.
        spec_config = getattr(vllm_config, "speculative_config", None)

        # Patch to prevent num_speculative_tokens > 1 (backend limitation).
        num_spec_tokens = getattr(spec_config, "num_speculative_tokens", None)
        if num_spec_tokens is not None and num_spec_tokens != 1:
            raise ValueError(
                f'run_mp_engine: only support num_speculative_tokens == 1, '
                f'but get {num_spec_tokens}')

        # Record the active speculative method (or "default") so the VACC
        # runtime can select the matching model configuration.
        default_model_infos = "default"
        if spec_config is not None and hasattr(spec_config, "method"):
            default_model_infos = spec_config.method
        from vllm_vacc.vllm.config_manager import vllm_vacc_config_manager
        vllm_vacc_config_manager().update_model_infos(default_model_infos)

        # Deferred imports: only pull in the engine stack actually selected.
        # Importing directly as `engine_cls` removes the need for a None
        # sentinel and the unreachable assert that guarded it.
        import vllm.envs as envs
        if envs.VLLM_USE_V1:
            from vllm.v1.engine.llm_engine import LLMEngine as engine_cls
        else:
            from vllm.engine.llm_engine import LLMEngine as engine_cls

        return engine_cls.from_vllm_config(
            vllm_config=vllm_config,
            usage_context=usage_context,
            stat_loggers=stat_loggers,
            disable_log_stats=engine_args.disable_log_stats,
        )