Files
2026-02-04 17:22:39 +08:00

59 lines
1.8 KiB
Python

"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
import os

# MLU runtime tuning knobs. These must be exported before torch / the MLU
# backend are imported (which happens transitively via the vllm imports
# below), otherwise the backend will not pick them up.
os.environ['PYTORCH_CNDEV_BASED_MLU_CHECK'] = '1'
os.environ['CN_NOTIFIER_POOL_MAX'] = "1000"

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.platforms import current_platform
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams

from .version import (__version__, __version_tuple__,
                      __vllm_mlu_version__, __torch_version__)

if current_platform.is_mlu():
    # vllm_mlu is an optional plugin imported purely for its side effects:
    # when present it patches vLLM with optimized MLU kernels
    # ("performance version"); when absent we fall back to the basic path.
    try:
        import vllm_mlu  # noqa: F401  (side-effect import)
        print("\033[0;32mApply vllm_mlu success, running in performance version !\033[0m")
    except ModuleNotFoundError:
        # Plugin not installed: deliberate best-effort fallback, not an error.
        print("\033[0;31mApply vllm_mlu failed, running in basic version !\033[0m")
    except Exception as e:
        print("\033[0;31mApply vllm_mlu failed!\033[0m")
        # Re-raise with an explicit cause chain; the original
        # ``raise Exception(e)`` left only the implicit context and made the
        # chain easy to lose. Exception type is unchanged for any caller
        # catching it around import.
        raise Exception(e) from e

# Decorate the upstream version string with the MLU plugin and torch
# versions, e.g. "0.6.3+mlu1.0.pt2.1".
__version__ = f"{__version__}+mlu{__vllm_mlu_version__}.pt{__torch_version__}"

# Public API of the package.
__all__ = [
    "__version__",
    "__version_tuple__",
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]