"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

import os
# MLU-related environment switches. They are assigned here, ahead of the vllm
# imports that follow -- presumably so they are already visible to any code
# that reads them at import time (TODO confirm which component consumes them).
# Existing values of these variables are overwritten unconditionally.
os.environ.update({
    'PYTORCH_CNDEV_BASED_MLU_CHECK': '1',
    'CN_NOTIFIER_POOL_MAX': "1000",
})

from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams

from .version import (__version__, __version_tuple__,
                      __vllm_mlu_version__, __torch_version__)


from vllm.platforms import current_platform

# On an MLU platform, try to import the optional ``vllm_mlu`` package. The
# imported name is not used anywhere else in this module, so the import matters
# only for whatever ``vllm_mlu`` does at import time.
if current_platform.is_mlu():
    try:
        # Keep the try body to the single statement that is expected to fail.
        import vllm_mlu  # noqa: F401
    except ModuleNotFoundError:
        # Best-effort fallback: a missing module is reported but not fatal,
        # and the package keeps loading without the extension.
        print("\033[0;31mApply vllm_mlu failed, running in basic version !\033[0m")
    except Exception:
        print("\033[0;31mApply vllm_mlu failed!\033[0m")
        # Re-raise the original exception unchanged so that its concrete type
        # and traceback are preserved (wrapping it in a new ``Exception`` would
        # reduce it to a generic error carrying only the message text).
        raise
    else:
        print("\033[0;32mApply vllm_mlu success, running in performance version !\033[0m")


# Rebind the package version string imported from ``.version`` so that it also
# carries the MLU plugin version and the torch version as a suffix:
#     "<base version>+mlu<__vllm_mlu_version__>.pt<__torch_version__>"
# ``__version_tuple__`` is not modified here and stays as imported.
__version__ = f"{__version__}+mlu{__vllm_mlu_version__}.pt{__torch_version__}"


# Public API of the package: the names exported by ``from vllm import *``.
# Every entry is bound at module level by one of the imports above.
# ``__vllm_mlu_version__`` and ``__torch_version__`` are imported by this
# module but are not listed here.
__all__ = [
    # Version metadata (from .version; __version__ is rebound above).
    "__version__",
    "__version_tuple__",
    # From vllm.entrypoints.llm.
    "LLM",
    # From vllm.model_executor.models.
    "ModelRegistry",
    # From vllm.inputs.
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    # From vllm.sampling_params.
    "SamplingParams",
    # From vllm.outputs.
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    # From vllm.engine.llm_engine / vllm.engine.arg_utils.
    "LLMEngine",
    "EngineArgs",
    # From vllm.engine.async_llm_engine / vllm.engine.arg_utils.
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    # From vllm.executor.ray_utils.
    "initialize_ray_cluster",
    # From vllm.pooling_params.
    "PoolingParams",
]