forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
This commit is contained in:
58
vllm-v0.6.2/vllm/__init__.py
Normal file
58
vllm-v0.6.2/vllm/__init__.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""

import os

# Both variables are written before any other vllm module is imported, so code
# that reads them at import time sees these values. The assignments are
# unconditional: a value already present in the environment is overwritten.
# NOTE(review): presumably consumed by the Cambricon MLU torch/runtime stack --
# confirm the exact meaning of each variable against the vendor documentation.
os.environ['PYTORCH_CNDEV_BASED_MLU_CHECK'] = '1'
os.environ['CN_NOTIFIER_POOL_MAX'] = "1000"

# Re-export the public API of the package from its submodules.
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.llm import LLM
from vllm.executor.ray_utils import initialize_ray_cluster
from vllm.inputs import PromptType, TextPrompt, TokensPrompt
from vllm.model_executor.models import ModelRegistry
from vllm.outputs import (CompletionOutput, EmbeddingOutput,
                          EmbeddingRequestOutput, RequestOutput)
from vllm.pooling_params import PoolingParams
from vllm.sampling_params import SamplingParams

# Version metadata; the MLU and torch versions are used further down to build
# the final ``__version__`` string.
from .version import (__version__, __version_tuple__,
                      __vllm_mlu_version__, __torch_version__)


from vllm.platforms import current_platform

if current_platform.is_mlu():
    # The vllm_mlu module is imported only for whatever it does at import
    # time; the name is not referenced again in this file.
    # NOTE(review): presumably it patches vLLM with MLU-optimised code paths
    # (the messages below call this the "performance version") -- confirm.
    try:
        import vllm_mlu  # noqa: F401
        print("\033[0;32mApply vllm_mlu success, running in performance version !\033[0m")
    except ModuleNotFoundError:
        # Deliberate best effort: without the plugin the package still loads.
        # NOTE(review): this also matches a missing module raised from inside
        # vllm_mlu itself, not only a missing vllm_mlu package.
        print("\033[0;31mApply vllm_mlu failed, running in basic version !\033[0m")
    except Exception:
        print("\033[0;31mApply vllm_mlu failed!\033[0m")
        # Re-raise the original error unchanged so its concrete type and
        # traceback are preserved instead of being wrapped in a bare
        # ``Exception``. Callers catching ``Exception`` are unaffected.
        raise

# Extend the base version imported from .version with the MLU plugin version
# and the torch version: "<base>+mlu<mlu_version>.pt<torch_version>".
# NOTE(review): the indentation of this file was lost in extraction; confirm
# whether this assignment belongs inside the MLU-only branch or at top level.
__version__ = f"{__version__}+mlu{__vllm_mlu_version__}.pt{__torch_version__}"
# Names exported by ``from vllm import *``.
__all__ = [
    "__version__",
    "__version_tuple__",
    "LLM",
    "ModelRegistry",
    "PromptType",
    "TextPrompt",
    "TokensPrompt",
    "SamplingParams",
    "RequestOutput",
    "CompletionOutput",
    "EmbeddingOutput",
    "EmbeddingRequestOutput",
    "LLMEngine",
    "EngineArgs",
    "AsyncLLMEngine",
    "AsyncEngineArgs",
    "initialize_ray_cluster",
    "PoolingParams",
]
Reference in New Issue
Block a user