add qwen3

2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions
--- a/vllm-v0.6.2/vllm/__init__.py
+++ b/vllm-v0.6.2/vllm/__init__.py
@@ -0,0 +1,58 @@
+"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
+
+import os
+os.environ['PYTORCH_CNDEV_BASED_MLU_CHECK'] = '1'  # NOTE(review): presumably must precede the imports below - confirm
+os.environ['CN_NOTIFIER_POOL_MAX'] = "1000"
+
+from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
+from vllm.engine.async_llm_engine import AsyncLLMEngine
+from vllm.engine.llm_engine import LLMEngine
+from vllm.entrypoints.llm import LLM
+from vllm.executor.ray_utils import initialize_ray_cluster
+from vllm.inputs import PromptType, TextPrompt, TokensPrompt
+from vllm.model_executor.models import ModelRegistry
+from vllm.outputs import (CompletionOutput, EmbeddingOutput,
+                          EmbeddingRequestOutput, RequestOutput)
+from vllm.pooling_params import PoolingParams
+from vllm.sampling_params import SamplingParams
+
+from .version import (__version__, __version_tuple__,
+                      __vllm_mlu_version__, __torch_version__)
+
+
+from vllm.platforms import current_platform
+
+if current_platform.is_mlu():
+    try:
+        import vllm_mlu
+        print("\033[0;32mApply vllm_mlu success, running in performance version !\033[0m")
+    except ModuleNotFoundError:
+        print("\033[0;31mApply vllm_mlu failed, running in basic version !\033[0m")
+    except Exception as e:
+        print("\033[0;31mApply vllm_mlu failed!\033[0m")
+        raise Exception(e)
+
+
+__version__ = f"{__version__}+mlu{__vllm_mlu_version__}.pt{__torch_version__}"  # rebinds the imported base version with "+mlu<ver>.pt<ver>" appended
+
+
+__all__ = [  # names exported by "from vllm import *"
+    "__version__",
+    "__version_tuple__",
+    "LLM",
+    "ModelRegistry",
+    "PromptType",
+    "TextPrompt",
+    "TokensPrompt",
+    "SamplingParams",
+    "RequestOutput",
+    "CompletionOutput",
+    "EmbeddingOutput",
+    "EmbeddingRequestOutput",
+    "LLMEngine",
+    "EngineArgs",
+    "AsyncLLMEngine",
+    "AsyncEngineArgs",
+    "initialize_ray_cluster",
+    "PoolingParams",
+]