提交vllm0.11.0开发分支

This commit is contained in:
chenyili
2025-12-10 17:51:24 +08:00
parent deab7dd0b6
commit 7c22d621fb
175 changed files with 31856 additions and 8683 deletions

View File

@@ -47,15 +47,18 @@ env_variables: Dict[str, Callable[[], Any]] = {
# The C compiler used for compiling the package. If not set, the default
# value is None, which means the system default C compiler will be used.
"C_COMPILER": lambda: os.getenv("C_COMPILER", None),
"SOC_VERSION": lambda: os.getenv("SOC_VERSION", "KUNLUNP800"),
# The version of the Kunlun chip. If not set, the default value is
# KUNLUN910B1 (available for A2 and A3 series). It's used for package building.
# Please make sure that the version is correct.
"SOC_VERSION": lambda: os.getenv("SOC_VERSION", "KUNLUN910B1"),
# If set, vllm-kunlun will print verbose logs during compilation
"VERBOSE": lambda: bool(int(os.getenv("VERBOSE", "0"))),
# The home path for CANN toolkit. If not set, the default value is
# /usr/local/Kunlun/kunlun-toolkit/latest
"KUNLUN_HOME_PATH": lambda: os.getenv("KUNLUN_HOME_PATH", None),
# The path for XCCL library, it's used by pyxccl communicator backend. If
# not set, the default value is libxccl.so.
"XCCL_SO_PATH": lambda: os.environ.get("XCCL_SO_PATH", None),
# The path for HCCL library, it's used by pyhccl communicator backend. If
# not set, the default value is libhccl.so.
"HCCL_SO_PATH": lambda: os.environ.get("HCCL_SO_PATH", None),
# The version of vllm that is installed. This value is used for developers who
# installed vllm from source locally. In this case, the version of vllm is
# usually changed. For example, if the version of vllm is "0.9.0", but when
@@ -116,6 +119,7 @@ env_variables: Dict[str, Callable[[], Any]] = {
# and the mla_pa will be the default path of deepseek decode path.
"VLLM_KUNLUN_MLA_PA": lambda: int(os.getenv("VLLM_KUNLUN_MLA_PA", 0)),
# Whether to enable MatmulAllReduce fusion kernel when tensor parallel is enabled.
# This feature is supported on A2, and eager mode will get better performance.
"VLLM_KUNLUN_ENABLE_MATMUL_ALLREDUCE": lambda: bool(
int(os.getenv("VLLM_KUNLUN_ENABLE_MATMUL_ALLREDUCE", "0"))
),