Remove VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE (#5272)

`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` is only ever used together with
`VLLM_ASCEND_ENABLE_PREFETCH_MLP`, so it is entirely redundant as a
separate switch. This PR removes it.
- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-25 11:09:56 +08:00
committed by GitHub
parent 13cd6362c6
commit 2ae0bad96d
8 changed files with 7 additions and 21 deletions

View File

@@ -53,13 +53,13 @@ from vllm.distributed import (split_tensor_along_last_dim,
from vllm.distributed.parallel_state import get_tp_group
from vllm.forward_context import get_forward_context
from vllm_ascend import envs as envs_ascend
from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.distributed.parallel_state import (get_flashcomm2_odp_group,
get_flashcomm2_otp_group,
get_mlp_tp_group,
get_otp_group)
from vllm_ascend.utils import (dense_optim_enable, enable_sp,
flashcomm2_enable,
from vllm_ascend.utils import (enable_sp, flashcomm2_enable,
get_flashcomm2_reorgnized_batch_ids,
matmul_allreduce_enable, mlp_tp_enable,
oproj_tp_enable, shared_expert_dp_enabled)
@@ -135,7 +135,7 @@ class CustomRowParallelOp(CustomLinearOp):
def apply(self, input_):
output, output_bias = self.apply_impl(input_)
if dense_optim_enable():
if envs_ascend.VLLM_ASCEND_ENABLE_PREFETCH_MLP:
torch.ops.vllm.maybe_prefetch_mlp_gate_up_proj(output, self.prefix)
if not self.return_bias:
return output