Remove VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE (#5272)

`VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE` is only ever used together with `VLLM_ASCEND_ENABLE_PREFETCH_MLP`, which makes it completely redundant. This PR removes it. - vLLM version: release/v0.13.0 - vLLM main: ad32e3e19c Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2025-12-25 11:09:56 +08:00
parent 13cd6362c6
commit 2ae0bad96d
8 changed files with 7 additions and 21 deletions
--- a/vllm_ascend/ops/linear_op.py
+++ b/vllm_ascend/ops/linear_op.py
@@ -53,13 +53,13 @@ from vllm.distributed import (split_tensor_along_last_dim,
 from vllm.distributed.parallel_state import get_tp_group
 from vllm.forward_context import get_forward_context

+from vllm_ascend import envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.distributed.parallel_state import (get_flashcomm2_odp_group,
                                                    get_flashcomm2_otp_group,
                                                    get_mlp_tp_group,
                                                    get_otp_group)
-from vllm_ascend.utils import (dense_optim_enable, enable_sp,
-                               flashcomm2_enable,
+from vllm_ascend.utils import (enable_sp, flashcomm2_enable,
                               get_flashcomm2_reorgnized_batch_ids,
                               matmul_allreduce_enable, mlp_tp_enable,
                               oproj_tp_enable, shared_expert_dp_enabled)
@@ -135,7 +135,7 @@ class CustomRowParallelOp(CustomLinearOp):

    def apply(self, input_):
        output, output_bias = self.apply_impl(input_)
-        if dense_optim_enable():
+        if envs_ascend.VLLM_ASCEND_ENABLE_PREFETCH_MLP:
            torch.ops.vllm.maybe_prefetch_mlp_gate_up_proj(output, self.prefix)
        if not self.return_bias:
            return output