[Misc] Drop Prefetch MLP Env (#7357)

### What this PR does / why we need it? remove deprecated environment variables related to MLP prefetching ### Does this PR introduce _any_ user-facing change? yes, the deprecated env vars can not be used then. - vLLM version: v0.17.0 - vLLM main: 4034c3d32e Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2026-03-19 14:27:27 +08:00
parent ce239db4fb
commit 8e0ebb470a
4 changed files with 10 additions and 65 deletions
--- a/vllm_ascend/envs.py
+++ b/vllm_ascend/envs.py
@@ -77,16 +77,6 @@ env_variables: dict[str, Callable[[], Any]] = {
    # For a detailed introduction to the parameters and the differences and applicable scenarios
    # between this feature and FLASHCOMM1, please refer to the feature guide in the documentation.
    "VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE": lambda: int(os.getenv("VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE", 0)),
-    # Whether to enable MLP weight prefetch, only used in small concurrency.
-    "VLLM_ASCEND_ENABLE_PREFETCH_MLP": lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_PREFETCH_MLP", "0"))),
-    # buffer size for gate up prefetch
-    "VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE": lambda: int(
-        os.getenv("VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE", 18 * 1024 * 1024)
-    ),
-    # buffer size for down proj prefetch
-    "VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE": lambda: int(
-        os.getenv("VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE", 18 * 1024 * 1024)
-    ),
    # Whether to enable msMonitor tool to monitor the performance of vllm-ascend.
    "MSMONITOR_USE_DAEMON": lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", "0"))),
    # Whether to enable MLAPO optimization for DeepSeek W8A8 series models.