[Misc] Drop Prefetch MLP Env (#7357)

### What this PR does / why we need it?
remove deprecated environment variables related to MLP prefetching

### Does this PR introduce _any_ user-facing change?
yes, the deprecated env vars can not be used then.

- vLLM version: v0.17.0
- vLLM main:
4034c3d32e

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2026-03-19 14:27:27 +08:00
committed by GitHub
parent ce239db4fb
commit 8e0ebb470a
4 changed files with 10 additions and 65 deletions

View File

@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
from typing import TYPE_CHECKING
from vllm.logger import logger
@@ -48,9 +47,11 @@ class AscendConfig:
eplb_config = additional_config.get("eplb_config", {})
self.eplb_config = EplbConfig(eplb_config)
weight_prefetch_config = additional_config.get("weight_prefetch_config", {})
self.weight_prefetch_config = WeightPrefetchConfig(weight_prefetch_config)
# Dump / PrecisionDebugger configuration
self.dump_config_path = additional_config.get("dump_config_path", None)
self._construct_weight_prefetch_config(additional_config)
self.layer_sharding = additional_config.get("layer_sharding", None)
if self.layer_sharding:
logger.info_once(
@@ -158,29 +159,6 @@ class AscendConfig:
and get_ascend_device_type() != AscendDeviceType.A5
)
def _construct_weight_prefetch_config(self, additional_config):
weight_prefetch_config = additional_config.get("weight_prefetch_config", {})
self.weight_prefetch_config = WeightPrefetchConfig(weight_prefetch_config)
# Deprecated env var handling for backward compatibility
if os.getenv("VLLM_ASCEND_ENABLE_PREFETCH_MLP", "0") == "1":
MAX_PREFETCH_WEIGHT_SIZE: int = 18 * 1024 * 1024
gate_up_prefetch_size = int(os.getenv("VLLM_ASCEND_MLP_GATE_UP_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE))
down_prefetch_size = int(os.getenv("VLLM_ASCEND_MLP_DOWN_PREFETCH_SIZE", MAX_PREFETCH_WEIGHT_SIZE))
self.weight_prefetch_config.set_mlp_pre_version_compatibale_config(
gate_up_prefetch_size, down_prefetch_size
)
logger.info_once(
f"MLP weight prefetch enabled from env variable VLLM_ASCEND_ENABLE_PREFETCH_MLP."
f"gate_up_prefetch_size={gate_up_prefetch_size}, "
f"down_prefetch_size={down_prefetch_size}."
)
warnings.warn(
"VLLM_ASCEND_ENABLE_PREFETCH_MLP is deprecated and will be removed in a v0.16.0 version. "
"Please use weight_prefetch_config in additional-config for now instead.",
DeprecationWarning,
stacklevel=2,
)
@staticmethod
def _get_compile_ranges(compilation_config):
from vllm_ascend.utils import vllm_version_is
@@ -380,28 +358,19 @@ class WeightPrefetchConfig:
Configuration Object for weight_prefetch_config from additional_config
"""
mlp_pre_version_compatibale_config: dict = {}
prefetch_ratio: dict = {
"attn": {
"qkv": 1.0,
"o": 1.0,
},
"moe": {"gate_up": 0.8},
"mlp": {"gate_up": 1, "down": 1.0},
"mlp": {"gate_up": 1.0, "down": 1.0},
}
def __init__(self, weight_prefetch_config: dict):
self.enabled = weight_prefetch_config.get("enabled", False)
self.prefetch_ratio = weight_prefetch_config.get("prefetch_ratio", self.prefetch_ratio)
def set_mlp_pre_version_compatibale_config(self, gate_up_prefetch_size: int, down_prefetch_size: int):
config = {
"gate_up": gate_up_prefetch_size,
"down": down_prefetch_size,
}
self.mlp_pre_version_compatibale_config = config
class EplbConfig:
"""