Remove useless env (#4858)
Clean up unused envs. The following envs are not used anymore:
`VLLM_ASCEND_TRACE_RECOMPILES`,
`VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE`,
`VLLM_ASCEND_MLA_PA`, `PHYSICAL_DEVICES`
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
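For anyone upgrading, a quick way to spot stale settings is to check whether any of the removed names are still exported. A minimal, purely illustrative check (not part of this change):

```python
import os
import warnings

# Env vars removed by this commit; setting them no longer has any effect.
REMOVED_ENVS = (
    "VLLM_ASCEND_TRACE_RECOMPILES",
    "VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE",
    "VLLM_ASCEND_MLA_PA",
    "PHYSICAL_DEVICES",
)

for name in REMOVED_ENVS:
    if name in os.environ:
        warnings.warn(f"{name} is set but is no longer read by vllm-ascend")
```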
@@ -138,7 +138,6 @@ def test_models_distributed_Qwen3_W4A8DYNAMIC_new_version(model):
@pytest.mark.parametrize("model", DEEPSEEK_W4A8_MODELS)
@patch.dict(os.environ, {"VLLM_ASCEND_MLA_PA": "1"})
@patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"})
def test_models_distributed_DeepSeek_W4A8DYNAMIC(model):
    prompts = [
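For context, `@patch.dict(os.environ, ...)` comes from `unittest.mock` and overlays the given entries on `os.environ` only while the decorated test runs, restoring the original mapping afterwards. A minimal, self-contained sketch of the same mechanism (not taken from the test file):

```python
import os
from unittest.mock import patch

# patch.dict overlays the given entries on os.environ inside the block
# and restores the original mapping when the block exits.
with patch.dict(os.environ, {"HCCL_BUFFSIZE": "1024"}):
    assert os.environ["HCCL_BUFFSIZE"] == "1024"
# Outside the block the overlay is gone (unless it was already set before).
```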
@@ -1055,7 +1055,6 @@ class MockTransferEngine:
class MockEnvsAscend:
    MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"
    PHYSICAL_DEVICES = "10,11"


def mock_get_tensor_model_parallel_rank():
@@ -893,12 +893,3 @@ class TestMooncakeLayerwiseConnectorWorker(unittest.TestCase):
        worker.register_kv_caches(mla_caches)
        self.assertTrue(worker.use_mla)
        self.assertEqual(len(worker.block_len), 2)

    def test_device_id_selection_with_physical_devices(self):
        worker = MooncakeLayerwiseConnectorWorker(self.vllm_config,
                                                  self.engine_id)
        self.assertIsNotNone(worker.engine)


if __name__ == '__main__':
    unittest.main()
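Fakes such as `MockEnvsAscend` are typically swapped in for the real envs object with `unittest.mock.patch` so a test can pin configuration values. A minimal sketch under that assumption; the `connector` namespace and attribute name below are illustrative, not taken from the repository:

```python
import types
import unittest
from unittest.mock import patch


# A stand-in for the real envs module; values are pinned for the test.
class MockEnvsAscend:
    MOONCAKE_CONNECTOR_PROTOCOL = "mock_protocol"


# Hypothetical module-like object standing in for the code under test.
connector = types.SimpleNamespace(envs_ascend=None)


class ExampleTest(unittest.TestCase):
    def test_uses_mocked_envs(self):
        # patch.object swaps the attribute for the duration of the block.
        with patch.object(connector, "envs_ascend", MockEnvsAscend()):
            self.assertEqual(
                connector.envs_ascend.MOONCAKE_CONNECTOR_PROTOCOL,
                "mock_protocol")


if __name__ == "__main__":
    unittest.main()
```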
@@ -68,9 +68,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
    # that the correct package is installed.
    "VLLM_VERSION":
    lambda: os.getenv("VLLM_VERSION", None),
    # Whether to enable the trace recompiles from pytorch.
    "VLLM_ASCEND_TRACE_RECOMPILES":
    lambda: bool(int(os.getenv("VLLM_ASCEND_TRACE_RECOMPILES", '0'))),
    # Whether to enable fused_experts_allgather_ep. MoeInitRoutingV3 and
    # GroupedMatmulFinalizeRouting operators are combined to implement EP.
    "VLLM_ENABLE_FUSED_EXPERTS_ALLGATHER_EP":
@@ -86,16 +83,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
    # value to False to disable the optimized model.
    "USE_OPTIMIZED_MODEL":
    lambda: bool(int(os.getenv('USE_OPTIMIZED_MODEL', '1'))),
    # The tolerance of the kv cache size, if the difference between the
    # actual kv cache size and the cached kv cache size is less than this value,
    # then the cached kv cache size will be used.
    "VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE":
    lambda: int(
        os.getenv("VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE", 64)),
    # Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
    # and the mla_pa will be the default path of deepseek decode path.
    "VLLM_ASCEND_MLA_PA":
    lambda: int(os.getenv("VLLM_ASCEND_MLA_PA", 0)),
    # Whether to enable MatmulAllReduce fusion kernel when tensor parallel is enabled.
    # this feature is supported in A2, and eager mode will get better performance.
    "VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE":
@@ -130,10 +117,6 @@ env_variables: Dict[str, Callable[[], Any]] = {
    # this feature in eager mode will get better performance.
    "VLLM_ASCEND_ENABLE_MLP_OPTIMIZE":
    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_MLP_OPTIMIZE", '0'))),
    # Determine the number of physical devices in a non-full-use scenario
    # caused by the initialization of the Mooncake connector.
    "PHYSICAL_DEVICES":
    lambda: os.getenv("PHYSICAL_DEVICES", None),
    # Whether to enable msMonitor tool to monitor the performance of vllm-ascend.
    "MSMONITOR_USE_DAEMON":
    lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
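Registries of this dict-of-lambdas shape are usually exposed lazily through a module-level `__getattr__`, so deleting an entry also removes the corresponding attribute. A minimal sketch of that pattern, assuming the same layout (not the actual vllm-ascend implementation):

```python
import os
from typing import Any, Callable, Dict

# Each entry is evaluated only when the name is accessed, so the value
# reflects os.environ at lookup time rather than at import time.
env_variables: Dict[str, Callable[[], Any]] = {
    "MSMONITOR_USE_DAEMON":
    lambda: bool(int(os.getenv("MSMONITOR_USE_DAEMON", '0'))),
}


def __getattr__(name: str) -> Any:
    # Module-level __getattr__ (PEP 562): names removed from the dict now
    # raise AttributeError instead of silently returning a default.
    if name in env_variables:
        return env_variables[name]()
    raise AttributeError(f"module has no attribute {name!r}")
```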