From b80a484864553547bc8ca3beda24858ef7bc135a Mon Sep 17 00:00:00 2001
From: linfeng-yuan <1102311262@qq.com>
Date: Sat, 7 Jun 2025 19:45:33 +0800
Subject: [PATCH] Fix typo of VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE (#1112)

### What this PR does / why we need it?
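Fix the misspelled environment variable name `VLLM_ASCEND_ENABLE_TOPK_OPTIMZE`
by renaming it to `VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE` in `vllm_ascend/envs.py`,
in the sampler patch that reads it, and in the single-card and multi-card tests
that set it.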

### Does this PR introduce _any_ user-facing change?

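No, unless the variable was being set under its old misspelled name: the
setting is now read from `VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE`, so
`VLLM_ASCEND_ENABLE_TOPK_OPTIMZE` is no longer read.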

### How was this patch tested?

CI passed

Signed-off-by: linfeng-yuan <1102311262@qq.com>
---
 tests/multicard/test_offline_inference_distributed.py  | 2 +-
 tests/singlecard/test_offline_inference.py             | 2 +-
 vllm_ascend/envs.py                                    | 4 ++--
 vllm_ascend/patch/worker/patch_common/patch_sampler.py | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/multicard/test_offline_inference_distributed.py b/tests/multicard/test_offline_inference_distributed.py
index 50675cf..dc02c4b 100644
--- a/tests/multicard/test_offline_inference_distributed.py
+++ b/tests/multicard/test_offline_inference_distributed.py
@@ -61,7 +61,7 @@ def test_models_distributed_DeepSeek():
         vllm_model.generate_greedy(example_prompts, max_tokens)
 
 
-@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"})
+@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
 def test_models_distributed_topk() -> None:
     example_prompts = [
         "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
diff --git a/tests/singlecard/test_offline_inference.py b/tests/singlecard/test_offline_inference.py
index d3ed09f..a65451d 100644
--- a/tests/singlecard/test_offline_inference.py
+++ b/tests/singlecard/test_offline_inference.py
@@ -83,7 +83,7 @@ def test_multimodal(model, prompt_template, vllm_runner):
                                    max_tokens=64)
 
 
-@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"})
+@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
 def test_models_topk() -> None:
     example_prompts = [
         "Hello, my name is",
diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py
index 9aa2d70..f46178e 100644
--- a/vllm_ascend/envs.py
+++ b/vllm_ascend/envs.py
@@ -57,8 +57,8 @@ env_variables: Dict[str, Callable[[], Any]] = {
     lambda: bool(int(os.getenv("VLLM_ENABLE_MC2", '0'))),
     # Whether to enable the topk optimization. It's disabled by default for experimental support
     # We'll make it enabled by default in the future.
-    "VLLM_ASCEND_ENABLE_TOPK_OPTIMZE":
-    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMZE", '0'))),
+    "VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE":
+    lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE", '0'))),
     # Whether to use LCCL communication. If not set, the default value is False.
     "USING_LCCL_COM":
     lambda: bool(int(os.getenv("USING_LCCL_COM", '0'))),
diff --git a/vllm_ascend/patch/worker/patch_common/patch_sampler.py b/vllm_ascend/patch/worker/patch_common/patch_sampler.py
index 4954041..a6fbfbc 100644
--- a/vllm_ascend/patch/worker/patch_common/patch_sampler.py
+++ b/vllm_ascend/patch/worker/patch_common/patch_sampler.py
@@ -97,5 +97,5 @@ def topk_topp_forward_native(
 
 
 Sampler.apply_min_p = apply_min_p
-if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMZE:
+if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE:
     TopKTopPSampler.forward_native = topk_topp_forward_native