diff --git a/tests/multicard/test_offline_inference_distributed.py b/tests/multicard/test_offline_inference_distributed.py index 50675cf..dc02c4b 100644 --- a/tests/multicard/test_offline_inference_distributed.py +++ b/tests/multicard/test_offline_inference_distributed.py @@ -61,7 +61,7 @@ def test_models_distributed_DeepSeek(): vllm_model.generate_greedy(example_prompts, max_tokens) -@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"}) +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"}) def test_models_distributed_topk() -> None: example_prompts = [ "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.", diff --git a/tests/singlecard/test_offline_inference.py b/tests/singlecard/test_offline_inference.py index d3ed09f..a65451d 100644 --- a/tests/singlecard/test_offline_inference.py +++ b/tests/singlecard/test_offline_inference.py @@ -83,7 +83,7 @@ def test_multimodal(model, prompt_template, vllm_runner): max_tokens=64) -@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"}) +@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"}) def test_models_topk() -> None: example_prompts = [ "Hello, my name is", diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py index 9aa2d70..f46178e 100644 --- a/vllm_ascend/envs.py +++ b/vllm_ascend/envs.py @@ -57,8 +57,8 @@ env_variables: Dict[str, Callable[[], Any]] = { lambda: bool(int(os.getenv("VLLM_ENABLE_MC2", '0'))), # Whether to enable the topk optimization. It's disabled by default for experimental support # We'll make it enabled by default in the future. - "VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": - lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMZE", '0'))), + "VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": + lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE", '0'))), # Whether to use LCCL communication. If not set, the default value is False. "USING_LCCL_COM": lambda: bool(int(os.getenv("USING_LCCL_COM", '0'))), diff --git a/vllm_ascend/patch/worker/patch_common/patch_sampler.py b/vllm_ascend/patch/worker/patch_common/patch_sampler.py index 4954041..a6fbfbc 100644 --- a/vllm_ascend/patch/worker/patch_common/patch_sampler.py +++ b/vllm_ascend/patch/worker/patch_common/patch_sampler.py @@ -97,5 +97,5 @@ def topk_topp_forward_native( Sampler.apply_min_p = apply_min_p -if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMZE: +if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE: TopKTopPSampler.forward_native = topk_topp_forward_native