Fix typo of VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE (#1112)
### What this PR does / why we need it?

Fix typo of `VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE` (was misspelled as `VLLM_ASCEND_ENABLE_TOPK_OPTIMZE`).

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

CI passed

Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -61,7 +61,7 @@ def test_models_distributed_DeepSeek():
|
|||||||
vllm_model.generate_greedy(example_prompts, max_tokens)
|
vllm_model.generate_greedy(example_prompts, max_tokens)
|
||||||
|
|
||||||
|
|
||||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"})
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
|
||||||
def test_models_distributed_topk() -> None:
|
def test_models_distributed_topk() -> None:
|
||||||
example_prompts = [
|
example_prompts = [
|
||||||
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
|
"vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ def test_multimodal(model, prompt_template, vllm_runner):
|
|||||||
max_tokens=64)
|
max_tokens=64)
|
||||||
|
|
||||||
|
|
||||||
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE": "1"})
|
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
|
||||||
def test_models_topk() -> None:
|
def test_models_topk() -> None:
|
||||||
example_prompts = [
|
example_prompts = [
|
||||||
"Hello, my name is",
|
"Hello, my name is",
|
||||||
|
|||||||
@@ -57,8 +57,8 @@ env_variables: Dict[str, Callable[[], Any]] = {
|
|||||||
lambda: bool(int(os.getenv("VLLM_ENABLE_MC2", '0'))),
|
lambda: bool(int(os.getenv("VLLM_ENABLE_MC2", '0'))),
|
||||||
# Whether to enable the topk optimization. It's disabled by default for experimental support
|
# Whether to enable the topk optimization. It's disabled by default for experimental support
|
||||||
# We'll make it enabled by default in the future.
|
# We'll make it enabled by default in the future.
|
||||||
"VLLM_ASCEND_ENABLE_TOPK_OPTIMZE":
|
"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE":
|
||||||
lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMZE", '0'))),
|
lambda: bool(int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE", '0'))),
|
||||||
# Whether to use LCCL communication. If not set, the default value is False.
|
# Whether to use LCCL communication. If not set, the default value is False.
|
||||||
"USING_LCCL_COM":
|
"USING_LCCL_COM":
|
||||||
lambda: bool(int(os.getenv("USING_LCCL_COM", '0'))),
|
lambda: bool(int(os.getenv("USING_LCCL_COM", '0'))),
|
||||||
|
|||||||
@@ -97,5 +97,5 @@ def topk_topp_forward_native(
|
|||||||
|
|
||||||
|
|
||||||
Sampler.apply_min_p = apply_min_p
|
Sampler.apply_min_p = apply_min_p
|
||||||
if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMZE:
|
if envs.VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE:
|
||||||
TopKTopPSampler.forward_native = topk_topp_forward_native
|
TopKTopPSampler.forward_native = topk_topp_forward_native
|
||||||
|
|||||||
Reference in New Issue
Block a user