diff --git a/vllm_ascend/spec_decode/eagle_proposer.py b/vllm_ascend/spec_decode/eagle_proposer.py index e342227..d14dc6d 100644 --- a/vllm_ascend/spec_decode/eagle_proposer.py +++ b/vllm_ascend/spec_decode/eagle_proposer.py @@ -1,5 +1,4 @@ # SPDX-License-Identifier: Apache-2.0 -import os from typing import Optional import numpy as np @@ -72,8 +71,7 @@ class EagleProposer(Proposer): 1, device=device, dtype=torch.int32) - attn_mask_len = min(self.vllm_config.model_config.max_model_len, - int(os.getenv("PAGED_ATTENTION_MASK_LEN", 10000))) + attn_mask_len = self.vllm_config.model_config.max_model_len self.attn_mask_builder = AttentionMaskBuilder( attn_mask_len, self.vllm_config.model_config.dtype)