[CI] Upgrade vLLM version (#3139)
Upgrade the vLLM version to the newest commit.

- Fix the breaking change introduced by 969b4da3a6
- Add a patch to quickly fix torchair for de94289a98
- Fix the UT error introduced by de94289a98

Close: https://github.com/vllm-project/vllm-ascend/issues/3138

- vLLM version: v0.10.2
- vLLM main: f225ea7dd9

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: MengqingCao <cmq0113@163.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
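For anyone hitting the same breaking change: the fix gates the `get_attn_backend()` call on the installed vLLM release, so the v0.10.2 signature and the newer main signature can coexist. Judging from the two branches in the diff below, the only difference is whether `self.model_config.is_attention_free` is still passed. A minimal sketch of that version-gating pattern follows, assuming the helper simply compares the installed release string; it is an illustrative stand-in for the project's own `vllm_version_is` utility, which may handle dev/nightly versions differently.

```python
# Minimal sketch of the version-gating pattern (assumption: the project's own
# vllm_version_is helper may normalize dev/nightly version strings differently).
from importlib.metadata import version


def vllm_version_is(target: str) -> bool:
    """Return True if the installed vLLM release string equals `target`."""
    return version("vllm") == target


# Usage: keep both call signatures and pick one at runtime. The released
# v0.10.2 still accepts the extra `is_attention_free` argument in
# get_attn_backend(); the branch for newer vLLM commits drops it (see the
# diff below for the actual call sites).
```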
@@ -304,17 +304,24 @@ class NPUModelRunner(LoRAModelRunnerMixin):
             (self.max_num_tokens, self.model_config.get_hidden_size()),
             dtype=self.dtype,
             device=self.device)

         # Set up Attention
-        self.attn_backend = get_attn_backend(
-            0,
-            self.dtype,
-            None,
-            self.block_size,
-            self.model_config.is_attention_free,
-            use_mla=self.model_config.use_mla,
-        )
+        if vllm_version_is("0.10.2"):
+            self.attn_backend = get_attn_backend(
+                0,
+                self.dtype,
+                None,
+                self.block_size,
+                self.model_config.is_attention_free,
+                use_mla=self.model_config.use_mla,
+            )
+        else:
+            self.attn_backend = get_attn_backend(
+                0,
+                self.dtype,
+                None,
+                self.block_size,
+                use_mla=self.model_config.use_mla,
+            )
         if torch.version.cann.startswith("8.3"):
             self.attn_mask_builder = AttentionMaskBuilder(
                 self.scheduler_config.max_num_batched_tokens, self.dtype,