[Bugfix] Sync MRotaryEmbedding interface change to recover CI (#1399)
### What this PR does / why we need it?
Sync with the upstream `MRotaryEmbedding` interface change (https://github.com/vllm-project/vllm/pull/19939) to recover the main CI.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
CI passed.

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -274,6 +274,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
dtype=torch.int64,
|
dtype=torch.int64,
|
||||||
device="cpu",
|
device="cpu",
|
||||||
pin_memory=True)
|
pin_memory=True)
|
||||||
|
self.mrope_positions_np = self.mrope_positions_cpu.numpy()
|
||||||
|
|
||||||
if self.is_multimodal_model:
|
if self.is_multimodal_model:
|
||||||
self.inputs_embeds = torch.zeros(
|
self.inputs_embeds = torch.zeros(
|
||||||
@@ -793,14 +794,23 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
dst_start = mrope_pos_ptr
|
dst_start = mrope_pos_ptr
|
||||||
dst_end = mrope_pos_ptr + completion_part_len
|
dst_end = mrope_pos_ptr + completion_part_len
|
||||||
|
|
||||||
self.mrope_positions_cpu[:, dst_start:dst_end] = \
|
if vllm_version_is("0.9.1"):
|
||||||
|
self.mrope_positions_cpu[:, dst_start:dst_end] = \
|
||||||
|
MRotaryEmbedding.get_next_input_positions_tensor(
|
||||||
|
req.mrope_position_delta,
|
||||||
|
context_len=num_computed_tokens +
|
||||||
|
prompt_part_len,
|
||||||
|
seq_len=num_computed_tokens +
|
||||||
|
prompt_part_len +
|
||||||
|
completion_part_len,
|
||||||
|
)
|
||||||
|
else:
|
||||||
MRotaryEmbedding.get_next_input_positions_tensor(
|
MRotaryEmbedding.get_next_input_positions_tensor(
|
||||||
req.mrope_position_delta,
|
out=self.mrope_positions_np,
|
||||||
context_len=num_computed_tokens +
|
out_offset=dst_start,
|
||||||
prompt_part_len,
|
mrope_position_delta=req.mrope_position_delta,
|
||||||
seq_len=num_computed_tokens +
|
context_len=num_computed_tokens + prompt_part_len,
|
||||||
prompt_part_len +
|
num_new_tokens=completion_part_len,
|
||||||
completion_part_len,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
mrope_pos_ptr += completion_part_len
|
mrope_pos_ptr += completion_part_len
|
||||||
|
|||||||
Reference in New Issue
Block a user