[Bugfix] Resolve the interface compatibility issue of get_input_embeddings in MM (#4638)

### What this PR does / why we need it?
Resolve the interface compatibility issue with `get_input_embeddings` in the
multimodal (MM) path: the `get_input_embeddings` method of other models does
not accept the `is_multimodal` parameter, so it is only passed for models
whose implementation supports it.

---------

Signed-off-by: Levi-JQ <yujinqi2@huawei.com>
Co-authored-by: Levi-JQ <yujinqi2@huawei.com>
This commit is contained in:
Levi
2025-12-02 22:21:47 +08:00
committed by GitHub
parent 3b4cb23616
commit 865f1f7fc8

View File

@@ -1394,11 +1394,19 @@ class NPUModelRunner(LoRAModelRunnerMixin):
# embeddings), we always use embeddings (rather than token ids)
# as input to the multimodal model, even when the input is text.
input_ids = self.input_ids[:total_num_scheduled_tokens]
inputs_embeds = self.model.get_input_embeddings(
input_ids,
multimodal_embeddings=mm_embeds,
is_multimodal=is_mm_embed,
)
model_type = self.vllm_config.model_config.hf_config.model_type
if model_type == "qwen2_5_vl":
inputs_embeds = self.model.get_input_embeddings(
input_ids,
multimodal_embeddings=mm_embeds,
is_multimodal=is_mm_embed,
)
else:
if mm_embeds:
inputs_embeds = self.model.get_input_embeddings(
input_ids, mm_embeds)
else:
inputs_embeds = self.model.get_input_embeddings(input_ids)
# TODO(woosuk): Avoid the copy. Optimize.
self.inputs_embeds[:total_num_scheduled_tokens].copy_(
inputs_embeds)