[bugfix] Fix some bugs that may cause failures at run time (#896)

### What this PR does / why we need it?
Fixes the bug where the graph mode is the same for p and d, and fixes some other bugs.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
By following the end-to-end test.

Signed-off-by: ningbenzhe1 <ningbenzhe@huawei.com>
2025-06-03 11:07:33 +08:00
parent 92bc5576d8
commit 6ec64a3f96
7 changed files with 15 additions and 7 deletions
--- a/vllm_ascend/worker/mtp_proposer_v1.py
+++ b/vllm_ascend/worker/mtp_proposer_v1.py
@@ -219,4 +219,4 @@ def prepare_input_kernel(out_ptr: torch.Tensor, cu_query_lens: torch.Tensor,

    global_indices_flat = global_indices[mask]
    values_flat = values[mask]
-    out_ptr[global_indices_flat] = values_flat
+    out_ptr[global_indices_flat] = values_flat
--- a/vllm_ascend/worker/worker_v1.py
+++ b/vllm_ascend/worker/worker_v1.py
@@ -173,7 +173,7 @@ class NPUWorker(WorkerBase):
        scheduler_output: "SchedulerOutput",
    ) -> Optional[ModelRunnerOutput]:
        output = self.model_runner.execute_model(scheduler_output)
-        return output if self.rank == 0 else None
+        return output if self.is_driver_worker else None

    def load_model(self) -> None:
        self.model_runner.load_model()