From 358ba6899401e5a1f5e8860e8ea88900cc27dd38 Mon Sep 17 00:00:00 2001
From: rjg-lyh <83491835+rjg-lyh@users.noreply.github.com>
Date: Wed, 27 Aug 2025 09:08:17 +0800
Subject: [PATCH] [main][bugfix] Fix MatmulNZ format bug on some machines
 (#2549)

### What this PR does / why we need it?
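This PR fixes a bug on some machines where quantmatmul failed to run
with the NZ format. It sets `torch.npu.config.allow_internal_format = True`
at module level in `model_runner_v1.py`, so that tensors can be
initialized and cast with internal formats such as NZ.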

### How was this patch tested?
CI passed with the existing tests.

- vLLM version: v0.10.1.1
- vLLM main:
https://github.com/vllm-project/vllm/commit/b5d34af3286ee0334d9f7bd729774ac55c5805e9

Signed-off-by: rjg-lyh <1318825571@qq.com>
---
 vllm_ascend/worker/model_runner_v1.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index aa72eb0..4dc186f 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -112,6 +112,9 @@ import torch_npu
 
 import vllm_ascend.envs as envs_ascend
 
+# if true, allow tensor initialization and casting with internal format (e.g., NZ)
+torch.npu.config.allow_internal_format = True
+
 if is_310p():
     torch_npu.npu.set_compile_mode(jit_compile=False)
     ACL_FORMAT = ACL_FORMAT_FRACTAL_NZ