From 42774df744355ce0b26e46ce7348a4a2d3cc49ab Mon Sep 17 00:00:00 2001
From: panchao-hub <315134829@qq.com>
Date: Mon, 5 Jan 2026 09:17:26 +0800
Subject: [PATCH] [Bugfix] Fix weight transpose in RL scenarios (#5567)

### What this PR does / why we need it?
In the RL training-inference switching scenario, there is no need to re-transpose the model weights when only the KV cache is resumed; the weights are already in the expected format, and transposing them again leads to a format mismatch. The w2/w13 weight transpose is now skipped unless the wake-up tags include "weights" (or no tags are given).

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: https://github.com/vllm-project/vllm/commit/7157596103666ee7ccb7008acee8bff8a8ff1731

Signed-off-by: p00465316
Co-authored-by: p00465316
---
 vllm_ascend/worker/worker.py | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/vllm_ascend/worker/worker.py b/vllm_ascend/worker/worker.py
index 303dae36..2937aa4a 100644
--- a/vllm_ascend/worker/worker.py
+++ b/vllm_ascend/worker/worker.py
@@ -170,23 +170,25 @@ class NPUWorker(WorkerBase):
         hidden_size = self.vllm_config.model_config.hf_config.hidden_size
         model = self.model_runner.model
 
-        for name, param in model.named_parameters():
-            if 'w2_weight' in name and param.shape[2] == hidden_size:
-                parts = name.split('.')
-                param_name = parts[-1]
-                parent_module = model.get_submodule(".".join(parts[:-1]))
+        if tags is None or "weights" in tags:
+            for name, param in model.named_parameters():
+                if 'w2_weight' in name and param.shape[2] == hidden_size:
+                    parts = name.split('.')
+                    param_name = parts[-1]
+                    parent_module = model.get_submodule(".".join(parts[:-1]))
 
-                w2_data = param.transpose(1, 2)
-                w2_data = torch.nn.Parameter(w2_data, requires_grad=False)
-                setattr(parent_module, param_name, w2_data)
-            elif 'w13_weight' in name and param.shape[1] == hidden_size:
-                parts = name.split('.')
-                param_name = parts[-1]
-                parent_module = model.get_submodule(".".join(parts[:-1]))
+                    w2_data = param.transpose(1, 2)
+                    w2_data = torch.nn.Parameter(w2_data, requires_grad=False)
+                    setattr(parent_module, param_name, w2_data)
+                elif 'w13_weight' in name and param.shape[1] == hidden_size:
+                    parts = name.split('.')
+                    param_name = parts[-1]
+                    parent_module = model.get_submodule(".".join(parts[:-1]))
 
-                w13_data = param.transpose(1, 2)
-                w13_data = torch.nn.Parameter(w13_data, requires_grad=False)
-                setattr(parent_module, param_name, w13_data)
+                    w13_data = param.transpose(1, 2)
+                    w13_data = torch.nn.Parameter(w13_data,
+                                                  requires_grad=False)
+                    setattr(parent_module, param_name, w13_data)
 
         # Restore the buffers after level 2 sleep
         if len(self._sleep_saved_buffers):
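
For reviewers, a minimal sketch of the wake-up flow this change targets. It assumes vLLM's sleep-mode API (`LLM(enable_sleep_mode=True)`, `sleep(level=...)`, `wake_up(tags=...)` with the "weights" and "kv_cache" tags); the model name and the surrounding RL loop are illustrative assumptions, not part of this patch:

```python
# Sketch of an RL training-inference switch with vLLM sleep mode.
# Assumed API: LLM(enable_sleep_mode=True), sleep(level), wake_up(tags);
# the model name is a placeholder.
from vllm import LLM

llm = LLM(model="Qwen/Qwen2.5-7B-Instruct", enable_sleep_mode=True)

# Free device memory while the trainer updates the policy weights.
llm.sleep(level=1)

# Resume the weights first so the trainer can push the updated parameters;
# the w2/w13 transpose in the NPU worker's wake-up path runs here because
# "weights" is in the tags.
llm.wake_up(tags=["weights"])

# Resume the KV cache afterwards; with this fix the transpose is skipped,
# so the already-transposed weights keep their expected format.
llm.wake_up(tags=["kv_cache"])
```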