[Bugfix] Fix weight transpose in RL scenarios (#5567)

### What this PR does / why we need it?
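In the training-inference switching scenario, the model weights do not need
to be restored when only the KV cache is being resumed; transposing them
again at that point would cause a weight-format mismatch. The weight
transpose is therefore performed only when `tags` is None or includes
"weights".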

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main:
7157596103

Signed-off-by: p00465316 <panchao13@huawei.com>
Co-authored-by: p00465316 <panchao13@huawei.com>
This commit is contained in:
panchao-hub
2026-01-05 09:17:26 +08:00
committed by GitHub
parent d25a2c20c5
commit 42774df744

View File

@@ -170,6 +170,7 @@ class NPUWorker(WorkerBase):
hidden_size = self.vllm_config.model_config.hf_config.hidden_size hidden_size = self.vllm_config.model_config.hf_config.hidden_size
model = self.model_runner.model model = self.model_runner.model
if tags is None or "weights" in tags:
for name, param in model.named_parameters(): for name, param in model.named_parameters():
if 'w2_weight' in name and param.shape[2] == hidden_size: if 'w2_weight' in name and param.shape[2] == hidden_size:
parts = name.split('.') parts = name.split('.')
@@ -185,7 +186,8 @@ class NPUWorker(WorkerBase):
parent_module = model.get_submodule(".".join(parts[:-1])) parent_module = model.get_submodule(".".join(parts[:-1]))
w13_data = param.transpose(1, 2) w13_data = param.transpose(1, 2)
w13_data = torch.nn.Parameter(w13_data, requires_grad=False) w13_data = torch.nn.Parameter(w13_data,
requires_grad=False)
setattr(parent_module, param_name, w13_data) setattr(parent_module, param_name, w13_data)
# Restore the buffers after level 2 sleep # Restore the buffers after level 2 sleep