[Bugfix] Fix weight transpose in RL scenarios (#5567)
### What this PR does / why we need it?
In the training-inference switching scenario, the model weights must not
be re-processed (transposed) when only the KV cache is being resumed, as
doing so would leave the weights in a mismatched format.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main: 7157596103
Signed-off-by: p00465316 <panchao13@huawei.com>
Co-authored-by: p00465316 <panchao13@huawei.com>
This commit is contained in:
@@ -170,6 +170,7 @@ class NPUWorker(WorkerBase):
|
|||||||
|
|
||||||
hidden_size = self.vllm_config.model_config.hf_config.hidden_size
|
hidden_size = self.vllm_config.model_config.hf_config.hidden_size
|
||||||
model = self.model_runner.model
|
model = self.model_runner.model
|
||||||
|
if tags is None or "weights" in tags:
|
||||||
for name, param in model.named_parameters():
|
for name, param in model.named_parameters():
|
||||||
if 'w2_weight' in name and param.shape[2] == hidden_size:
|
if 'w2_weight' in name and param.shape[2] == hidden_size:
|
||||||
parts = name.split('.')
|
parts = name.split('.')
|
||||||
@@ -185,7 +186,8 @@ class NPUWorker(WorkerBase):
|
|||||||
parent_module = model.get_submodule(".".join(parts[:-1]))
|
parent_module = model.get_submodule(".".join(parts[:-1]))
|
||||||
|
|
||||||
w13_data = param.transpose(1, 2)
|
w13_data = param.transpose(1, 2)
|
||||||
w13_data = torch.nn.Parameter(w13_data, requires_grad=False)
|
w13_data = torch.nn.Parameter(w13_data,
|
||||||
|
requires_grad=False)
|
||||||
setattr(parent_module, param_name, w13_data)
|
setattr(parent_module, param_name, w13_data)
|
||||||
|
|
||||||
# Restore the buffers after level 2 sleep
|
# Restore the buffers after level 2 sleep
|
||||||
|
|||||||
Reference in New Issue
Block a user