[Bugfix] fix kv buffer register & dp attention & deepepmoe (#9327)

This commit is contained in:
chenxu140
2025-08-20 01:09:48 +08:00
committed by GitHub
parent ecc9f3e47a
commit 01d47a27b6
3 changed files with 3 additions and 5 deletions

View File

@@ -234,7 +234,7 @@ def initialize_dp_attention(
_DpGatheredBufferWrapper.set_metadata(
hidden_size=model_config.hidden_size,
dtype=model_config.dtype,
-device=torch.device("cuda"),
+device=torch.device(server_args.device),
)