Overlapped weight offload (#8034)

This commit is contained in:
fzyzcjy
2025-08-23 17:06:46 +08:00
committed by GitHub
parent ccd3fb946e
commit 2600fc0d47
9 changed files with 584 additions and 10 deletions

View File

@@ -92,6 +92,7 @@ class TpModelWorker:
pp_rank=pp_rank,
pp_size=server_args.pp_size,
nccl_port=nccl_port,
dp_rank=dp_rank,
server_args=server_args,
is_draft_worker=is_draft_worker,
req_to_token_pool=req_to_token_pool,