Overlapped weight offload (#8034)
This commit is contained in:
@@ -92,6 +92,7 @@ class TpModelWorker:
|
||||
pp_rank=pp_rank,
|
||||
pp_size=server_args.pp_size,
|
||||
nccl_port=nccl_port,
|
||||
dp_rank=dp_rank,
|
||||
server_args=server_args,
|
||||
is_draft_worker=is_draft_worker,
|
||||
req_to_token_pool=req_to_token_pool,
|
||||
|
||||
Reference in New Issue
Block a user