[PD] use int32 for kv indices & get num_reserved_decode_tokens from server_args (#7214)
This commit is contained in:
@@ -656,6 +656,7 @@ class Scheduler(
bootstrap_port=self.server_args.disaggregation_bootstrap_port,
max_total_num_tokens=self.max_total_num_tokens,
prefill_pp_size=self.server_args.disaggregation_prefill_pp,
num_reserved_decode_tokens=self.server_args.num_reserved_decode_tokens,
transfer_backend=self.transfer_backend,
)
Reference in New Issue
Block a user