[PD] use int32 for kv indices & get num_reserved_decode_tokens from server_args (#7214)

This commit is contained in:
Byron Hsu
2025-06-15 11:51:03 -07:00
committed by GitHub
parent fff10809bf
commit 88f9c347b2
8 changed files with 24 additions and 26 deletions

View File

@@ -656,6 +656,7 @@ class Scheduler(
bootstrap_port=self.server_args.disaggregation_bootstrap_port,
max_total_num_tokens=self.max_total_num_tokens,
prefill_pp_size=self.server_args.disaggregation_prefill_pp,
num_reserved_decode_tokens=self.server_args.num_reserved_decode_tokens,
transfer_backend=self.transfer_backend,
)