[Feature] Reuse flashinfer workspace for PD-Multiplexing. (#11540)

This commit is contained in:
ykcombat
2025-10-18 02:35:06 +08:00
committed by GitHub
parent 2bc3fcd420
commit f440baa136
3 changed files with 13 additions and 2 deletions

View File

@@ -284,6 +284,7 @@ class ModelRunner:
self.use_mla_backend = self.model_config.attention_arch == AttentionArch.MLA
self.attention_chunk_size = model_config.attention_chunk_size
self.forward_pass_id = 0
self.init_new_workspace = False
# Apply the rank zero filter to logger
if server_args.show_time_cost: