[Feature] Reuse flashinfer workspace for PD-Multiplexing. (#11540)
This commit is contained in:
@@ -284,6 +284,7 @@ class ModelRunner:
|
||||
self.use_mla_backend = self.model_config.attention_arch == AttentionArch.MLA
|
||||
self.attention_chunk_size = model_config.attention_chunk_size
|
||||
self.forward_pass_id = 0
|
||||
self.init_new_workspace = False
|
||||
|
||||
# Apply the rank zero filter to logger
|
||||
if server_args.show_time_cost:
|
||||
|
||||
Reference in New Issue
Block a user