[Feature] Reuse flashinfer workspace for PD-Multiplexing. (#11540)

2025-10-18 02:35:06 +08:00
parent 2bc3fcd420
commit f440baa136
3 changed files with 13 additions and 2 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -284,6 +284,7 @@ class ModelRunner:
        self.use_mla_backend = self.model_config.attention_arch == AttentionArch.MLA
        self.attention_chunk_size = model_config.attention_chunk_size
        self.forward_pass_id = 0
+        self.init_new_workspace = False

        # Apply the rank zero filter to logger
        if server_args.show_time_cost: