[3/4] Speed up CSGMV backend perf by 10% through dynamic chunking + kernel optimization (#10592)

This commit is contained in:
Lifu Huang
2025-09-20 22:47:48 -07:00
committed by GitHub
parent 720c1c8ca3
commit 08ecd0aa2a
10 changed files with 158 additions and 84 deletions

View File

@@ -1195,6 +1195,7 @@ class ModelRunner:
max_lora_rank=self.server_args.max_lora_rank,
target_modules=self.server_args.lora_target_modules,
lora_paths=self.server_args.lora_paths,
server_args=self.server_args,
)
def load_lora_adapter(self, lora_ref: LoRARef):