[3/4] Speed up CSGMV backend perf by 10% through dynamic chunking + kernel optimization (#10592)
This commit is contained in:
@@ -1195,6 +1195,7 @@ class ModelRunner:
|
||||
max_lora_rank=self.server_args.max_lora_rank,
|
||||
target_modules=self.server_args.lora_target_modules,
|
||||
lora_paths=self.server_args.lora_paths,
|
||||
server_args=self.server_args,
|
||||
)
|
||||
|
||||
def load_lora_adapter(self, lora_ref: LoRARef):
|
||||
|
||||
Reference in New Issue
Block a user