Rename lora_path to lora_id in batches (#8437)
This commit is contained in:
@@ -576,11 +576,11 @@ class CudaGraphRunner:
|
||||
)
|
||||
|
||||
if self.model_runner.server_args.enable_lora:
|
||||
# It is safe to capture CUDA graph using empty LoRA path, as the LoRA kernels will always be launched whenever
|
||||
# `--enable-lora` is set to True (and return immediately if the LoRA path is empty for perf optimization).
|
||||
lora_paths = [None] * bs
|
||||
# It is safe to capture CUDA graph using empty LoRA id, as the LoRA kernels will always be launched whenever
|
||||
# `--enable-lora` is set to True (and return immediately if the LoRA id is empty for perf optimization).
|
||||
lora_ids = [None] * bs
|
||||
else:
|
||||
lora_paths = None
|
||||
lora_ids = None
|
||||
|
||||
forward_batch = ForwardBatch(
|
||||
forward_mode=self.capture_forward_mode,
|
||||
@@ -607,11 +607,11 @@ class CudaGraphRunner:
|
||||
capture_hidden_mode=self.capture_hidden_mode,
|
||||
num_token_non_padded=self.num_token_non_padded,
|
||||
global_forward_mode=self.capture_forward_mode,
|
||||
lora_paths=lora_paths,
|
||||
lora_ids=lora_ids,
|
||||
)
|
||||
self.tbo_plugin.capture_one_batch_size(forward_batch, num_tokens=num_tokens)
|
||||
|
||||
if lora_paths is not None:
|
||||
if lora_ids is not None:
|
||||
self.model_runner.lora_manager.prepare_lora_batch(forward_batch)
|
||||
|
||||
# Attention backend
|
||||
|
||||
@@ -248,7 +248,7 @@ class ForwardBatch:
|
||||
encoder_out_cache_loc: Optional[torch.Tensor] = None
|
||||
|
||||
# For LoRA
|
||||
lora_paths: Optional[List[str]] = None
|
||||
lora_ids: Optional[List[str]] = None
|
||||
|
||||
# For input embeddings
|
||||
input_embeds: Optional[torch.Tensor] = None
|
||||
@@ -327,7 +327,7 @@ class ForwardBatch:
|
||||
is_extend_in_batch=batch.is_extend_in_batch,
|
||||
can_run_dp_cuda_graph=batch.can_run_dp_cuda_graph,
|
||||
global_forward_mode=batch.global_forward_mode,
|
||||
lora_paths=batch.lora_paths,
|
||||
lora_ids=batch.lora_ids,
|
||||
sampling_info=batch.sampling_info,
|
||||
req_to_token_pool=model_runner.req_to_token_pool,
|
||||
token_to_kv_pool=model_runner.token_to_kv_pool,
|
||||
|
||||
Reference in New Issue
Block a user