Input_embeds support (#2052)

This commit is contained in:
Rin Intachuen
2024-11-25 19:35:04 -05:00
committed by GitHub
parent 1f76fc6e3f
commit 1aea19f64b
9 changed files with 204 additions and 15 deletions

View File

@@ -526,12 +526,20 @@ class Scheduler:
recv_req: TokenizedGenerateReqInput,
):
if recv_req.session_id is None or recv_req.session_id not in self.sessions:
# Check if input_embeds is present and create dummy input_ids
if recv_req.input_embeds is not None:
# Generate fake input_ids based on the length of input_embeds
seq_length = len(recv_req.input_embeds)
fake_input_ids = [1] * seq_length
recv_req.input_ids = fake_input_ids
req = Req(
recv_req.rid,
recv_req.input_text,
recv_req.input_ids,
recv_req.sampling_params,
lora_path=recv_req.lora_path,
input_embeds=recv_req.input_embeds,
)
req.tokenizer = self.tokenizer
if recv_req.session_id is not None: