[Feat] Add modalities for vision server when handling pixel values for llava (#1346)
This commit is contained in:
committed by
GitHub
parent
8e6bdf851c
commit
662ecd9368
@@ -188,6 +188,7 @@ class TokenizerManager:
|
||||
pixel_values, image_hashes, image_sizes = await self._get_pixel_values(
|
||||
obj.image_data if not_use_index else obj.image_data[index]
|
||||
)
|
||||
modalities = obj.modalities
|
||||
return_logprob = (
|
||||
obj.return_logprob if not_use_index else obj.return_logprob[index]
|
||||
)
|
||||
@@ -243,6 +244,7 @@ class TokenizerManager:
|
||||
pixel_values, image_hashes, image_sizes = await self._get_pixel_values(
|
||||
obj.image_data[0]
|
||||
)
|
||||
modalities = obj.modalities
|
||||
return_logprob = obj.return_logprob[0]
|
||||
logprob_start_len = obj.logprob_start_len[0]
|
||||
top_logprobs_num = obj.top_logprobs_num[0]
|
||||
@@ -263,6 +265,7 @@ class TokenizerManager:
|
||||
logprob_start_len,
|
||||
top_logprobs_num,
|
||||
obj.stream,
|
||||
modalities,
|
||||
)
|
||||
else: # is embedding
|
||||
tokenized_obj = TokenizedEmbeddingReqInput(
|
||||
@@ -346,6 +349,7 @@ class TokenizerManager:
|
||||
pixel_values, image_hashes, image_sizes = (
|
||||
await self._get_pixel_values(obj.image_data[index])
|
||||
)
|
||||
modalities = obj.modalities
|
||||
|
||||
tokenized_obj = TokenizedGenerateReqInput(
|
||||
rid,
|
||||
@@ -359,6 +363,7 @@ class TokenizerManager:
|
||||
obj.logprob_start_len[index],
|
||||
obj.top_logprobs_num[index],
|
||||
obj.stream,
|
||||
modalities,
|
||||
)
|
||||
else:
|
||||
tokenized_obj = TokenizedEmbeddingReqInput(
|
||||
|
||||
Reference in New Issue
Block a user