Improve the control of streaming and improve the first token latency in streaming (#117)

This commit is contained in:
Lianmin Zheng
2024-01-29 17:05:42 -08:00
committed by GitHub
parent cd6872334e
commit 6f560c761b
12 changed files with 46 additions and 23 deletions

View File

@@ -158,7 +158,7 @@ class LlavaLlamaForCausalLM(nn.Module):
num_patch_height, num_patch_width, height, width, -1
)
else:
raise NotImplementedError
raise NotImplementedError()
if "unpad" in self.mm_patch_merge_type:
image_feature = image_feature.permute(
4, 0, 2, 1, 3