fix: second_per_grid_ts should be used to get mrope position (#3682)
This commit is contained in:
@@ -402,9 +402,16 @@ class ForwardBatch:
|
||||
extend_start_loc : extend_start_loc + extend_seq_len
|
||||
],
|
||||
image_grid_thw=image_inputs.image_grid_thws,
|
||||
video_grid_thw=image_inputs.video_grid_thws,
|
||||
image_token_id=image_inputs.im_token_id,
|
||||
video_token_id=image_inputs.video_token_id,
|
||||
vision_start_token_id=hf_config.vision_start_token_id,
|
||||
vision_end_token_id=hf_config.vision_end_token_id,
|
||||
spatial_merge_size=hf_config.vision_config.spatial_merge_size,
|
||||
context_len=0,
|
||||
seq_len=len(self.input_ids),
|
||||
second_per_grid_ts=image_inputs.second_per_grid_ts,
|
||||
tokens_per_second=hf_config.vision_config.tokens_per_second,
|
||||
)
|
||||
)
|
||||
batch.image_inputs[i].mrope_position_delta = mrope_position_delta
|
||||
|
||||
@@ -258,10 +258,12 @@ class ModelRunner:
|
||||
|
||||
if self.model_config.hf_config.architectures == [
|
||||
"Qwen2VLForConditionalGeneration"
|
||||
] or self.model_config.hf_config.architectures == [
|
||||
"Qwen2_5_VLForConditionalGeneration"
|
||||
]:
|
||||
# TODO: qwen2-vl does not support radix cache now, set disable_radix_cache=True automatically
|
||||
# TODO: qwen2-vl series does not support radix cache now, set disable_radix_cache=True automatically
|
||||
logger.info(
|
||||
"Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl."
|
||||
"Automatically turn off --chunked-prefill-size and disable radix cache for qwen-vl series."
|
||||
)
|
||||
server_args.chunked_prefill_size = -1
|
||||
server_args.disable_radix_cache = True
|
||||
|
||||
Reference in New Issue
Block a user