fix: second_per_grid_ts should be used to get mrope position (#3682)

2025-03-18 09:12:38 +08:00
parent 98be3bd306
commit d373a48c98
8 changed files with 93 additions and 69 deletions
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -402,9 +402,16 @@ class ForwardBatch:
                                extend_start_loc : extend_start_loc + extend_seq_len
                            ],
                            image_grid_thw=image_inputs.image_grid_thws,
+                            video_grid_thw=image_inputs.video_grid_thws,
+                            image_token_id=image_inputs.im_token_id,
+                            video_token_id=image_inputs.video_token_id,
                            vision_start_token_id=hf_config.vision_start_token_id,
+                            vision_end_token_id=hf_config.vision_end_token_id,
                            spatial_merge_size=hf_config.vision_config.spatial_merge_size,
                            context_len=0,
+                            seq_len=len(self.input_ids),
+                            second_per_grid_ts=image_inputs.second_per_grid_ts,
+                            tokens_per_second=hf_config.vision_config.tokens_per_second,
                        )
                    )
                    batch.image_inputs[i].mrope_position_delta = mrope_position_delta
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -258,10 +258,12 @@ class ModelRunner:

            if self.model_config.hf_config.architectures == [
                "Qwen2VLForConditionalGeneration"
+            ] or self.model_config.hf_config.architectures == [
+                "Qwen2_5_VLForConditionalGeneration"
            ]:
-                # TODO: qwen2-vl does not support radix cache now, set disable_radix_cache=True automatically
+                # TODO: support radix cache for the qwen2-vl series; until then, chunked prefill and radix cache are disabled automatically below
                logger.info(
-                    "Automatically turn off --chunked-prefill-size and disable radix cache for qwen2-vl."
+                    "Automatically turn off --chunked-prefill-size and disable radix cache for qwen-vl series."
                )
                server_args.chunked_prefill_size = -1
                server_args.disable_radix_cache = True