fix: second_per_grid_ts should be used to get mrope position (#3682)

This commit is contained in:
Mick
2025-03-18 09:12:38 +08:00
committed by GitHub
parent 98be3bd306
commit d373a48c98
8 changed files with 93 additions and 69 deletions

View File

@@ -191,7 +191,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
# from transformers import AutoTokenizer
from decord import VideoReader, cpu
max_frames_num = 12
max_frames_num = 20
vr = VideoReader(video_path, ctx=cpu(0))
total_frame_num = len(vr)
uniform_sampled_frames = np.linspace(
@@ -226,6 +226,22 @@ class TestOpenAIVisionServer(unittest.TestCase):
return messages
# Build the chat messages for a video request that references the video by
# `video_path` directly: a single user message whose content holds one
# "image_url" entry (URL built as "video:" + video_path, tagged with
# "modalities": "video") followed by a text prompt asking for a detailed
# description. Returns the message list.
def prepare_video_messages_video_direct(self, video_path):
messages = [
{
"role": "user",
"content": [
{
# The video is carried in the "image_url" slot; presumably the "video:"
# prefix and the "modalities" field are how the server recognizes it as
# video input -- TODO confirm against the server's request parsing.
"type": "image_url",
"image_url": {"url": f"video:{video_path}"},
"modalities": "video",
},
{"type": "text", "text": "Please describe the video in detail."},
],
},
]
return messages
def test_video_chat_completion(self):
url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4"
cache_dir = os.path.expanduser("~/.cache")
@@ -241,6 +257,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# messages = self.prepare_video_messages_video_direct(file_path)
messages = self.prepare_video_messages(file_path)
video_request = client.chat.completions.create(
@@ -266,6 +283,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
"man" in video_response
or "person" in video_response
or "individual" in video_response
or "speaker" in video_response
), video_response
assert (
"present" in video_response
@@ -368,7 +386,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
list(executor.map(self.run_decode_with_image, image_ids))
class TestQWen2VLServer(TestOpenAIVisionServer):
class TestQwen2VLServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
@@ -382,14 +400,14 @@ class TestQWen2VLServer(TestOpenAIVisionServer):
other_args=[
"--chat-template",
"qwen2-vl",
"--chunked-prefill-size",
"10000",
"--mem-fraction-static",
"0.4",
],
)
cls.base_url += "/v1"
class TestQWen2_5_VLServer(TestOpenAIVisionServer):
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -403,9 +421,6 @@ class TestQWen2_5_VLServer(TestOpenAIVisionServer):
other_args=[
"--chat-template",
"qwen2-vl",
# FIXME: workaround to chunked prefill within image embeds
"--chunked-prefill-size",
"10000",
"--mem-fraction-static",
"0.4",
],
@@ -508,6 +523,8 @@ class TestMinicpmvServer(TestOpenAIVisionServer):
"--trust-remote-code",
"--chat-template",
"minicpmv",
"--mem-fraction-static",
"0.4",
],
)
cls.base_url += "/v1"