fix: second_per_grid_ts should be used to get mrope position (#3682)

This commit is contained in:
Mick
2025-03-18 09:12:38 +08:00
committed by GitHub
parent 98be3bd306
commit d373a48c98
8 changed files with 93 additions and 69 deletions

View File

@@ -68,7 +68,7 @@ suites = {
TestFile("test_update_weights_from_tensor.py", 48),
TestFile("test_vertex_endpoint.py", 31),
TestFile("test_vision_chunked_prefill.py", 223),
TestFile("test_vision_llm.py", 18.4),
TestFile("test_vlm_accuracy.py", 60),
TestFile("test_vision_openai_server.py", 344),
TestFile("test_fim_completion.py", 120),
TestFile("test_w8a8_quantization.py", 46),

View File

@@ -191,7 +191,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
# from transformers import AutoTokenizer
from decord import VideoReader, cpu
max_frames_num = 12
max_frames_num = 20
vr = VideoReader(video_path, ctx=cpu(0))
total_frame_num = len(vr)
uniform_sampled_frames = np.linspace(
@@ -226,6 +226,22 @@ class TestOpenAIVisionServer(unittest.TestCase):
return messages
# Build a single-turn chat request that references the video by path instead of
# by pre-extracted frames.
#
# Parameters:
#   video_path: interpolated verbatim after the "video:" prefix in the URL.
# Returns:
#   A one-element list holding a "user" message whose content has two parts:
#   the video reference and a text instruction.
#
# NOTE(review): presumably the server recognises the "video:" URL prefix and the
# "modalities": "video" field and loads the video itself -- confirm against the
# server-side request parsing, which is not visible in this hunk.
def prepare_video_messages_video_direct(self, video_path):
messages = [
{
"role": "user",
"content": [
{
# The video is sent through an "image_url" content part; the
# "video:" prefix and "modalities" value mark it as a video.
"type": "image_url",
"image_url": {"url": f"video:{video_path}"},
"modalities": "video",
},
# Text prompt sent alongside the video reference.
{"type": "text", "text": "Please describe the video in detail."},
],
},
]
return messages
def test_video_chat_completion(self):
url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4"
cache_dir = os.path.expanduser("~/.cache")
@@ -241,6 +257,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
client = openai.Client(api_key=self.api_key, base_url=self.base_url)
# messages = self.prepare_video_messages_video_direct(file_path)
messages = self.prepare_video_messages(file_path)
video_request = client.chat.completions.create(
@@ -266,6 +283,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
"man" in video_response
or "person" in video_response
or "individual" in video_response
or "speaker" in video_response
), video_response
assert (
"present" in video_response
@@ -368,7 +386,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
list(executor.map(self.run_decode_with_image, image_ids))
class TestQWen2VLServer(TestOpenAIVisionServer):
class TestQwen2VLServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "Qwen/Qwen2-VL-7B-Instruct"
@@ -382,14 +400,14 @@ class TestQWen2VLServer(TestOpenAIVisionServer):
other_args=[
"--chat-template",
"qwen2-vl",
"--chunked-prefill-size",
"10000",
"--mem-fraction-static",
"0.4",
],
)
cls.base_url += "/v1"
class TestQWen2_5_VLServer(TestOpenAIVisionServer):
class TestQwen2_5_VLServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -403,9 +421,6 @@ class TestQWen2_5_VLServer(TestOpenAIVisionServer):
other_args=[
"--chat-template",
"qwen2-vl",
# FIXME: workaround to chunked prefill within image embeds
"--chunked-prefill-size",
"10000",
"--mem-fraction-static",
"0.4",
],
@@ -508,6 +523,8 @@ class TestMinicpmvServer(TestOpenAIVisionServer):
"--trust-remote-code",
"--chat-template",
"minicpmv",
"--mem-fraction-static",
"0.4",
],
)
cls.base_url += "/v1"

View File

@@ -17,8 +17,6 @@ from sglang.srt.model_executor.model_runner import ModelRunner
from sglang.srt.openai_api.protocol import ChatCompletionRequest
from sglang.srt.server_args import ServerArgs
MiniCPMV = "openbmb/MiniCPM-V-2_6"
# Test the logits output between HF and SGLang
class VisionLLMLogitsBase(unittest.IsolatedAsyncioTestCase):
@@ -155,7 +153,7 @@ class TestMiniCPMVLogits(VisionLLMLogitsBase):
@classmethod
def setUpClass(cls):
super().setUpClass()
cls.model_path = MiniCPMV
cls.model_path = "openbmb/MiniCPM-V-2_6"
cls.tokenizer = AutoTokenizer.from_pretrained(
cls.model_path, trust_remote_code=True
)