vlm: enable GLM4.1V server testing & fix video processing (#10095)
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
Co-authored-by: Binyao Jiang <byjiang1996@gmail.com>
This commit is contained in:
@@ -2,7 +2,6 @@ import re
|
|||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
|
|
||||||
from decord import VideoReader
|
from decord import VideoReader
|
||||||
from transformers.video_utils import VideoMetadata
|
|
||||||
|
|
||||||
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
|
from sglang.srt.layers.rotary_embedding import MRotaryEmbedding
|
||||||
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration
|
from sglang.srt.models.glm4v import Glm4vForConditionalGeneration
|
||||||
@@ -66,17 +65,18 @@ class Glm4vImageProcessor(SGLangBaseProcessor):
|
|||||||
total_num_frames = len(vr)
|
total_num_frames = len(vr)
|
||||||
duration = total_num_frames / video_fps if video_fps else 0
|
duration = total_num_frames / video_fps if video_fps else 0
|
||||||
|
|
||||||
metadata = VideoMetadata(
|
|
||||||
total_num_frames=int(total_num_frames),
|
|
||||||
fps=float(video_fps),
|
|
||||||
duration=float(duration),
|
|
||||||
video_backend="decord",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Extract all frames
|
# Extract all frames
|
||||||
indices = list(range(total_num_frames))
|
indices = list(range(total_num_frames))
|
||||||
frames = vr.get_batch(indices).asnumpy()
|
frames = vr.get_batch(indices).asnumpy()
|
||||||
metadata.frames_indices = indices
|
|
||||||
|
# Return metadata as dict so transformers can properly create VideoMetadata objects
|
||||||
|
metadata = {
|
||||||
|
"total_num_frames": int(total_num_frames),
|
||||||
|
"fps": float(video_fps),
|
||||||
|
"duration": float(duration),
|
||||||
|
"video_backend": "decord",
|
||||||
|
"frames_indices": indices,
|
||||||
|
}
|
||||||
|
|
||||||
return frames, metadata
|
return frames, metadata
|
||||||
|
|
||||||
|
|||||||
@@ -217,31 +217,27 @@ class TestKimiVLServer(ImageOpenAITestMixin):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# Skip for ci test
|
class TestGLM41VServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
|
||||||
# class TestGLM41VServer(TestOpenAIVisionServer):
|
@classmethod
|
||||||
# @classmethod
|
def setUpClass(cls):
|
||||||
# def setUpClass(cls):
|
cls.model = "zai-org/GLM-4.1V-9B-Thinking"
|
||||||
# cls.model = "zai-org/GLM-4.1V-9B-Thinking"
|
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
cls.api_key = "sk-123456"
|
||||||
# cls.api_key = "sk-123456"
|
cls.process = popen_launch_server(
|
||||||
# cls.process = popen_launch_server(
|
cls.model,
|
||||||
# cls.model,
|
cls.base_url,
|
||||||
# cls.base_url,
|
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
other_args=[
|
||||||
# other_args=[
|
"--trust-remote-code",
|
||||||
# "--trust-remote-code",
|
"--mem-fraction-static",
|
||||||
# "--mem-fraction-static",
|
"0.68",
|
||||||
# "0.68",
|
"--cuda-graph-max-bs",
|
||||||
# "--cuda-graph-max-bs",
|
"4",
|
||||||
# "4",
|
"--reasoning-parser",
|
||||||
# "--reasoning-parser",
|
"glm45",
|
||||||
# "glm45",
|
],
|
||||||
# ],
|
)
|
||||||
# )
|
cls.base_url += "/v1"
|
||||||
# cls.base_url += "/v1"
|
|
||||||
|
|
||||||
# def test_video_chat_completion(self):
|
|
||||||
# self._test_video_chat_completion()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Reference in New Issue
Block a user