vlm: enforce pybase64 for image and str encode/decode (#10700)
This commit is contained in:
@@ -75,12 +75,6 @@ CAT_SHORT2LONG = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# DATA SAVING
|
|
||||||
def save_json(filename, ds):
|
|
||||||
with open(filename, "w") as f:
|
|
||||||
json.dump(ds, f, indent=4)
|
|
||||||
|
|
||||||
|
|
||||||
def get_multi_choice_info(options):
|
def get_multi_choice_info(options):
|
||||||
"""
|
"""
|
||||||
Given the list of options for multiple choice question
|
Given the list of options for multiple choice question
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-
|
|||||||
python3 llava_onevision_server.py
|
python3 llava_onevision_server.py
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
@@ -14,6 +13,7 @@ import time
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import openai
|
import openai
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
from decord import VideoReader, cpu
|
from decord import VideoReader, cpu
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
@@ -213,7 +213,7 @@ def prepare_video_messages(video_path):
|
|||||||
pil_img = Image.fromarray(frame)
|
pil_img = Image.fromarray(frame)
|
||||||
buff = io.BytesIO()
|
buff = io.BytesIO()
|
||||||
pil_img.save(buff, format="JPEG")
|
pil_img.save(buff, format="JPEG")
|
||||||
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
|
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
|
||||||
base64_frames.append(base64_str)
|
base64_frames.append(base64_str)
|
||||||
|
|
||||||
messages = [{"role": "user", "content": []}]
|
messages = [{"role": "user", "content": []}]
|
||||||
|
|||||||
@@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
|
|||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
|
from datasets import load_dataset
|
||||||
|
from PIL import Image
|
||||||
from tqdm.asyncio import tqdm
|
from tqdm.asyncio import tqdm
|
||||||
from transformers import (
|
from transformers import (
|
||||||
AutoProcessor,
|
AutoProcessor,
|
||||||
@@ -1020,14 +1023,6 @@ def sample_mmmu_requests(
|
|||||||
Returns:
|
Returns:
|
||||||
List of tuples (prompt, prompt_token_len, output_token_len).
|
List of tuples (prompt, prompt_token_len, output_token_len).
|
||||||
"""
|
"""
|
||||||
try:
|
|
||||||
import io
|
|
||||||
|
|
||||||
import pybase64
|
|
||||||
from datasets import load_dataset
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError("Please install datasets: pip install datasets")
|
|
||||||
|
|
||||||
print("Loading MMMU dataset from HuggingFace...")
|
print("Loading MMMU dataset from HuggingFace...")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -1396,13 +1391,6 @@ def sample_image_requests(
|
|||||||
- Text lengths follow the 'random' dataset sampling rule. ``prompt_len``
|
- Text lengths follow the 'random' dataset sampling rule. ``prompt_len``
|
||||||
only counts text tokens and excludes image data.
|
only counts text tokens and excludes image data.
|
||||||
"""
|
"""
|
||||||
try:
|
|
||||||
import pybase64
|
|
||||||
from PIL import Image
|
|
||||||
except ImportError as e:
|
|
||||||
raise ImportError(
|
|
||||||
"Please install Pillow to generate random images: pip install pillow"
|
|
||||||
) from e
|
|
||||||
|
|
||||||
# Parse resolution (supports presets and 'heightxwidth')
|
# Parse resolution (supports presets and 'heightxwidth')
|
||||||
width, height = parse_image_resolution(image_resolution)
|
width, height = parse_image_resolution(image_resolution)
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
import base64
|
|
||||||
import pickle
|
import pickle
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
|
import pybase64
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from sglang.srt.utils import MultiprocessingSerializer
|
from sglang.srt.utils import MultiprocessingSerializer
|
||||||
@@ -77,14 +77,16 @@ class NaiveDistributed:
|
|||||||
)
|
)
|
||||||
|
|
||||||
_get_path(self._rank).write_text(
|
_get_path(self._rank).write_text(
|
||||||
base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
|
pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix
|
||||||
)
|
)
|
||||||
|
|
||||||
def _read_one(interesting_rank: int):
|
def _read_one(interesting_rank: int):
|
||||||
p = _get_path(interesting_rank)
|
p = _get_path(interesting_rank)
|
||||||
while True:
|
while True:
|
||||||
if p.exists() and (text := p.read_text()).endswith(text_postfix):
|
if p.exists() and (text := p.read_text()).endswith(text_postfix):
|
||||||
return pickle.loads(base64.b64decode(text[: -len(text_postfix)]))
|
return pickle.loads(
|
||||||
|
pybase64.b64decode(text[: -len(text_postfix)], validate=True)
|
||||||
|
)
|
||||||
time.sleep(0.001)
|
time.sleep(0.001)
|
||||||
|
|
||||||
return [
|
return [
|
||||||
|
|||||||
@@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]):
|
|||||||
return f.read()
|
return f.read()
|
||||||
elif image_file.startswith("data:"):
|
elif image_file.startswith("data:"):
|
||||||
image_file = image_file.split(",")[1]
|
image_file = image_file.split(",")[1]
|
||||||
return pybase64.b64decode(image_file)
|
return pybase64.b64decode(image_file, validate=True)
|
||||||
elif isinstance(image_file, str):
|
elif isinstance(image_file, str):
|
||||||
return pybase64.b64decode(image_file)
|
return pybase64.b64decode(image_file, validate=True)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(f"Invalid image: {image_file}")
|
raise NotImplementedError(f"Invalid image: {image_file}")
|
||||||
|
|
||||||
@@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
|
|||||||
vr = VideoReader(tmp_file.name, ctx=ctx)
|
vr = VideoReader(tmp_file.name, ctx=ctx)
|
||||||
elif video_file.startswith("data:"):
|
elif video_file.startswith("data:"):
|
||||||
_, encoded = video_file.split(",", 1)
|
_, encoded = video_file.split(",", 1)
|
||||||
video_bytes = pybase64.b64decode(encoded)
|
video_bytes = pybase64.b64decode(encoded, validate=True)
|
||||||
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
||||||
tmp_file.write(video_bytes)
|
tmp_file.write(video_bytes)
|
||||||
tmp_file.close()
|
tmp_file.close()
|
||||||
@@ -919,7 +919,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
|
|||||||
elif os.path.isfile(video_file):
|
elif os.path.isfile(video_file):
|
||||||
vr = VideoReader(video_file, ctx=ctx)
|
vr = VideoReader(video_file, ctx=ctx)
|
||||||
else:
|
else:
|
||||||
video_bytes = pybase64.b64decode(video_file)
|
video_bytes = pybase64.b64decode(video_file, validate=True)
|
||||||
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
||||||
tmp_file.write(video_bytes)
|
tmp_file.write(video_bytes)
|
||||||
tmp_file.close()
|
tmp_file.close()
|
||||||
@@ -2083,7 +2083,7 @@ class MultiprocessingSerializer:
|
|||||||
|
|
||||||
if output_str:
|
if output_str:
|
||||||
# Convert bytes to base64-encoded string
|
# Convert bytes to base64-encoded string
|
||||||
output = pybase64.b64encode(output).decode("utf-8")
|
# Rebind `output`: without the assignment the encoded text is discarded and
# the raw bytes are returned even though a string was requested.
output = pybase64.b64encode(output).decode("utf-8")
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm
|
|||||||
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
|
python3 -m unittest test_bnb.TestLanguageModel.test_mmlu
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import multiprocessing as mp
|
import multiprocessing as mp
|
||||||
@@ -15,6 +14,7 @@ from types import SimpleNamespace
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import openai
|
import openai
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ Usage:
|
|||||||
python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill
|
python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
@@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
@@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
|
|||||||
pil_img = Image.fromarray(frame)
|
pil_img = Image.fromarray(frame)
|
||||||
buff = io.BytesIO()
|
buff = io.BytesIO()
|
||||||
pil_img.save(buff, format="JPEG")
|
pil_img.save(buff, format="JPEG")
|
||||||
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
|
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
|
||||||
base64_frames.append(base64_str)
|
base64_frames.append(base64_str)
|
||||||
|
|
||||||
messages = [{"role": "user", "content": []}]
|
messages = [{"role": "user", "content": []}]
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import openai
|
import openai
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
@@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
|
|||||||
pil_img = Image.fromarray(frame)
|
pil_img = Image.fromarray(frame)
|
||||||
buff = io.BytesIO()
|
buff = io.BytesIO()
|
||||||
pil_img.save(buff, format="JPEG")
|
pil_img.save(buff, format="JPEG")
|
||||||
base64_str = base64.b64encode(buff.getvalue()).decode("utf-8")
|
base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8")
|
||||||
base64_frames.append(base64_str)
|
base64_frames.append(base64_str)
|
||||||
|
|
||||||
messages = [{"role": "user", "content": []}]
|
messages = [{"role": "user", "content": []}]
|
||||||
|
|||||||
Reference in New Issue
Block a user