[Multimodal][Perf] Use pybase64 instead of base64 (#7724)
This commit is contained in:
@@ -38,6 +38,7 @@ runtime_common = [
|
|||||||
"psutil",
|
"psutil",
|
||||||
"pydantic",
|
"pydantic",
|
||||||
"pynvml",
|
"pynvml",
|
||||||
|
"pybase64",
|
||||||
"python-multipart",
|
"python-multipart",
|
||||||
"pyzmq>=25.1.2",
|
"pyzmq>=25.1.2",
|
||||||
"soundfile==0.13.1",
|
"soundfile==0.13.1",
|
||||||
|
|||||||
@@ -814,9 +814,9 @@ def sample_mmmu_requests(
|
|||||||
List of tuples (prompt, prompt_token_len, output_token_len).
|
List of tuples (prompt, prompt_token_len, output_token_len).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
|
|
||||||
|
import pybase64
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError("Please install datasets: pip install datasets")
|
raise ImportError("Please install datasets: pip install datasets")
|
||||||
@@ -867,7 +867,7 @@ def sample_mmmu_requests(
|
|||||||
# Encode image to base64
|
# Encode image to base64
|
||||||
buffered = io.BytesIO()
|
buffered = io.BytesIO()
|
||||||
image.save(buffered, format="JPEG")
|
image.save(buffered, format="JPEG")
|
||||||
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
img_str = pybase64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||||
image_data = f"data:image/jpeg;base64,{img_str}"
|
image_data = f"data:image/jpeg;base64,{img_str}"
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import base64
|
|
||||||
import copy
|
import copy
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
@@ -7,6 +6,7 @@ import threading
|
|||||||
import time
|
import time
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
import torch
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
|
|||||||
@@ -28,12 +28,12 @@ LLaVA-Onevision : https://arxiv.org/pdf/2408.03326
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
import ast
|
import ast
|
||||||
import base64
|
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pybase64
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from sglang.srt.utils import flatten_nested_list
|
from sglang.srt.utils import flatten_nested_list
|
||||||
@@ -252,7 +252,7 @@ def process_anyres_image(image, processor, grid_pinpoints):
|
|||||||
|
|
||||||
|
|
||||||
def load_image_from_base64(image):
|
def load_image_from_base64(image):
|
||||||
return Image.open(BytesIO(base64.b64decode(image)))
|
return Image.open(BytesIO(pybase64.b64decode(image, validate=True)))
|
||||||
|
|
||||||
|
|
||||||
def expand2square(pil_img, background_color):
|
def expand2square(pil_img, background_color):
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import base64
|
|
||||||
import builtins
|
import builtins
|
||||||
import ctypes
|
import ctypes
|
||||||
import dataclasses
|
import dataclasses
|
||||||
@@ -68,6 +67,7 @@ from typing import (
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import psutil
|
import psutil
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
import torch
|
import torch
|
||||||
import torch.distributed
|
import torch.distributed
|
||||||
@@ -616,7 +616,7 @@ def decode_video_base64(video_base64):
|
|||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
# Decode the base64 string
|
# Decode the base64 string
|
||||||
video_bytes = base64.b64decode(video_base64)
|
video_bytes = pybase64.b64decode(video_base64, validate=True)
|
||||||
|
|
||||||
# Placeholder for the start indices of each PNG image
|
# Placeholder for the start indices of each PNG image
|
||||||
img_starts = []
|
img_starts = []
|
||||||
@@ -702,7 +702,9 @@ def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarra
|
|||||||
audio, original_sr = sf.read(BytesIO(audio_file))
|
audio, original_sr = sf.read(BytesIO(audio_file))
|
||||||
elif audio_file.startswith("data:"):
|
elif audio_file.startswith("data:"):
|
||||||
audio_file = audio_file.split(",")[1]
|
audio_file = audio_file.split(",")[1]
|
||||||
audio, original_sr = sf.read(BytesIO(base64.b64decode(audio_file)))
|
audio, original_sr = sf.read(
|
||||||
|
BytesIO(pybase64.b64decode(audio_file, validate=True))
|
||||||
|
)
|
||||||
elif audio_file.startswith("http://") or audio_file.startswith("https://"):
|
elif audio_file.startswith("http://") or audio_file.startswith("https://"):
|
||||||
timeout = int(os.getenv("REQUEST_TIMEOUT", "5"))
|
timeout = int(os.getenv("REQUEST_TIMEOUT", "5"))
|
||||||
response = requests.get(audio_file, stream=True, timeout=timeout)
|
response = requests.get(audio_file, stream=True, timeout=timeout)
|
||||||
@@ -771,12 +773,12 @@ def load_image(
|
|||||||
image = Image.open(image_file)
|
image = Image.open(image_file)
|
||||||
elif image_file.startswith("data:"):
|
elif image_file.startswith("data:"):
|
||||||
image_file = image_file.split(",")[1]
|
image_file = image_file.split(",")[1]
|
||||||
image = Image.open(BytesIO(base64.b64decode(image_file)))
|
image = Image.open(BytesIO(pybase64.b64decode(image_file, validate=True)))
|
||||||
elif image_file.startswith("video:"):
|
elif image_file.startswith("video:"):
|
||||||
image_file = image_file.replace("video:", "")
|
image_file = image_file.replace("video:", "")
|
||||||
image, image_size = decode_video_base64(image_file)
|
image, image_size = decode_video_base64(image_file)
|
||||||
elif isinstance(image_file, str):
|
elif isinstance(image_file, str):
|
||||||
image = Image.open(BytesIO(base64.b64decode(image_file)))
|
image = Image.open(BytesIO(pybase64.b64decode(image_file, validate=True)))
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Invalid image: {image}")
|
raise ValueError(f"Invalid image: {image}")
|
||||||
|
|
||||||
@@ -1866,7 +1868,7 @@ class MultiprocessingSerializer:
|
|||||||
|
|
||||||
if output_str:
|
if output_str:
|
||||||
# Convert bytes to base64-encoded string
|
# Convert bytes to base64-encoded string
|
||||||
output = base64.b64encode(output).decode("utf-8")
|
output = pybase64.b64encode(output).decode("utf-8")
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@@ -1883,7 +1885,7 @@ class MultiprocessingSerializer:
|
|||||||
"""
|
"""
|
||||||
if isinstance(data, str):
|
if isinstance(data, str):
|
||||||
# Decode base64 string to bytes
|
# Decode base64 string to bytes
|
||||||
data = base64.b64decode(data)
|
data = pybase64.b64decode(data, validate=True)
|
||||||
|
|
||||||
return ForkingPickler.loads(data)
|
return ForkingPickler.loads(data)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
"""Common utilities"""
|
"""Common utilities"""
|
||||||
|
|
||||||
import base64
|
|
||||||
import importlib
|
import importlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -20,6 +19,7 @@ from json import dumps
|
|||||||
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
from typing import Any, Callable, List, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pybase64
|
||||||
import requests
|
import requests
|
||||||
from IPython.display import HTML, display
|
from IPython.display import HTML, display
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
@@ -148,15 +148,15 @@ def encode_image_base64(image_path: Union[str, bytes]):
|
|||||||
if isinstance(image_path, str):
|
if isinstance(image_path, str):
|
||||||
with open(image_path, "rb") as image_file:
|
with open(image_path, "rb") as image_file:
|
||||||
data = image_file.read()
|
data = image_file.read()
|
||||||
return base64.b64encode(data).decode("utf-8")
|
return pybase64.b64encode(data).decode("utf-8")
|
||||||
elif isinstance(image_path, bytes):
|
elif isinstance(image_path, bytes):
|
||||||
return base64.b64encode(image_path).decode("utf-8")
|
return pybase64.b64encode(image_path).decode("utf-8")
|
||||||
else:
|
else:
|
||||||
# image_path is PIL.WebPImagePlugin.WebPImageFile
|
# image_path is PIL.WebPImagePlugin.WebPImageFile
|
||||||
image = image_path
|
image = image_path
|
||||||
buffered = BytesIO()
|
buffered = BytesIO()
|
||||||
image.save(buffered, format="PNG")
|
image.save(buffered, format="PNG")
|
||||||
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
return pybase64.b64encode(buffered.getvalue()).decode("utf-8")
|
||||||
|
|
||||||
|
|
||||||
def encode_frame(frame):
|
def encode_frame(frame):
|
||||||
@@ -223,7 +223,7 @@ def encode_video_base64(video_path: str, num_frames: int = 16):
|
|||||||
video_bytes = b"".join(encoded_frames)
|
video_bytes = b"".join(encoded_frames)
|
||||||
|
|
||||||
# Encode the concatenated bytes to base64
|
# Encode the concatenated bytes to base64
|
||||||
video_base64 = "video:" + base64.b64encode(video_bytes).decode("utf-8")
|
video_base64 = "video:" + pybase64.b64encode(video_bytes).decode("utf-8")
|
||||||
|
|
||||||
return video_base64
|
return video_base64
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
import base64
|
import base64
|
||||||
import copy
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|||||||
Reference in New Issue
Block a user