diff --git a/benchmark/mmmu/data_utils.py b/benchmark/mmmu/data_utils.py index cf8916934..8c36768d0 100644 --- a/benchmark/mmmu/data_utils.py +++ b/benchmark/mmmu/data_utils.py @@ -75,12 +75,6 @@ CAT_SHORT2LONG = { } -# DATA SAVING -def save_json(filename, ds): - with open(filename, "w") as f: - json.dump(ds, f, indent=4) - - def get_multi_choice_info(options): """ Given the list of options for multiple choice question diff --git a/examples/runtime/multimodal/llava_onevision_server.py b/examples/runtime/multimodal/llava_onevision_server.py index ee921b558..b5636f0a1 100644 --- a/examples/runtime/multimodal/llava_onevision_server.py +++ b/examples/runtime/multimodal/llava_onevision_server.py @@ -6,7 +6,6 @@ python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b- python3 llava_onevision_server.py """ -import base64 import io import os import sys @@ -14,6 +13,7 @@ import time import numpy as np import openai +import pybase64 import requests from decord import VideoReader, cpu from PIL import Image @@ -213,7 +213,7 @@ def prepare_video_messages(video_path): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}] diff --git a/python/sglang/bench_serving.py b/python/sglang/bench_serving.py index 21b822f04..5e4e2c8ef 100644 --- a/python/sglang/bench_serving.py +++ b/python/sglang/bench_serving.py @@ -31,7 +31,10 @@ from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union import aiohttp import numpy as np +import pybase64 import requests +from datasets import load_dataset +from PIL import Image from tqdm.asyncio import tqdm from transformers import ( AutoProcessor, @@ -1020,14 +1023,6 @@ def sample_mmmu_requests( Returns: List of tuples (prompt, prompt_token_len, output_token_len). 
""" - try: - import io - - import pybase64 - from datasets import load_dataset - except ImportError: - raise ImportError("Please install datasets: pip install datasets") - print("Loading MMMU dataset from HuggingFace...") try: @@ -1396,13 +1391,6 @@ def sample_image_requests( - Text lengths follow the 'random' dataset sampling rule. ``prompt_len`` only counts text tokens and excludes image data. """ - try: - import pybase64 - from PIL import Image - except ImportError as e: - raise ImportError( - "Please install Pillow to generate random images: pip install pillow" - ) from e # Parse resolution (supports presets and 'heightxwidth') width, height = parse_image_resolution(image_resolution) diff --git a/python/sglang/srt/distributed/naive_distributed.py b/python/sglang/srt/distributed/naive_distributed.py index b340ff44d..b59380d07 100644 --- a/python/sglang/srt/distributed/naive_distributed.py +++ b/python/sglang/srt/distributed/naive_distributed.py @@ -1,9 +1,9 @@ -import base64 import pickle import time from pathlib import Path from typing import Any, List, Optional +import pybase64 import torch from sglang.srt.utils import MultiprocessingSerializer @@ -77,14 +77,16 @@ class NaiveDistributed: ) _get_path(self._rank).write_text( - base64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix + pybase64.b64encode(pickle.dumps(obj)).decode("utf-8") + text_postfix ) def _read_one(interesting_rank: int): p = _get_path(interesting_rank) while True: if p.exists() and (text := p.read_text()).endswith(text_postfix): - return pickle.loads(base64.b64decode(text[: -len(text_postfix)])) + return pickle.loads( + pybase64.b64decode(text[: -len(text_postfix)], validate=True) + ) time.sleep(0.001) return [ diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 42fb93374..97415b280 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -872,9 +872,9 @@ def get_image_bytes(image_file: Union[str, bytes]): return 
f.read() elif image_file.startswith("data:"): image_file = image_file.split(",")[1] - return pybase64.b64decode(image_file) + return pybase64.b64decode(image_file, validate=True) elif isinstance(image_file, str): - return pybase64.b64decode(image_file) + return pybase64.b64decode(image_file, validate=True) else: raise NotImplementedError(f"Invalid image: {image_file}") @@ -911,7 +911,7 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True): vr = VideoReader(tmp_file.name, ctx=ctx) elif video_file.startswith("data:"): _, encoded = video_file.split(",", 1) - video_bytes = pybase64.b64decode(encoded) + video_bytes = pybase64.b64decode(encoded, validate=True) tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file.write(video_bytes) tmp_file.close() @@ -919,7 +919,7 @@ elif os.path.isfile(video_file): vr = VideoReader(video_file, ctx=ctx) else: - video_bytes = pybase64.b64decode(video_file) + video_bytes = pybase64.b64decode(video_file, validate=True) tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") tmp_file.write(video_bytes) tmp_file.close() @@ -2083,7 +2083,7 @@ class MultiprocessingSerializer: if output_str: # Convert bytes to base64-encoded string - output = pybase64.b64encode(output).decode("utf-8") + output = pybase64.b64encode(output).decode("utf-8") return output diff --git a/test/srt/test_bnb.py b/test/srt/test_bnb.py index 1d9f0201d..4505b20cf 100644 --- a/test/srt/test_bnb.py +++ b/test/srt/test_bnb.py @@ -4,7 +4,6 @@ python3 -m unittest test_bnb.TestVisionModel.test_vlm python3 -m unittest test_bnb.TestLanguageModel.test_mmlu """ -import base64 import io import json import multiprocessing as mp @@ -15,6 +14,7 @@ from types import SimpleNamespace import numpy as np import openai +import pybase64 import requests from PIL import Image diff --git a/test/srt/test_vision_chunked_prefill.py b/test/srt/test_vision_chunked_prefill.py index 
3876e915b..90fe21330 100644 --- a/test/srt/test_vision_chunked_prefill.py +++ b/test/srt/test_vision_chunked_prefill.py @@ -3,7 +3,6 @@ Usage: python3 -m unittest test_vision_chunked_prefill.TestVisionChunkedPrefill.test_chunked_prefill """ -import base64 import io import os import unittest @@ -11,6 +10,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Union import numpy as np +import pybase64 import requests from PIL import Image @@ -45,7 +45,7 @@ class TestVisionChunkedPrefill(CustomTestCase): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}] diff --git a/test/srt/test_vision_openai_server_common.py b/test/srt/test_vision_openai_server_common.py index 66f0c0d7c..392ccf0f8 100644 --- a/test/srt/test_vision_openai_server_common.py +++ b/test/srt/test_vision_openai_server_common.py @@ -1,10 +1,10 @@ -import base64 import io import os from concurrent.futures import ThreadPoolExecutor import numpy as np import openai +import pybase64 import requests from PIL import Image @@ -386,7 +386,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase): pil_img = Image.fromarray(frame) buff = io.BytesIO() pil_img.save(buff, format="JPEG") - base64_str = base64.b64encode(buff.getvalue()).decode("utf-8") + base64_str = pybase64.b64encode(buff.getvalue()).decode("utf-8") base64_frames.append(base64_str) messages = [{"role": "user", "content": []}]