diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py
index 18d67cac7..ce615d43a 100644
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -13,11 +13,13 @@ setattr(threading, "_register_atexit", lambda *args, **kwargs: None)
 
 import aiohttp
 import psutil
+import pydantic
 import requests
 import uvicorn
 import uvloop
 from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import Response, StreamingResponse
+from pydantic import BaseModel
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.srt.conversation import (
     Conversation,
@@ -57,6 +59,15 @@ tokenizer_manager = None
 chat_template_name = None
 
 
+# FIXME: Remove this once we drop support for pydantic 1.x
+IS_PYDANTIC_1 = int(pydantic.VERSION.split(".")[0]) == 1
+
+def jsonify_pydantic_model(obj: BaseModel):
+    if IS_PYDANTIC_1:
+        return obj.json(ensure_ascii=False)
+    return obj.model_dump_json()
+
+
 @app.get("/health")
 async def health() -> Response:
     """Health check."""
@@ -75,7 +86,8 @@ async def get_model_info():
 async def flush_cache():
     await tokenizer_manager.flush_cache()
     return Response(
-        content="Cache flushed.\nPlease check backend logs for more details. (When there are running or waiting requests, the operation will not be performed.)\n",
+        content="Cache flushed.\nPlease check backend logs for more details. "
+        "(When there are running or waiting requests, the operation will not be performed.)\n",
         status_code=200,
     )
@@ -152,7 +164,7 @@ async def v1_completions(raw_request: Request):
                         total_tokens=prompt_tokens + completion_tokens,
                     ),
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
             yield "data: [DONE]\n\n"
 
         return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")
@@ -262,7 +274,7 @@ async def v1_chat_completions(raw_request: Request):
                         choices=[choice_data],
                         model=request.model,
                     )
-                    yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                    yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
 
                 text = content["text"]
                 delta = text[len(stream_buffer) :]
@@ -275,7 +287,7 @@ async def v1_chat_completions(raw_request: Request):
                     choices=[choice_data],
                     model=request.model,
                 )
-                yield f"data: {chunk.json(exclude_unset=True, ensure_ascii=False)}\n\n"
+                yield f"data: {jsonify_pydantic_model(chunk)}\n\n"
         yield "data: [DONE]\n\n"
 
     return StreamingResponse(gnerate_stream_resp(), media_type="text/event-stream")