Add support for OpenAI API o1 model (#3363)
Co-authored-by: Shan Yu <shanyu1@g.ucla.edu>
This commit is contained in:
57
examples/frontend_language/quick_start/openai_example_o1.py
Normal file
57
examples/frontend_language/quick_start/openai_example_o1.py
Normal file
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
Usage:
|
||||
export OPENAI_API_KEY=sk-******
|
||||
python3 openai_example_chat.py
|
||||
"""
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn conversation: ask question_1, then question_2.

    Generated replies are captured under the keys "answer_1" and
    "answer_2" on the resulting state.
    """
    s += sgl.system("You are a helpful assistant.")

    # First turn: reply capped at 100 generated tokens.
    s += sgl.user(question_1)
    first_reply = sgl.gen("answer_1", max_tokens=100)
    s += sgl.assistant(first_reply)

    # Second turn: no explicit token cap.
    s += sgl.user(question_2)
    second_reply = sgl.gen("answer_2")
    s += sgl.assistant(second_reply)
|
||||
|
||||
|
||||
def single():
    """Run one two-turn conversation and print its transcript."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    # Print the full role-tagged transcript, then the first answer alone.
    transcript = state.messages()
    for message in transcript:
        print(message["role"], ":", message["content"])

    print("\n-- answer_1 --\n", state["answer_1"])
|
||||
|
||||
|
||||
def batch():
    """Run two conversations as one batch and print each transcript."""
    requests = [
        {
            "question_1": "What is the capital of the United States?",
            "question_2": "List two local attractions.",
        },
        {
            "question_1": "What is the capital of France?",
            "question_2": "What is the population of this city?",
        },
    ]
    states = multi_turn_question.run_batch(requests)

    for state in states:
        print(state.messages())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sgl.set_default_backend(sgl.OpenAI("o1"))
|
||||
|
||||
# Run a single request
|
||||
print("\n========== single ==========\n")
|
||||
single()
|
||||
# Run a batch of requests
|
||||
print("\n========== batch ==========\n")
|
||||
batch()
|
||||
@@ -161,6 +161,10 @@ class OpenAI(BaseBackend):
|
||||
prompt = s.text_
|
||||
|
||||
kwargs = sampling_params.to_openai_kwargs()
|
||||
if self.model_name.startswith("o1") or self.model_name.startswith("o3"):
|
||||
kwargs.pop("max_tokens", None)
|
||||
else:
|
||||
kwargs.pop("max_completion_tokens", None)
|
||||
comp = openai_completion(
|
||||
client=self.client,
|
||||
token_usage=self.token_usage,
|
||||
@@ -175,6 +179,7 @@ class OpenAI(BaseBackend):
|
||||
), "constrained type not supported on chat model"
|
||||
kwargs = sampling_params.to_openai_kwargs()
|
||||
kwargs.pop("stop")
|
||||
|
||||
comp = openai_completion(
|
||||
client=self.client,
|
||||
token_usage=self.token_usage,
|
||||
|
||||
@@ -63,6 +63,7 @@ class SglSamplingParams:
|
||||
warnings.warn("Regular expression is not supported in the OpenAI backend.")
|
||||
return {
|
||||
"max_tokens": self.max_new_tokens,
|
||||
"max_completion_tokens": self.max_new_tokens,
|
||||
"stop": self.stop or None,
|
||||
"temperature": self.temperature,
|
||||
"top_p": self.top_p,
|
||||
|
||||
Reference in New Issue
Block a user