From 0b14159fc4e07c57bed6f4603056bb3dd0303e8d Mon Sep 17 00:00:00 2001 From: Vincent Zhong <207368749+vincentzed@users.noreply.github.com> Date: Sun, 14 Sep 2025 23:27:40 -0400 Subject: [PATCH] Add reasoning examples for GPT-OSS in Markdown examples (#9626) --- docs/basic_usage/gpt_oss.md | 12 +++++++++++- python/sglang/srt/entrypoints/openai/protocol.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/basic_usage/gpt_oss.md b/docs/basic_usage/gpt_oss.md index 240463ec4..798fd678d 100644 --- a/docs/basic_usage/gpt_oss.md +++ b/docs/basic_usage/gpt_oss.md @@ -6,7 +6,7 @@ Please refer to [https://github.com/sgl-project/sglang/issues/8833](https://gith ### Responses API -GPT‑OSS is compatible with the OpenAI Responses API. Use `client.responses.create(...)` with `model`, `instructions`, `input`, and optional `tools` to enable built‑in tool use. +GPT‑OSS is compatible with the OpenAI Responses API. Use `client.responses.create(...)` with `model`, `instructions`, `input`, and optional `tools` to enable built‑in tool use. You can set reasoning level via `instructions`, e.g., "Reasoning: high" (also supports "medium" and "low") — levels: low (fast), medium (balanced), high (deep). ### Built-in Tools @@ -69,6 +69,16 @@ tools = [ {"type": "web_search_preview"}, ] +# Reasoning level example +response = client.responses.create( + model="openai/gpt-oss-120b", + instructions="You are a helpful assistant.", 
+ reasoning_effort="high", # Supports high, medium, or low + input="In one sentence, explain the transformer architecture.", +) +print("====== reasoning: high ======") +print(response.output_text) + # Test python tool response = client.responses.create( model="openai/gpt-oss-120b", diff --git a/python/sglang/srt/entrypoints/openai/protocol.py b/python/sglang/srt/entrypoints/openai/protocol.py index 7fed16703..8111f1939 100644 --- a/python/sglang/srt/entrypoints/openai/protocol.py +++ b/python/sglang/srt/entrypoints/openai/protocol.py @@ -450,7 +450,7 @@ class ChatCompletionRequest(BaseModel): description="Constrains effort on reasoning for reasoning models. " "'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can " "result in faster responses and fewer tokens used on reasoning in a response. " - "Currently only supported for OpenAI models.", + "Currently only supported for OpenAI models in the harmony path, i.e., GPT-OSS models.", ) @model_validator(mode="before")