Add reasoning examples for GPT-OSS in Markdown examples (#9626)

2025-09-14 23:27:40 -04:00
parent 1489cd6c02
commit 0b14159fc4
2 changed files with 12 additions and 2 deletions
--- a/docs/basic_usage/gpt_oss.md
+++ b/docs/basic_usage/gpt_oss.md
@@ -6,7 +6,7 @@ Please refer to [https://github.com/sgl-project/sglang/issues/8833](https://gith
 ### Responses API
-GPT‑OSS is compatible with the OpenAI Responses API. Use `client.responses.create(...)` with `model`, `instructions`, `input`, and optional `tools` to enable built‑in tool use.
+GPT‑OSS is compatible with the OpenAI Responses API. Use `client.responses.create(...)` with `model`, `instructions`, `input`, and optional `tools` to enable built‑in tool use. You can set the reasoning level via `instructions`, e.g., "Reasoning: high". Supported levels are "low" (fast), "medium" (balanced), and "high" (deep).
 ### Built-in Tools
@@ -69,6 +69,16 @@ tools = [
    {"type": "web_search_preview"},
 ]
 # Reasoning level example
 response = client.responses.create(
    model="openai/gpt-oss-120b",
    instructions="You are a helpful assistant.",
    reasoning_effort="high",  # Supports high, medium, or low
    input="In one sentence, explain the transformer architecture.",
 )
 print("====== reasoning: high ======")
 print(response.output_text)
 # Test python tool
 response = client.responses.create(
    model="openai/gpt-oss-120b",
--- a/python/sglang/srt/entrypoints/openai/protocol.py
+++ b/python/sglang/srt/entrypoints/openai/protocol.py
@@ -450,7 +450,7 @@ class ChatCompletionRequest(BaseModel):
        description="Constrains effort on reasoning for reasoning models. "
        "'low' is the least effort, 'high' is the most effort. Reducing reasoning effort can "
        "result in faster responses and fewer tokens used on reasoning in a response. "
-        "Currently only supported for OpenAI models.",
+        "Currently only supported for OpenAI models in the harmony path, i.e., GPT-OSS models.",
    )
    @model_validator(mode="before")