[Doc] Update max_tokens to max_completion_tokens in all docs (#6248)
### What this PR does / why we need it?
Fixes the following deprecation warning:
```
DeprecationWarning: max_tokens is deprecated in favor of the max_completion_tokens field.
```
- vLLM version: v0.14.1
- vLLM main: d68209402d
Signed-off-by: shen-shanshan <467638484@qq.com>
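
For anyone updating client scripts the same way, the change is a one-for-one rename of the request field. A minimal sketch against the OpenAI-compatible chat completions endpoint (the model name, port, and prompt are illustrative, borrowed from examples in the diff below):

```bash
# Minimal sketch: the served model name and port are assumptions, not part of this commit.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "Qwen/Qwen3-0.6B",
        "messages": [{"role": "user", "content": "The future of AI is"}],
        "max_completion_tokens": 64,
        "temperature": 0.6
    }'
```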
````diff
@@ -78,7 +78,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -138,7 +138,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -179,7 +179,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "[unused9]系统:[unused10][unused9]用户:'${question}'[unused10][unused9]助手:",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -221,7 +221,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen3-0.6B",

@@ -264,7 +264,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-7B-Instruct",

@@ -307,7 +307,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
+sampling_params = SamplingParams(max_completion_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-VL-3B-Instruct",

@@ -243,7 +243,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_r1",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -661,7 +661,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_v3",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -840,7 +840,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_v3.2",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -142,7 +142,7 @@ llm = LLM(
 )
 
 sampling_params = SamplingParams(
-    max_tokens=512
+    max_completion_tokens=512
 )
 
 image_messages = [

@@ -238,7 +238,7 @@ llm = LLM(
 )
 
 sampling_params = SamplingParams(
-    max_tokens=512
+    max_completion_tokens=512
 )
 
 image_messages = [

@@ -127,7 +127,7 @@ curl http://<IP>:<Port>/v1/completions \
     -d '{
         "model": "qwen-2.5-7b-instruct",
         "prompt": "Beijing is a",
-        "max_tokens": 5,
+        "max_completion_tokens": 5,
         "temperature": 0
     }'
 ```

@@ -156,7 +156,7 @@ curl http://127.0.0.1:8000/v1/chat/completions -H "Content-Type: application/json"
             ]
         }
     ],
-    "max_tokens": 100,
+    "max_completion_tokens": 100,
     "temperature": 0.7
 }'

@@ -269,7 +269,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "qwen3",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -62,7 +62,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen3-32b-w4a4",
         "prompt": "what is large language model?",
-        "max_tokens": "128",
+        "max_completion_tokens": "128",
         "top_p": "0.95",
         "top_k": "40",
         "temperature": "0.0"

@@ -106,7 +106,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen3-8b-w4a8",
         "prompt": "what is large language model?",
-        "max_tokens": "128",
+        "max_completion_tokens": "128",
         "top_p": "0.95",
         "top_k": "40",
         "temperature": "0.0"

@@ -82,7 +82,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -214,7 +214,7 @@ curl http://localhost:8113/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -75,7 +75,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 32
+    "max_completion_tokens": 32
 }'
 ```

@@ -103,7 +103,7 @@ if __name__ == '__main__':
     prompts = [
         "Who are you?",
     ]
-    sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_tokens=32)
+    sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_completion_tokens=32)
     llm = LLM(model="Qwen/Qwen3-Next-80B-A3B-Instruct",
               tensor_parallel_size=4,
               enforce_eager=True,

@@ -123,7 +123,7 @@ def main():
         temperature=0.6,
         top_p=0.95,
         top_k=20,
-        max_tokens=16384,
+        max_completion_tokens=16384,
     )
 
     processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)

@@ -243,7 +243,7 @@ evalscope eval \
     --datasets omni_bench, gsm8k, bbh \
     --dataset-args '{"omni_bench": { "extra_params": { "use_image": true, "use_audio": false}}}' \
     --eval-batch-size 1 \
-    --generation-config '{"max_tokens": 10000, "temperature": 0.6}' \
+    --generation-config '{"max_completion_tokens": 10000, "temperature": 0.6}' \
     --limit 100
 ```

@@ -120,7 +120,7 @@ curl http://localhost:8000/v1/chat/completions \
             {"type": "text", "text": "What is the text in the illustrate?"}
         ]}
     ],
-    "max_tokens": 100
+    "max_completion_tokens": 100
 }'
 ```

@@ -182,7 +182,7 @@ curl http://localhost:8000/v1/chat/completions \
             {"type": "text", "text": "What is in this video?"}
         ]}
     ],
-    "max_tokens": 100
+    "max_completion_tokens": 100
 }'
 ```

@@ -932,7 +932,7 @@ curl http://192.0.0.1:8080/v1/completions \
     -d '{
         "model": "qwen3-moe",
         "prompt": "Who are you?",
-        "max_tokens": 100,
+        "max_completion_tokens": 100,
         "temperature": 0
     }'
 ```

@@ -271,7 +271,7 @@ curl http://192.0.0.1:8080/v1/chat/completions \
             {"type": "text", "text": "What is the text in the illustrate?"}
         ]}
     ],
-    "max_tokens": 100,
+    "max_completion_tokens": 100,
     "temperature": 0
 }'
 ```

@@ -186,7 +186,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen",
         "prompt": "tell me how to sleep well",
-        "max_tokens": 100,
+        "max_completion_tokens": 100,
         "temperature": 0
     }'
 ```
````