[Doc] Update max_tokens to max_completion_tokens in all docs (#6248)

### What this PR does / why we need it?

Fixes the following deprecation warning:

```
DeprecationWarning: max_tokens is deprecated in favor of the max_completion_tokens field.
```

- vLLM version: v0.14.1
- vLLM main: d68209402d
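
For reference, a minimal sketch of the rename on the client side, assuming the OpenAI Python SDK pointed at vLLM's OpenAI-compatible server (the endpoint URL and model name below are placeholders, not part of this PR):

```python
from openai import OpenAI

# Placeholder endpoint/model for a locally served vLLM instance.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Deprecated spelling: requests like this trigger the warning above.
resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "The future of AI is"}],
    max_tokens=64,
)

# Preferred spelling: the same request using the replacement field.
resp = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "The future of AI is"}],
    max_completion_tokens=64,
)
print(resp.choices[0].message.content)
```

The offline-inference examples get the parallel rename on `SamplingParams`, as the hunks below show; a corresponding sketch, assuming a vLLM build where `SamplingParams` accepts the new field name:

```python
from vllm import LLM, SamplingParams

# Field name per the hunks below; older vLLM builds use max_tokens here.
sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
llm = LLM(model="Qwen/Qwen3-0.6B")
outputs = llm.generate(["The future of AI is"], sampling_params)
print(outputs[0].outputs[0].text)
```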

Signed-off-by: shen-shanshan <467638484@qq.com>
Author: Shanshan Shen
Date: 2026-01-26 11:57:40 +08:00
Committed by: GitHub
Parent: 418fccf0bc
Commit: e3eefdecbd

28 changed files with 43 additions and 43 deletions


@@ -78,7 +78,7 @@ curl http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": "The future of AI is",
"max_tokens": 64,
"max_completion_tokens": 64,
"top_p": 0.95,
"top_k": 50,
"temperature": 0.6
@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": "The future of AI is",
"max_tokens": 64,
"max_completion_tokens": 64,
"top_p": 0.95,
"top_k": 50,
"temperature": 0.6
@@ -138,7 +138,7 @@ curl http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": "The future of AI is",
"max_tokens": 64,
"max_completion_tokens": 64,
"top_p": 0.95,
"top_k": 50,
"temperature": 0.6
@@ -179,7 +179,7 @@ curl http://localhost:8000/v1/completions \
-H "Content-Type: application/json" \
-d '{
"prompt": "[unused9]系统:[unused10][unused9]用户:'${question}'[unused10][unused9]助手:",
"max_tokens": 64,
"max_completion_tokens": 64,
"top_p": 0.95,
"top_k": 50,
"temperature": 0.6
@@ -221,7 +221,7 @@ prompts = [
"The future of AI is",
]
# Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
# Create an LLM.
llm = LLM(
model="Qwen/Qwen3-0.6B",
@@ -264,7 +264,7 @@ prompts = [
"The future of AI is",
]
# Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
# Create an LLM.
llm = LLM(
model="Qwen/Qwen2.5-7B-Instruct",
@@ -307,7 +307,7 @@ prompts = [
"The future of AI is",
]
# Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
+sampling_params = SamplingParams(max_completion_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
# Create an LLM.
llm = LLM(
model="Qwen/Qwen2.5-VL-3B-Instruct",


@@ -243,7 +243,7 @@ curl http://<node0_ip>:<port>/v1/completions \
-d '{
"model": "deepseek_r1",
"prompt": "The future of AI is",
"max_tokens": 50,
"max_completion_tokens": 50,
"temperature": 0
}'
```


@@ -661,7 +661,7 @@ curl http://<node0_ip>:<port>/v1/completions \
-d '{
"model": "deepseek_v3",
"prompt": "The future of AI is",
"max_tokens": 50,
"max_completion_tokens": 50,
"temperature": 0
}'
```


@@ -840,7 +840,7 @@ curl http://<node0_ip>:<port>/v1/completions \
-d '{
"model": "deepseek_v3.2",
"prompt": "The future of AI is",
"max_tokens": 50,
"max_completion_tokens": 50,
"temperature": 0
}'
```


@@ -142,7 +142,7 @@ llm = LLM(
)
sampling_params = SamplingParams(
-max_tokens=512
+max_completion_tokens=512
)
image_messages = [
@@ -238,7 +238,7 @@ llm = LLM(
)
sampling_params = SamplingParams(
-max_tokens=512
+max_completion_tokens=512
)
image_messages = [


@@ -127,7 +127,7 @@ curl http://<IP>:<Port>/v1/completions \
-d '{
"model": "qwen-2.5-7b-instruct",
"prompt": "Beijing is a",
"max_tokens": 5,
"max_completion_tokens": 5,
"temperature": 0
}'
```


@@ -156,7 +156,7 @@ curl http://127.0.0.1:8000/v1/chat/completions -H "Content-Type: application/j
]
}
],
"max_tokens": 100,
"max_completion_tokens": 100,
"temperature": 0.7
}'


@@ -269,7 +269,7 @@ curl http://<node0_ip>:<port>/v1/completions \
-d '{
"model": "qwen3",
"prompt": "The future of AI is",
"max_tokens": 50,
"max_completion_tokens": 50,
"temperature": 0
}'
```


@@ -62,7 +62,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/jso
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"max_tokens": 4096
"max_completion_tokens": 4096
}'
```


@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
-d '{
"model": "qwen3-32b-w4a4",
"prompt": "what is large language model?",
"max_tokens": "128",
"max_completion_tokens": "128",
"top_p": "0.95",
"top_k": "40",
"temperature": "0.0"


@@ -106,7 +106,7 @@ curl http://localhost:8000/v1/completions \
-d '{
"model": "qwen3-8b-w4a8",
"prompt": "what is large language model?",
"max_tokens": "128",
"max_completion_tokens": "128",
"top_p": "0.95",
"top_k": "40",
"temperature": "0.0"


@@ -82,7 +82,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/jso
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"max_tokens": 4096
"max_completion_tokens": 4096
}'
```


@@ -214,7 +214,7 @@ curl http://localhost:8113/v1/chat/completions -H "Content-Type: application/jso
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"max_tokens": 4096
"max_completion_tokens": 4096
}'
```


@@ -75,7 +75,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/jso
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"max_tokens": 32
"max_completion_tokens": 32
}'
```
@@ -103,7 +103,7 @@ if __name__ == '__main__':
prompts = [
"Who are you?",
]
-sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_tokens=32)
+sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_completion_tokens=32)
llm = LLM(model="Qwen/Qwen3-Next-80B-A3B-Instruct",
tensor_parallel_size=4,
enforce_eager=True,


@@ -123,7 +123,7 @@ def main():
temperature=0.6,
top_p=0.95,
top_k=20,
-max_tokens=16384,
+max_completion_tokens=16384,
)
processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)
@@ -243,7 +243,7 @@ evalscope eval \
--datasets omni_bench, gsm8k, bbh \
--dataset-args '{"omni_bench": { "extra_params": { "use_image": true, "use_audio": false}}}' \
--eval-batch-size 1 \
---generation-config '{"max_tokens": 10000, "temperature": 0.6}' \
+--generation-config '{"max_completion_tokens": 10000, "temperature": 0.6}' \
--limit 100
```


@@ -120,7 +120,7 @@ curl http://localhost:8000/v1/chat/completions \
{"type": "text", "text": "What is the text in the illustrate?"}
]}
],
"max_tokens": 100
"max_completion_tokens": 100
}'
```
@@ -182,7 +182,7 @@ curl http://localhost:8000/v1/chat/completions \
{"type": "text", "text": "What is in this video?"}
]}
],
"max_tokens": 100
"max_completion_tokens": 100
}'
```


@@ -932,7 +932,7 @@ curl http://192.0.0.1:8080/v1/completions \
-d '{
"model": "qwen3-moe",
"prompt": "Who are you?",
"max_tokens": 100,
"max_completion_tokens": 100,
"temperature": 0
}'
```


@@ -271,7 +271,7 @@ curl http://192.0.0.1:8080/v1/chat/completions \
{"type": "text", "text": "What is the text in the illustrate?"}
]}
],
"max_tokens": 100,
"max_completion_tokens": 100,
"temperature": 0
}'
```


@@ -186,7 +186,7 @@ curl http://localhost:8000/v1/completions \
-d '{
"model": "qwen",
"prompt": "tell me how to sleep well",
"max_tokens": 100,
"max_completion_tokens": 100,
"temperature": 0
}'
```