[Doc] Update max_tokens to max_completion_tokens in all docs (#6248)
### What this PR does / why we need it?
Fixes the following deprecation warning:
```
DeprecationWarning: max_tokens is deprecated in favor of the max_completion_tokens field.
```
- vLLM version: v0.14.1
- vLLM main: d68209402d
Signed-off-by: shen-shanshan <467638484@qq.com>
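
For anyone updating client scripts the same way, the change is a one-for-one rename of the request field. A minimal sketch against the OpenAI-compatible chat completions endpoint (the model name, port, and prompt are illustrative, borrowed from examples in the diff below):

```bash
# Minimal sketch: the served model name and port are assumptions, not part of this commit.
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "Qwen/Qwen3-0.6B",
        "messages": [{"role": "user", "content": "The future of AI is"}],
        "max_completion_tokens": 64,
        "temperature": 0.6
    }'
```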
````diff
@@ -78,7 +78,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -138,7 +138,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -179,7 +179,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "[unused9]系统:[unused10][unused9]用户:'${question}'[unused10][unused9]助手:",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6

@@ -221,7 +221,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen3-0.6B",

@@ -264,7 +264,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-7B-Instruct",

@@ -307,7 +307,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
+sampling_params = SamplingParams(max_completion_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-VL-3B-Instruct",

@@ -243,7 +243,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_r1",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -661,7 +661,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_v3",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -840,7 +840,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "deepseek_v3.2",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -142,7 +142,7 @@ llm = LLM(
 )
 
 sampling_params = SamplingParams(
-    max_tokens=512
+    max_completion_tokens=512
 )
 
 image_messages = [

@@ -238,7 +238,7 @@ llm = LLM(
 )
 
 sampling_params = SamplingParams(
-    max_tokens=512
+    max_completion_tokens=512
 )
 
 image_messages = [

@@ -127,7 +127,7 @@ curl http://<IP>:<Port>/v1/completions \
     -d '{
         "model": "qwen-2.5-7b-instruct",
         "prompt": "Beijing is a",
-        "max_tokens": 5,
+        "max_completion_tokens": 5,
         "temperature": 0
     }'
 ```

@@ -156,7 +156,7 @@ curl http://127.0.0.1:8000/v1/chat/completions -H "Content-Type: application/json"
             ]
         }
     ],
-    "max_tokens": 100,
+    "max_completion_tokens": 100,
     "temperature": 0.7
 }'

@@ -269,7 +269,7 @@ curl http://<node0_ip>:<port>/v1/completions \
     -d '{
         "model": "qwen3",
         "prompt": "The future of AI is",
-        "max_tokens": 50,
+        "max_completion_tokens": 50,
         "temperature": 0
     }'
 ```

@@ -62,7 +62,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen3-32b-w4a4",
         "prompt": "what is large language model?",
-        "max_tokens": "128",
+        "max_completion_tokens": "128",
         "top_p": "0.95",
         "top_k": "40",
         "temperature": "0.0"

@@ -106,7 +106,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen3-8b-w4a8",
         "prompt": "what is large language model?",
-        "max_tokens": "128",
+        "max_completion_tokens": "128",
         "top_p": "0.95",
         "top_k": "40",
         "temperature": "0.0"

@@ -82,7 +82,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -214,7 +214,7 @@ curl http://localhost:8113/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 4096
+    "max_completion_tokens": 4096
 }'
 ```

@@ -75,7 +75,7 @@ curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json"
     "temperature": 0.6,
     "top_p": 0.95,
     "top_k": 20,
-    "max_tokens": 32
+    "max_completion_tokens": 32
 }'
 ```

@@ -103,7 +103,7 @@ if __name__ == '__main__':
     prompts = [
         "Who are you?",
     ]
-    sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_tokens=32)
+    sampling_params = SamplingParams(temperature=0.6, top_p=0.95, top_k=40, max_completion_tokens=32)
     llm = LLM(model="Qwen/Qwen3-Next-80B-A3B-Instruct",
               tensor_parallel_size=4,
               enforce_eager=True,

@@ -123,7 +123,7 @@ def main():
         temperature=0.6,
         top_p=0.95,
         top_k=20,
-        max_tokens=16384,
+        max_completion_tokens=16384,
     )
 
     processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)

@@ -243,7 +243,7 @@ evalscope eval \
     --datasets omni_bench, gsm8k, bbh \
     --dataset-args '{"omni_bench": { "extra_params": { "use_image": true, "use_audio": false}}}' \
     --eval-batch-size 1 \
-    --generation-config '{"max_tokens": 10000, "temperature": 0.6}' \
+    --generation-config '{"max_completion_tokens": 10000, "temperature": 0.6}' \
     --limit 100
 ```

@@ -120,7 +120,7 @@ curl http://localhost:8000/v1/chat/completions \
             {"type": "text", "text": "What is the text in the illustrate?"}
         ]}
     ],
-    "max_tokens": 100
+    "max_completion_tokens": 100
 }'
 ```

@@ -182,7 +182,7 @@ curl http://localhost:8000/v1/chat/completions \
             {"type": "text", "text": "What is in this video?"}
         ]}
     ],
-    "max_tokens": 100
+    "max_completion_tokens": 100
 }'
 ```

@@ -932,7 +932,7 @@ curl http://192.0.0.1:8080/v1/completions \
     -d '{
         "model": "qwen3-moe",
         "prompt": "Who are you?",
-        "max_tokens": 100,
+        "max_completion_tokens": 100,
         "temperature": 0
     }'
 ```

@@ -271,7 +271,7 @@ curl http://192.0.0.1:8080/v1/chat/completions \
             {"type": "text", "text": "What is the text in the illustrate?"}
         ]}
     ],
-    "max_tokens": 100,
+    "max_completion_tokens": 100,
     "temperature": 0
 }'
 ```

@@ -186,7 +186,7 @@ curl http://localhost:8000/v1/completions \
     -d '{
         "model": "qwen",
         "prompt": "tell me how to sleep well",
-        "max_tokens": 100,
+        "max_completion_tokens": 100,
         "temperature": 0
     }'
 ```
````