[Doc] Update max_tokens to max_completion_tokens in all docs (#6248)
### What this PR does / why we need it?
Fixes the following deprecation warning:
```
DeprecationWarning: max_tokens is deprecated in favor of the max_completion_tokens field.
```
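For context, the warning is triggered by requests that still pass the deprecated `max_tokens` field. A minimal sketch of the updated OpenAI-compatible call (the endpoint URL is taken from the docs examples below; using the `openai` client here is an illustrative assumption, not part of this PR):

```python
from openai import OpenAI

# Point the client at a locally served OpenAI-compatible endpoint
# (base_url and api_key below are illustrative assumptions).
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[{"role": "user", "content": "The future of AI is"}],
    max_completion_tokens=64,  # replaces the deprecated max_tokens field
    temperature=0.6,
)
print(completion.choices[0].message.content)
```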
- vLLM version: v0.14.1
- vLLM main: d68209402d
Signed-off-by: shen-shanshan <467638484@qq.com>
```diff
@@ -78,7 +78,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6
@@ -108,7 +108,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6
@@ -138,7 +138,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "The future of AI is",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
         "top_p": 0.95,
         "top_k": 50,
         "temperature": 0.6
@@ -179,7 +179,7 @@ curl http://localhost:8000/v1/completions \
     -H "Content-Type: application/json" \
     -d '{
         "prompt": "[unused9]系统:[unused10][unused9]用户:'${question}'[unused10][unused9]助手:",
-        "max_tokens": 64,
+        "max_completion_tokens": 64,
        "top_p": 0.95,
        "top_k": 50,
        "temperature": 0.6
@@ -221,7 +221,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen3-0.6B",
@@ -264,7 +264,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+sampling_params = SamplingParams(max_completion_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-7B-Instruct",
@@ -307,7 +307,7 @@ prompts = [
     "The future of AI is",
 ]
 # Create a sampling params object.
-sampling_params = SamplingParams(max_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
+sampling_params = SamplingParams(max_completion_tokens=100, top_p=0.95, top_k=50, temperature=0.6)
 # Create an LLM.
 llm = LLM(
     model="Qwen/Qwen2.5-VL-3B-Instruct",
```