Add a parallel sampling case (#34)

This commit is contained in:
Lianmin Zheng
2024-01-17 22:26:32 -08:00
parent 501f944445
commit b240f75100
3 changed files with 42 additions and 2 deletions

View File

@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d
```
```
-python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10
+python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10 --port 21000
```