Add a parallel sampling case (#34)

2024-01-17 22:26:32 -08:00
parent 501f944445
commit b240f75100
3 changed files with 42 additions and 2 deletions
--- a/benchmark/latency_throughput/README.md
+++ b/benchmark/latency_throughput/README.md
@@ -19,7 +19,7 @@ python3 -m vllm.entrypoints.api_server --model meta-llama/Llama-2-7b-chat-hf --d
 ```

 ```
-python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10
+python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --dataset ShareGPT_V3_unfiltered_cleaned_split.json --num-prompts 10 --request-rate 10 --port 21000
 ```