Multi-node Tensor Parallelism (#550)
Co-authored-by: Lianmin Zheng <lianminzheng@gmail.com>
This commit is contained in:
@@ -20,7 +20,7 @@ python3 bench_throughput.py --backend srt --tokenizer meta-llama/Llama-2-7b-chat
|
||||
|
||||
```
|
||||
# run synthetic
|
||||
python3 synthetic_benchmark.py --backend srt --tokenizer meta-llama/Llama-2-7b-chat-hf --num-prompt 1000 --request-rate 100 --input-len 1024 --output-len 256 --port 30000
|
||||
python3 bench_throughput.py --backend srt --tokenizer meta-llama/Llama-2-7b-chat-hf --num-prompt 1000 --request-rate 100 --input-len 1024 --output-len 256 --port 30000
|
||||
```
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-cha
|
||||
|
||||
```
|
||||
# run synthetic
|
||||
python3 synthetic_benchmark.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --num-prompt 1000 --request-rate 100 --input-len 1024 --output-len 256 --port 30000
|
||||
python3 bench_throughput.py --backend vllm --tokenizer meta-llama/Llama-2-7b-chat-hf --num-prompt 1000 --request-rate 100 --input-len 1024 --output-len 256 --port 30000
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ if __name__ == "__main__":
|
||||
raise ValueError(f"Invalid backend: {args.backend}")
|
||||
|
||||
url = f"{args.host}:{args.port}"
|
||||
a = random.randint(0, 1 << 20)
|
||||
a = 20
|
||||
max_new_tokens = 256
|
||||
prompt = f"{a, }"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user