init
This commit is contained in:
16
benchmarks/launch_tgi_server.sh
Executable file
16
benchmarks/launch_tgi_server.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
PORT=8000
|
||||
MODEL=$1
|
||||
TOKENS=$2
|
||||
|
||||
docker run --gpus all --shm-size 1g -p $PORT:80 \
|
||||
-v $PWD/data:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:1.4.0 \
|
||||
--model-id $MODEL \
|
||||
--sharded false \
|
||||
--max-input-length 1024 \
|
||||
--max-total-tokens 2048 \
|
||||
--max-best-of 5 \
|
||||
--max-concurrent-requests 5000 \
|
||||
--max-batch-total-tokens $TOKENS
|
||||
Reference in New Issue
Block a user