[Fix] Fix all the Huggingface paths (#1553)

This commit is contained in:
Theresa Barton
2024-10-02 10:12:07 -07:00
committed by GitHub
parent 8cdc76f6d4
commit 2c7d0a5b8b
11 changed files with 24 additions and 24 deletions

View File

@@ -30,7 +30,7 @@ apt install nsight-systems-cli
```bash
# server
# set the delay and duration times according to needs
nsys profile --trace-fork-before-exec=true --cuda-graph-trace=node -o sglang.out --delay 60 --duration 70 python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --disable-radix-cache
nsys profile --trace-fork-before-exec=true --cuda-graph-trace=node -o sglang.out --delay 60 --duration 70 python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disable-radix-cache
# client
python3 -m sglang.bench_serving --backend sglang --num-prompts 6000 --dataset-name random --random-input 4096 --random-output 2048

View File

@@ -35,7 +35,7 @@ docker run --gpus all \
--env "HF_TOKEN=<secret>" \
--ipc=host \
lmsysorg/sglang:latest \
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0 --port 30000
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --host 0.0.0.0 --port 30000
```
### Method 4: Using docker compose
@@ -75,7 +75,7 @@ resources:
run: |
conda deactivate
python3 -m sglang.launch_server \
--model-path meta-llama/Meta-Llama-3.1-8B-Instruct \
--model-path meta-llama/Llama-3.1-8B-Instruct \
--host 0.0.0.0 \
--port 30000
```