This patch makes two changes:

1. .github/workflows/pr-e2e-test.yml: exposes the HF_TOKEN secret as a
   job-level environment variable of the pr-e2e-test job, and launches
   sglang.launch_server with the model id
   meta-llama/Meta-Llama-3.1-8B-Instruct instead of the local path
   /data/zhyncs/Meta-Llama-3.1-8B-Instruct.
2. docs/en/setup_runner.md: adds Environment="XDG_CACHE_HOME=/data/.cache"
   to the documented runner.service unit.

NOTE(review): presumably the model is now downloaded with HF_TOKEN and
cached under XDG_CACHE_HOME on the self-hosted runner -- confirm.

diff --git a/.github/workflows/pr-e2e-test.yml b/.github/workflows/pr-e2e-test.yml
index 09aff84d3..0abdbfc35 100644
--- a/.github/workflows/pr-e2e-test.yml
+++ b/.github/workflows/pr-e2e-test.yml
@@ -19,6 +19,9 @@ jobs:
   pr-e2e-test:
     runs-on: self-hosted
 
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
@@ -35,7 +38,7 @@ jobs:
       - name: Benchmark Serving
         run: |
           cd /data/zhyncs/venv && source ./bin/activate && cd -
-          python3 -m sglang.launch_server --model /data/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
+          python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
 
           echo "Waiting for server to start..."
           for i in {1..120}; do
diff --git a/docs/en/setup_runner.md b/docs/en/setup_runner.md
index 1d72ea354..107837953 100644
--- a/docs/en/setup_runner.md
+++ b/docs/en/setup_runner.md
@@ -15,6 +15,7 @@ add `/lib/systemd/system/runner.service`
 StartLimitIntervalSec=0
 [Service]
 Environment="CUDA_VISIBLE_DEVICES=7"
+Environment="XDG_CACHE_HOME=/data/.cache"
 Restart=always
 RestartSec=1
 ExecStart=/data/zhyncs/actions-runner/run.sh