diff --git a/.github/workflows/e2e-test.yml b/.github/workflows/e2e-test.yml index 7b59054fe..3514377e5 100644 --- a/.github/workflows/e2e-test.yml +++ b/.github/workflows/e2e-test.yml @@ -21,26 +21,24 @@ jobs: e2e-test: runs-on: self-hosted - env: - HF_TOKEN : ${{ secrets.HF_TOKEN }} - steps: - name: Checkout code uses: actions/checkout@v3 - name: Install dependencies run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - + source $HOME/venv/bin/activate + echo "$HOME/venv/bin" >> $GITHUB_PATH pip cache purge pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall pip install --upgrade transformers + pip install python-multipart - name: Benchmark Serving Throughput run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & echo "Waiting for server to start..." @@ -56,7 +54,7 @@ jobs: sleep 1 done - cd /data/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 + cd $HOME && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 echo "Stopping server..." kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index e73a4fc9e..d176a6907 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -21,16 +21,14 @@ jobs: unit-test: runs-on: self-hosted - env: - HF_TOKEN : ${{ secrets.HF_TOKEN }} - steps: - name: Checkout code uses: actions/checkout@v3 - name: Install dependencies run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - + source $HOME/venv/bin/activate + echo "$HOME/venv/bin" >> $GITHUB_PATH pip cache purge pip install --upgrade pip @@ -40,29 +38,20 @@ jobs: - name: Test Frontend Language with OpenAI Backend run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - - export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} - cd test/lang python3 test_openai_backend.py - name: Test Frontend Language with SRT Backend run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - - cd test/lang python3 test_srt_backend.py - name: Test OpenAI API Server run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - - cd test/srt python3 test_openai_server.py - name: Test Accuracy run: | - cd /data/zhyncs/venv && source ./bin/activate && cd - - cd test/srt python3 test_eval_accuracy.py diff --git a/test/lang/test_bind_cache.py b/test/lang/test_bind_cache.py index 702f27c06..53e1b9754 100644 --- a/test/lang/test_bind_cache.py +++ b/test/lang/test_bind_cache.py @@ -1,6 +1,7 @@ import unittest import sglang as sgl +from sglang.test.test_utils import MODEL_NAME_FOR_TEST class TestBind(unittest.TestCase): @@ -8,7 +9,7 @@ class TestBind(unittest.TestCase): @classmethod def setUpClass(cls): - cls.backend = sgl.Runtime(model_path="meta-llama/Meta-Llama-3-8B-Instruct") + cls.backend = sgl.Runtime(model_path=MODEL_NAME_FOR_TEST) sgl.set_default_backend(cls.backend) @classmethod