From 7f6c690b67a9d6311041d3a65ec1be1123b475cf Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Fri, 2 Aug 2024 03:12:20 +0800 Subject: [PATCH] misc: use pip cache purge and add unit test ci (#871) --- .github/workflows/pr-e2e-test.yml | 15 +++++------ .github/workflows/unit-test.yml | 42 +++++++++++++++++++++++++++++++ python/sglang/README.md | 3 ++- 3 files changed, 50 insertions(+), 10 deletions(-) create mode 100644 .github/workflows/unit-test.yml diff --git a/.github/workflows/pr-e2e-test.yml b/.github/workflows/pr-e2e-test.yml index 20702cb24..76fd3da9a 100644 --- a/.github/workflows/pr-e2e-test.yml +++ b/.github/workflows/pr-e2e-test.yml @@ -16,7 +16,7 @@ concurrency: cancel-in-progress: true jobs: - gpu-job: + pr-e2e-test: runs-on: self-hosted env: CUDA_VISIBLE_DEVICES: 6 @@ -27,20 +27,17 @@ jobs: - name: Install dependencies run: | + cd /data/zhyncs/venv && source ./bin/activate && cd - + pip cache purge pip install --upgrade pip pip install -e "python[all]" pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall pip install --upgrade transformers - - name: Test OpenAI Backend - run: | - export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} - cd test/lang - python3 test_openai_backend.py - - name: Benchmark Serving run: | - python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & + cd /data/zhyncs/venv && source ./bin/activate && cd - + python3 -m sglang.launch_server --model /data/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache & echo "Waiting for server to start..." for i in {1..120}; do @@ -55,7 +52,7 @@ jobs: sleep 1 done - cd /home/lmzheng/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 + cd /data/zhyncs && python3 -m sglang.bench_serving --backend sglang --port 8413 --dataset-name random --num-prompts 3000 --random-input 256 --random-output 512 echo "Stopping server..." kill -9 $(ps aux | grep sglang | grep Meta-Llama-3.1-8B-Instruct | grep -v grep | awk '{print $2}') diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml new file mode 100644 index 000000000..f4bddb64b --- /dev/null +++ b/.github/workflows/unit-test.yml @@ -0,0 +1,42 @@ +name: Unit Test + +on: + push: + branches: [ main ] + paths: + - "python/sglang/**" + pull_request: + branches: [ main ] + paths: + - "python/sglang/**" + workflow_dispatch: + +concurrency: + group: unit-test-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-test: + runs-on: self-hosted + env: + CUDA_VISIBLE_DEVICES: 6 + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install dependencies + run: | + cd /data/zhyncs/venv && source ./bin/activate && cd - + pip cache purge + pip install --upgrade pip + pip install -e "python[all]" + pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall + pip install --upgrade transformers + + - name: Test OpenAI Backend + run: | + cd /data/zhyncs/venv && source ./bin/activate && cd - + cd test/lang + export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} + python3 test_openai_backend.py diff --git a/python/sglang/README.md b/python/sglang/README.md index 38cfb5a3b..c873e1d63 100644 --- a/python/sglang/README.md +++ b/python/sglang/README.md @@ -1,4 +1,5 @@ -# Code Structure +# Code Structures + - `lang`: The frontend language. - `srt`: The backend engine for running local models. (SRT = SGLang Runtime).