Update CI workflows (#1210)

This commit is contained in:
Lianmin Zheng
2024-08-25 16:43:07 -07:00
committed by GitHub
parent 308d024092
commit 15f1a49d2d
8 changed files with 43 additions and 48 deletions

View File

@@ -20,7 +20,7 @@ concurrency:
jobs: jobs:
accuracy-test: accuracy-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy runs-on: accuracy-test
steps: steps:
- name: Checkout code - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip pip install --upgrade pip
pip install -e "python[all]" pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
pip install -e . pip install -e .
- name: Evaluate Accuracy - name: Evaluate Accuracy
timeout-minutes: 20
run: | run: |
cd test/srt cd test/srt
python3 test_eval_accuracy_large.py python3 test_eval_accuracy_large.py
timeout-minutes: 20

View File

@@ -20,7 +20,7 @@ concurrency:
jobs: jobs:
e2e-test: e2e-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: e2e runs-on: e2e-test
steps: steps:
- name: Checkout code - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip pip install --upgrade pip
pip install -e "python[all]" pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
- name: Benchmark Serving Throughput - name: Benchmark Serving Throughput
timeout-minutes: 10
run: | run: |
cd test/srt cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o RadixAttention) - name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
run: | run: |
cd test/srt cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
timeout-minutes: 10
- name: Benchmark Serving Throughput (w/o ChunkedPrefill) - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
timeout-minutes: 10
run: | run: |
cd test/srt cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
timeout-minutes: 10

View File

@@ -18,30 +18,28 @@ concurrency:
cancel-in-progress: true cancel-in-progress: true
jobs: jobs:
moe-test: moe-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: accuracy runs-on: moe-test
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Install dependencies - name: Install dependencies
run: | run: |
source $HOME/venv/bin/activate pip install --upgrade pip
echo "$HOME/venv/bin" >> $GITHUB_PATH pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
pip install --upgrade pip - name: Benchmark MoE Serving Throughput
pip install -e "python[all]" timeout-minutes: 10
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall run: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
- name: Benchmark MOE Serving Throughput - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
uses: nick-fields/retry@v3 timeout-minutes: 10
with: run: |
timeout_minutes: 15 cd test/srt
max_attempts: 2 python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
retry_on: error
command: |
cd test/srt
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

View File

@@ -20,7 +20,7 @@ concurrency:
jobs: jobs:
unit-test: unit-test:
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
runs-on: unit runs-on: unit-test
steps: steps:
- name: Checkout code - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
- name: Install dependencies - name: Install dependencies
run: | run: |
source $HOME/venv/bin/activate
echo "$HOME/venv/bin" >> $GITHUB_PATH
pip install --upgrade pip pip install --upgrade pip
pip install -e "python[all]" pip install -e "python[all]"
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
pip install sentence_transformers pip install sentence_transformers
- name: Test Backend Runtime - name: Test Backend Runtime
timeout-minutes: 20
run: | run: |
cd test/srt cd test/srt
python3 run_suite.py --suite minimal python3 run_suite.py --suite minimal
timeout-minutes: 20
- name: Test Frontend Language - name: Test Frontend Language
timeout-minutes: 10
run: | run: |
cd test/lang cd test/lang
python3 run_suite.py --suite minimal python3 run_suite.py --suite minimal
timeout-minutes: 10

View File

@@ -24,6 +24,7 @@ import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer from transformers import AutoModelForCausalLM, AutoTokenizer
from sglang.srt.server import Runtime from sglang.srt.server import Runtime
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
DEFAULT_PROMPTS = [ DEFAULT_PROMPTS = [
# the output of gemma-2-2b from SRT is unstable on the commented prompt # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
torch_dtype, torch_dtype,
is_generation, is_generation,
tp_size=1, tp_size=1,
port=5157, port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
): ):
self.is_generation = is_generation self.is_generation = is_generation
self.runtime = Runtime( self.runtime = Runtime(

View File

@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1" DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
if os.getenv("SGLANG_IS_IN_CI", "false") == "true": if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157" DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157" DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157" DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"

View File

@@ -5,7 +5,11 @@ from multiprocessing import Process
import requests import requests
from sglang.srt.utils import kill_child_process from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server from sglang.test.test_utils import (
DEFAULT_MODEL_NAME_FOR_TEST,
DEFAULT_URL_FOR_UNIT_TEST,
popen_launch_server,
)
class TestBatchPenalizerE2E(unittest.TestCase): class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.model = DEFAULT_MODEL_NAME_FOR_TEST cls.model = DEFAULT_MODEL_NAME_FOR_TEST
cls.base_url = f"http://127.0.0.1:{8157}" cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
cls.process = popen_launch_server( cls.process = popen_launch_server(
cls.model, cls.model,
cls.base_url, cls.base_url,

View File

@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.choices[0].message.role == "assistant" assert response.choices[0].message.role == "assistant"
text = response.choices[0].message.content text = response.choices[0].message.content
assert isinstance(text, str) assert isinstance(text, str)
assert "car" in text or "taxi" in text, text assert "logo" in text, text
assert response.id assert response.id
assert response.created assert response.created
assert response.usage.prompt_tokens > 0 assert response.usage.prompt_tokens > 0