Update CI workflows (#1210)

This commit is contained in:
Lianmin Zheng
2024-08-25 16:43:07 -07:00
committed by GitHub
parent 308d024092
commit 15f1a49d2d
8 changed files with 43 additions and 48 deletions

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   accuracy-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: accuracy-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -40,7 +37,7 @@ jobs:
           pip install -e .
       - name: Evaluate Accuracy
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 test_eval_accuracy_large.py
-        timeout-minutes: 20

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   e2e-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: e2e
+    runs-on: e2e-test
     steps:
       - name: Checkout code
@@ -28,27 +28,24 @@ jobs:
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
       - name: Benchmark Serving Throughput
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o RadixAttention)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
-        timeout-minutes: 10
       - name: Benchmark Serving Throughput (w/o ChunkedPrefill)
+        timeout-minutes: 10
         run: |
           cd test/srt
           python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
-        timeout-minutes: 10

View File

@@ -18,30 +18,28 @@ concurrency:
   cancel-in-progress: true
 jobs:
   moe-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: accuracy
+    runs-on: moe-test
     steps:
       - name: Checkout code
         uses: actions/checkout@v3
       - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
-      - name: Benchmark MOE Serving Throughput
-        uses: nick-fields/retry@v3
-        with:
-          timeout_minutes: 15
-          max_attempts: 2
-          retry_on: error
-          command: |
-            cd test/srt
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
-            python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
+      - name: Benchmark MoE Serving Throughput
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
+      - name: Benchmark MoE Serving Throughput (w/o RadixAttention)
+        timeout_minutes: 10
+        run: |
+          cd test/srt
+          python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache

View File

@@ -20,7 +20,7 @@ concurrency:
 jobs:
   unit-test:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: unit
+    runs-on: unit-test
     steps:
       - name: Checkout code
@@ -28,9 +28,6 @@ jobs:
      - name: Install dependencies
         run: |
-          source $HOME/venv/bin/activate
-          echo "$HOME/venv/bin" >> $GITHUB_PATH
           pip install --upgrade pip
           pip install -e "python[all]"
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
@@ -38,13 +35,13 @@ jobs:
           pip install sentence_transformers
       - name: Test Backend Runtime
+        timeout-minutes: 20
         run: |
           cd test/srt
           python3 run_suite.py --suite minimal
-        timeout-minutes: 20
       - name: Test Frontend Language
+        timeout-minutes: 10
         run: |
           cd test/lang
           python3 run_suite.py --suite minimal
-        timeout-minutes: 10

View File

@@ -24,6 +24,7 @@ import torch.nn.functional as F
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from sglang.srt.server import Runtime
+from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
 DEFAULT_PROMPTS = [
     # the output of gemma-2-2b from SRT is unstable on the commented prompt
@@ -171,7 +172,7 @@ class SRTRunner:
         torch_dtype,
         is_generation,
         tp_size=1,
-        port=5157,
+        port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
     ):
         self.is_generation = is_generation
         self.runtime = Runtime(

View File

@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
+    DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
     DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
     DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
     DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"

View File

@@ -5,7 +5,11 @@ from multiprocessing import Process
 import requests
 from sglang.srt.utils import kill_child_process
-from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
+from sglang.test.test_utils import (
+    DEFAULT_MODEL_NAME_FOR_TEST,
+    DEFAULT_URL_FOR_UNIT_TEST,
+    popen_launch_server,
+)
 class TestBatchPenalizerE2E(unittest.TestCase):
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.model = DEFAULT_MODEL_NAME_FOR_TEST
-        cls.base_url = f"http://127.0.0.1:{8157}"
+        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
         cls.process = popen_launch_server(
             cls.model,
             cls.base_url,

View File

@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
         assert response.choices[0].message.role == "assistant"
         text = response.choices[0].message.content
         assert isinstance(text, str)
-        assert "car" in text or "taxi" in text, text
+        assert "logo" in text, text
         assert response.id
         assert response.created
         assert response.usage.prompt_tokens > 0