Update CI workflows (#1210)
This commit is contained in:
7
.github/workflows/accuracy-test.yml
vendored
7
.github/workflows/accuracy-test.yml
vendored
@@ -20,7 +20,7 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
accuracy-test:
|
accuracy-test:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: accuracy
|
runs-on: accuracy-test
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -28,9 +28,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source $HOME/venv/bin/activate
|
|
||||||
echo "$HOME/venv/bin" >> $GITHUB_PATH
|
|
||||||
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install -e "python[all]"
|
pip install -e "python[all]"
|
||||||
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
||||||
@@ -40,7 +37,7 @@ jobs:
|
|||||||
pip install -e .
|
pip install -e .
|
||||||
|
|
||||||
- name: Evaluate Accuracy
|
- name: Evaluate Accuracy
|
||||||
|
timeout-minutes: 20
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 test_eval_accuracy_large.py
|
python3 test_eval_accuracy_large.py
|
||||||
timeout-minutes: 20
|
|
||||||
|
|||||||
11
.github/workflows/e2e-test.yml
vendored
11
.github/workflows/e2e-test.yml
vendored
@@ -20,7 +20,7 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
e2e-test:
|
e2e-test:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: e2e
|
runs-on: e2e-test
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -28,27 +28,24 @@ jobs:
|
|||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source $HOME/venv/bin/activate
|
|
||||||
echo "$HOME/venv/bin" >> $GITHUB_PATH
|
|
||||||
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install -e "python[all]"
|
pip install -e "python[all]"
|
||||||
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
||||||
|
|
||||||
- name: Benchmark Serving Throughput
|
- name: Benchmark Serving Throughput
|
||||||
|
timeout-minutes: 10
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
|
||||||
timeout-minutes: 10
|
|
||||||
|
|
||||||
- name: Benchmark Serving Throughput (w/o RadixAttention)
|
- name: Benchmark Serving Throughput (w/o RadixAttention)
|
||||||
|
timeout-minutes: 10
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
||||||
timeout-minutes: 10
|
|
||||||
|
|
||||||
- name: Benchmark Serving Throughput (w/o ChunkedPrefill)
|
- name: Benchmark Serving Throughput (w/o ChunkedPrefill)
|
||||||
|
timeout-minutes: 10
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
|
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default_without_chunked_prefill
|
||||||
timeout-minutes: 10
|
|
||||||
|
|||||||
50
.github/workflows/moe-test.yml
vendored
50
.github/workflows/moe-test.yml
vendored
@@ -18,30 +18,28 @@ concurrency:
|
|||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
moe-test:
|
moe-test:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: accuracy
|
runs-on: moe-test
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
source $HOME/venv/bin/activate
|
|
||||||
echo "$HOME/venv/bin" >> $GITHUB_PATH
|
|
||||||
|
|
||||||
pip install --upgrade pip
|
|
||||||
pip install -e "python[all]"
|
|
||||||
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
|
||||||
|
|
||||||
- name: Benchmark MOE Serving Throughput
|
steps:
|
||||||
uses: nick-fields/retry@v3
|
- name: Checkout code
|
||||||
with:
|
uses: actions/checkout@v3
|
||||||
timeout_minutes: 15
|
|
||||||
max_attempts: 2
|
- name: Install dependencies
|
||||||
retry_on: error
|
run: |
|
||||||
command: |
|
pip install --upgrade pip
|
||||||
cd test/srt
|
pip install -e "python[all]"
|
||||||
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
|
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
||||||
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
|
||||||
|
- name: Benchmark MoE Serving Throughput
|
||||||
|
timeout-minutes: 10
|
||||||
|
run: |
|
||||||
|
cd test/srt
|
||||||
|
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default
|
||||||
|
|
||||||
|
- name: Benchmark MoE Serving Throughput (w/o RadixAttention)
|
||||||
|
timeout-minutes: 10
|
||||||
|
run: |
|
||||||
|
cd test/srt
|
||||||
|
python3 -m unittest test_moe_serving_throughput.TestServingThroughput.test_default_without_radix_cache
|
||||||
|
|||||||
9
.github/workflows/unit-test.yml
vendored
9
.github/workflows/unit-test.yml
vendored
@@ -20,7 +20,7 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
unit-test:
|
unit-test:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: unit
|
runs-on: unit-test
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -28,9 +28,6 @@ jobs:
|
|||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
source $HOME/venv/bin/activate
|
|
||||||
echo "$HOME/venv/bin" >> $GITHUB_PATH
|
|
||||||
|
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install -e "python[all]"
|
pip install -e "python[all]"
|
||||||
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.4/ --force-reinstall
|
||||||
@@ -38,13 +35,13 @@ jobs:
|
|||||||
pip install sentence_transformers
|
pip install sentence_transformers
|
||||||
|
|
||||||
- name: Test Backend Runtime
|
- name: Test Backend Runtime
|
||||||
|
timeout-minutes: 20
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 run_suite.py --suite minimal
|
python3 run_suite.py --suite minimal
|
||||||
timeout-minutes: 20
|
|
||||||
|
|
||||||
- name: Test Frontend Language
|
- name: Test Frontend Language
|
||||||
|
timeout-minutes: 10
|
||||||
run: |
|
run: |
|
||||||
cd test/lang
|
cd test/lang
|
||||||
python3 run_suite.py --suite minimal
|
python3 run_suite.py --suite minimal
|
||||||
timeout-minutes: 10
|
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import torch.nn.functional as F
|
|||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
|
||||||
from sglang.srt.server import Runtime
|
from sglang.srt.server import Runtime
|
||||||
|
from sglang.test.test_utils import DEFAULT_PORT_FOR_SRT_TEST_RUNNER
|
||||||
|
|
||||||
DEFAULT_PROMPTS = [
|
DEFAULT_PROMPTS = [
|
||||||
# the output of gemma-2-2b from SRT is unstable on the commented prompt
|
# the output of gemma-2-2b from SRT is unstable on the commented prompt
|
||||||
@@ -171,7 +172,7 @@ class SRTRunner:
|
|||||||
torch_dtype,
|
torch_dtype,
|
||||||
is_generation,
|
is_generation,
|
||||||
tp_size=1,
|
tp_size=1,
|
||||||
port=5157,
|
port=DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
|
||||||
):
|
):
|
||||||
self.is_generation = is_generation
|
self.is_generation = is_generation
|
||||||
self.runtime = Runtime(
|
self.runtime = Runtime(
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
|||||||
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
||||||
|
|
||||||
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
|
if os.getenv("SGLANG_IS_IN_CI", "false") == "true":
|
||||||
|
DEFAULT_PORT_FOR_SRT_TEST_RUNNER = 5157
|
||||||
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
|
DEFAULT_URL_FOR_MOE_TEST = "http://127.0.0.1:6157"
|
||||||
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
|
DEFAULT_URL_FOR_ACCURACY_TEST = "http://127.0.0.1:7157"
|
||||||
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
|
DEFAULT_URL_FOR_UNIT_TEST = "http://127.0.0.1:8157"
|
||||||
|
|||||||
@@ -5,7 +5,11 @@ from multiprocessing import Process
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
from sglang.srt.utils import kill_child_process
|
from sglang.srt.utils import kill_child_process
|
||||||
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, popen_launch_server
|
from sglang.test.test_utils import (
|
||||||
|
DEFAULT_MODEL_NAME_FOR_TEST,
|
||||||
|
DEFAULT_URL_FOR_UNIT_TEST,
|
||||||
|
popen_launch_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestBatchPenalizerE2E(unittest.TestCase):
|
class TestBatchPenalizerE2E(unittest.TestCase):
|
||||||
@@ -13,7 +17,7 @@ class TestBatchPenalizerE2E(unittest.TestCase):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def setUpClass(cls):
|
def setUpClass(cls):
|
||||||
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
cls.model = DEFAULT_MODEL_NAME_FOR_TEST
|
||||||
cls.base_url = f"http://127.0.0.1:{8157}"
|
cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
|
||||||
cls.process = popen_launch_server(
|
cls.process = popen_launch_server(
|
||||||
cls.model,
|
cls.model,
|
||||||
cls.base_url,
|
cls.base_url,
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
|
|||||||
assert response.choices[0].message.role == "assistant"
|
assert response.choices[0].message.role == "assistant"
|
||||||
text = response.choices[0].message.content
|
text = response.choices[0].message.content
|
||||||
assert isinstance(text, str)
|
assert isinstance(text, str)
|
||||||
assert "car" in text or "taxi" in text, text
|
assert "logo" in text, text
|
||||||
assert response.id
|
assert response.id
|
||||||
assert response.created
|
assert response.created
|
||||||
assert response.usage.prompt_tokens > 0
|
assert response.usage.prompt_tokens > 0
|
||||||
|
|||||||
Reference in New Issue
Block a user