Add CI timeout guidelines (#10829)
This commit is contained in:
30
.github/workflows/experiment-runner.yml
vendored
30
.github/workflows/experiment-runner.yml
vendored
@@ -1,30 +0,0 @@
|
|||||||
name: Experiment Runner
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
script:
|
|
||||||
description: "Experiment Runner Script"
|
|
||||||
default: "configs/sharegpt_config.yaml"
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: experiment-runner-${{ github.ref }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
experiment-runner-1-gpu:
|
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
|
||||||
runs-on: 1-gpu-runner
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: |
|
|
||||||
bash scripts/ci/ci_install_dependency.sh
|
|
||||||
|
|
||||||
- name: Test experiment runner
|
|
||||||
timeout-minutes: 120
|
|
||||||
run: |
|
|
||||||
cd test/srt
|
|
||||||
python3 experiment_runner.py --config ${{ inputs.script }}
|
|
||||||
2
.github/workflows/pr-test.yml
vendored
2
.github/workflows/pr-test.yml
vendored
@@ -615,7 +615,7 @@ jobs:
|
|||||||
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
|
CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
|
||||||
|
|
||||||
- name: Run test
|
- name: Run test
|
||||||
timeout-minutes: 60
|
timeout-minutes: 45
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
|
python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
|
||||||
|
|||||||
2
.github/workflows/release-docs.yml
vendored
2
.github/workflows/release-docs.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
|||||||
make compile
|
make compile
|
||||||
|
|
||||||
- name: Push HTML to sgl-project.github.io
|
- name: Push HTML to sgl-project.github.io
|
||||||
timeout-minutes: 60
|
timeout-minutes: 30
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_DOCUMENTATION }}
|
GITHUB_TOKEN: ${{ secrets.GH_PAT_FOR_DOCUMENTATION }}
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
2
.github/workflows/vllm-dependency-test.yml
vendored
2
.github/workflows/vllm-dependency-test.yml
vendored
@@ -39,4 +39,4 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 600
|
python3 run_suite.py --suite vllm_dependency_test
|
||||||
|
|||||||
@@ -78,10 +78,14 @@ To trigger CI, the pull request must have the "run-ci" label.
|
|||||||
## General code style
|
## General code style
|
||||||
- Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
|
- Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
|
||||||
- Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
|
- Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
|
||||||
- Keep files concise. If a file exceeds 2,000 lines of code, split it into multiple smaller files.
|
|
||||||
- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize all minor overheads as much as possible, especially in the model forward code.
|
- Prioritize extreme efficiency. SGLang is a runtime, and most of your code runs on the critical path for every request. Optimize all minor overheads as much as possible, especially in the model forward code.
|
||||||
- A common pattern is some runtime checks in the model forward pass (e.g., [this](https://github.com/sgl-project/sglang/blob/f1b0eda55c2c4838e8ab90a0fac7fb1e3d7064ab/python/sglang/srt/models/deepseek_v2.py#L486-L491)). These are very likely the same for every layer. Please cache the result as a single boolean value whenever possible.
|
- A common pattern is some runtime checks in the model forward pass (e.g., [this](https://github.com/sgl-project/sglang/blob/f1b0eda55c2c4838e8ab90a0fac7fb1e3d7064ab/python/sglang/srt/models/deepseek_v2.py#L486-L491)). These are very likely the same for every layer. Please cache the result as a single boolean value whenever possible.
|
||||||
- Strive to make functions as pure as possible. Avoid in-place modification of arguments.
|
- Make functions as pure as possible. Avoid in-place modification of arguments.
|
||||||
|
- Keep files concise. If a file exceeds 2,000 lines of code, split it into multiple smaller files. (e.g., `scheduler.py`, `scheduler_output_processor_mixin.py`)
|
||||||
|
- Keep tests run fast.
|
||||||
|
- If a single test file run longer than 500 seconds, split it into multiple smaller files (e.g., `test_eagle_infer_a.py`, `test_eagle_infer_b.py`).
|
||||||
|
- If a single job in a github workflow runs longer than 30 mins, split it into smaller jobs/steps.
|
||||||
|
- Reuse server launches in your unit tests to make tests run faster.
|
||||||
- When supporting new hardware or features, follow these guidelines:
|
- When supporting new hardware or features, follow these guidelines:
|
||||||
- Do not drastically change existing code.
|
- Do not drastically change existing code.
|
||||||
- Always prefer new files to introduce specific components for your new hardware (e.g., `allocator_ascend.py`).
|
- Always prefer new files to introduce specific components for your new hardware (e.g., `allocator_ascend.py`).
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ suites = {
|
|||||||
],
|
],
|
||||||
"per-commit-4-gpu-b200": [
|
"per-commit-4-gpu-b200": [
|
||||||
# TestFile("test_gpt_oss_4gpu.py", 600),
|
# TestFile("test_gpt_oss_4gpu.py", 600),
|
||||||
TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
|
# TestFile("test_deepseek_v3_fp4_4gpu.py", 3600),
|
||||||
],
|
],
|
||||||
"per-commit-4-gpu-deepep": [
|
"per-commit-4-gpu-deepep": [
|
||||||
TestFile("ep/test_deepep_small.py", 531),
|
TestFile("ep/test_deepep_small.py", 531),
|
||||||
@@ -369,7 +369,7 @@ if __name__ == "__main__":
|
|||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
"--timeout-per-file",
|
"--timeout-per-file",
|
||||||
type=int,
|
type=int,
|
||||||
default=1800,
|
default=1200,
|
||||||
help="The time limit for running one file in seconds.",
|
help="The time limit for running one file in seconds.",
|
||||||
)
|
)
|
||||||
arg_parser.add_argument(
|
arg_parser.add_argument(
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ class TestStandaloneSpeculativeDecodingBase(CustomTestCase):
|
|||||||
|
|
||||||
model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST
|
model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST
|
||||||
base_url = DEFAULT_URL_FOR_TEST
|
base_url = DEFAULT_URL_FOR_TEST
|
||||||
accuracy_threshold = 0.8 # derived tests need to override this
|
accuracy_threshold = 0.79 # derived tests need to override this
|
||||||
spec_decode_threshold = 1.8 # derived spec decoding tests need to override this
|
spec_decode_threshold = 1.8 # derived spec decoding tests need to override this
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|||||||
Reference in New Issue
Block a user