diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 2785c3a9a..2ff8b64d1 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -114,25 +114,25 @@ jobs: run: | bash scripts/ci_install_dependency.sh - - name: Benchmark Single Latency + - name: Benchmark single latency timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_latency.TestBenchLatency.test_default - - name: Benchmark Online Latency + - name: Benchmark online latency timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default - - name: Benchmark Offline Throughput + - name: Benchmark offline throughput timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default - - name: Benchmark Offline Throughput (Non-streaming, small batch size) + - name: Benchmark offline throughput (Non-streaming, small batch size) timeout-minutes: 10 run: | cd test/srt @@ -149,19 +149,19 @@ jobs: run: | bash scripts/ci_install_dependency.sh - - name: Benchmark Offline Throughput (w/o RadixAttention) + - name: Benchmark offline throughput (w/o RadixAttention) timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache - - name: Benchmark Offline Throughput (w/ Triton) + - name: Benchmark offline throughput (w/ Triton) timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend - - name: Benchmark Offline Throughput (w/ FP8) + - name: Benchmark offline throughput (w/ FP8) timeout-minutes: 10 run: | cd test/srt @@ -178,19 +178,19 @@ jobs: run: | bash scripts/ci_install_dependency.sh - - name: Benchmark Offline Throughput (TP=2) + - name: Benchmark offline throughput (TP=2) timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default - - name: Benchmark Offline Throughput (w/o RadixAttention) (TP=2) + - name: Benchmark offline throughput (w/o RadixAttention) (TP=2) timeout-minutes: 10 run: | cd test/srt python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache - - name: Benchmark Single Latency (TP=2) + - name: Benchmark single latency (TP=2) timeout-minutes: 10 run: | cd test/srt @@ -211,7 +211,7 @@ jobs: cd human-eval pip install -e . - - name: Evaluate Accuracy + - name: Evaluate accuracy timeout-minutes: 20 run: | cd test/srt @@ -232,20 +232,20 @@ jobs: cd human-eval pip install -e . - - name: Evaluate Accuracy (TP=2) + - name: Evaluate accuracy (TP=2) timeout-minutes: 20 run: | cd test/srt python3 test_moe_eval_accuracy_large.py - - name: Evaluate MLA Accuracy (TP=2) + - name: Evaluate MLA accuracy (TP=2) timeout-minutes: 10 run: | cd test/srt python3 test_mla.py python3 test_mla_fp8.py - - name: Evaluate Data Parallelism Accuracy (DP=2) + - name: Evaluate data parallelism accuracy (DP=2) timeout-minutes: 10 run: | cd test/srt diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py index b6555183b..96009ffb2 100644 --- a/python/sglang/srt/managers/io_struct.py +++ b/python/sglang/srt/managers/io_struct.py @@ -184,7 +184,7 @@ class TokenizedGenerateReqInput: input_text: str # The input token ids input_ids: List[int] - # The image input + # The image inputs image_inputs: dict # The sampling parameters sampling_params: SamplingParams diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index 4d67ce6ff..fe75b7743 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -212,7 +212,7 @@ class Req: # this does not include the jump forward tokens. self.completion_tokens_wo_jump_forward = 0 - # For vision inputs + # For multimodal inputs self.image_inputs: Optional[ImageInputs] = None # Prefix info