diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index 150fb9258..29b976a12 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -76,7 +76,7 @@ jobs: timeout-minutes: 20 run: | cd test/srt - python3 run_suite.py --suite minimal --range-begin 5 --range-end 16 + python3 run_suite.py --suite minimal --range-begin 5 --range-end 15 unit-test-backend-part-3: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' @@ -96,7 +96,7 @@ jobs: timeout-minutes: 20 run: | cd test/srt - python3 run_suite.py --suite minimal --range-begin 16 + python3 run_suite.py --suite minimal --range-begin 15 performance-test-1-gpu-part-1: if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index 156e830d1..a2f48131d 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -405,9 +405,9 @@ class ScheduleBatch: sampling_info: SamplingBatchInfo = None # Batched arguments to model runner - input_ids: List[int] = None - req_pool_indices: List[int] = None - seq_lens: List[int] = None + input_ids: torch.Tensor = None + req_pool_indices: torch.Tensor = None + seq_lens: torch.Tensor = None out_cache_loc: torch.Tensor = None # For processing logprobs diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index c6df4a2e8..a4ada01aa 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -17,7 +17,6 @@ limitations under the License. import json import logging -import multiprocessing import os import time import warnings