[PP] Fix init_memory_pool desync & add PP for mixtral (#6223)

This commit is contained in:
Ying Sheng
2025-05-12 12:38:09 -07:00
committed by GitHub
parent 12319a6787
commit bad7c26fdc
8 changed files with 179 additions and 47 deletions

View File

@@ -229,6 +229,18 @@ jobs:
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
# CI step: from test/srt, runs the single unittest case
# TestBenchServing.test_pp_offline_throughput_default_decode.
# timeout-minutes caps this step at 10 minutes.
# NOTE(review): "PP=2" in the step name presumably refers to a pipeline-parallel
# size of 2 configured inside the test itself — confirm in test_bench_serving.
- name: Benchmark offline decode throughput (PP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode
# CI step: from test/srt, runs the single unittest case
# TestBenchServing.test_pp_long_context_prefill.
# timeout-minutes caps this step at 10 minutes.
# NOTE(review): "PP=2" in the step name presumably refers to a pipeline-parallel
# size of 2 configured inside the test itself — confirm in test_bench_serving.
- name: Benchmark offline prefill throughput (PP=2)
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill
accuracy-test-1-gpu:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft == false