hotfix: revert sampler CUDA Graph (#1242)

This commit is contained in:
Yineng Zhang
2024-08-28 21:16:47 +10:00
committed by GitHub
parent 184ae1c683
commit f25f4dfde5
33 changed files with 119 additions and 348 deletions

View File

@@ -38,11 +38,6 @@ jobs:
cd test/srt
python3 -m unittest test_serving_throughput.TestServingThroughput.test_default
- name: Benchmark Serving Latency
timeout-minutes: 10
run: |
python3 -m sglang.bench_latency --model meta-llama/Meta-Llama-3.1-8B-Instruct --batch-size 1 --input 128 --output 8
- name: Benchmark Serving Throughput (w/o RadixAttention)
timeout-minutes: 10
run: |