diff --git a/benchmark/gsm8k/bench_sglang.py b/benchmark/gsm8k/bench_sglang.py index d1ed22cbe..298ec11d7 100644 --- a/benchmark/gsm8k/bench_sglang.py +++ b/benchmark/gsm8k/bench_sglang.py @@ -64,7 +64,7 @@ def main(args): @sgl.function def few_shot_gsm8k(s, question): s += few_shot_examples + question - s += sgl.gen("answer", max_tokens=256, stop="Question") + s += sgl.gen("answer", max_tokens=512, stop="Question") ##################################### ########## SGL Program End ########## diff --git a/python/sglang/srt/managers/controller/cuda_graph_runner.py b/python/sglang/srt/managers/controller/cuda_graph_runner.py index b37a82729..2a9a0af6d 100644 --- a/python/sglang/srt/managers/controller/cuda_graph_runner.py +++ b/python/sglang/srt/managers/controller/cuda_graph_runner.py @@ -150,8 +150,8 @@ class CudaGraphRunner: index = bisect.bisect_left(self.batch_size_list, raw_bs) bs = self.batch_size_list[index] if bs != raw_bs: - self.seq_lens.zero_() - self.position_ids_offsets.fill_(1) + self.seq_lens.fill_(1) + self.position_ids_offsets.zero_() self.out_cache_loc.zero_() # Common inputs