From 948278f1730ee61a175d951a1959894e8f4457db Mon Sep 17 00:00:00 2001
From: b8zhong
Date: Thu, 2 Oct 2025 06:05:01 -0700
Subject: [PATCH] fix cpp JIT compilation issue of ngram speculative decoding (#10837)

---
 python/sglang/srt/server_args.py | 2 +-
 python/sglang/srt/speculative/cpp_ngram/ngram.cpp | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 84c92983e..baa08d4af 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -679,7 +679,7 @@ class ServerArgs:
 [1, 2, 4, 8, 12]
 + list(range(16, 257, 8))
 + list(range(272, 512, 16))
- + list(range(512, self.cuda_graph_max_bs + 1))
+ + list(range(512, self.cuda_graph_max_bs + 1, 32))
 )
 else:
 # Spec decoding case: list(range(1, 9, 1)) + list(range(10, 33, 2)) + list(range(40, 64, 4)) + list(range(72, 257, 8))
diff --git a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp
index 51172c5dd..d1e982358 100644
--- a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp
+++ b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp
@@ -1,6 +1,9 @@
 #include "ngram.h"
 
+#include
+#include
 #include
+#include
 #include
 
 namespace ngram {