diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 84c92983e..baa08d4af 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -679,7 +679,7 @@ class ServerArgs: [1, 2, 4, 8, 12] + list(range(16, 257, 8)) + list(range(272, 512, 16)) - + list(range(512, self.cuda_graph_max_bs + 1)) + + list(range(512, self.cuda_graph_max_bs + 1, 32)) ) else: # Spec decoding case: list(range(1, 9, 1)) + list(range(10, 33, 2)) + list(range(40, 64, 4)) + list(range(72, 257, 8)) diff --git a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp index 51172c5dd..d1e982358 100644 --- a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp +++ b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp @@ -1,6 +1,9 @@ #include "ngram.h" +#include +#include #include +#include #include namespace ngram {