fix cpp JIT compilation issue of ngram speculative decoding (#10837)

This commit is contained in:
b8zhong
2025-10-02 06:05:01 -07:00
committed by GitHub
parent 7d00479950
commit 948278f173
2 changed files with 4 additions and 1 deletion

View File

@@ -679,7 +679,7 @@ class ServerArgs:
[1, 2, 4, 8, 12]
+ list(range(16, 257, 8))
+ list(range(272, 512, 16))
+ list(range(512, self.cuda_graph_max_bs + 1))
+ list(range(512, self.cuda_graph_max_bs + 1, 32))
)
else:
# Spec decoding case: list(range(1, 9, 1)) + list(range(10, 33, 2)) + list(range(40, 64, 4)) + list(range(72, 257, 8))

View File

@@ -1,6 +1,9 @@
#include "ngram.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include <queue>
#include <vector>
namespace ngram {