sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions
--- a/sgl-kernel/python/sgl_kernel/grammar.py
+++ b/sgl-kernel/python/sgl_kernel/grammar.py
@@ -0,0 +1,15 @@
+from typing import List, Optional, Union
+
+import torch
+
+
+def apply_token_bitmask_inplace_cuda(
+    logits: torch.Tensor,
+    bitmask: torch.Tensor,
+    indices: Optional[Union[List[int], torch.Tensor]] = None,
+) -> None:
+    if isinstance(indices, list):
+        indices = torch.tensor(indices, dtype=torch.int32, device=logits.device)
+    if indices is not None:
+        indices = indices.to(logits.device)
+    torch.ops.sgl_kernel.apply_token_bitmask_inplace_cuda(logits, bitmask, indices)