use sgl_per_token_group_quant_fp8 kernel (#3493)

2025-02-12 18:40:42 +08:00
parent b96e92e6e6
commit 45e3a7bc41
3 changed files with 43 additions and 2 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -25,7 +25,7 @@ runtime_common = [
 ]
 srt = [
    "sglang[runtime_common]", "cuda-python",
-    "sgl-kernel>=0.0.3.post3", "torch", "vllm>=0.6.4.post1,<=0.7.2",
+    "sgl-kernel>=0.0.3.post4", "torch", "vllm>=0.6.4.post1,<=0.7.2",
    "flashinfer_python>=0.2.0.post2", "outlines>=0.0.44,<=0.1.11"
 ]