Amd test fp8 (#4261)

2025-03-11 01:12:09 +08:00
parent 5a6400eec5
commit 2ac189edc8
6 changed files with 84 additions and 0 deletions
--- a/python/sglang/srt/layers/quantization/fp8_utils.py
+++ b/python/sglang/srt/layers/quantization/fp8_utils.py
@@ -32,6 +32,10 @@ if _is_cuda:
    else:
        from sgl_kernel import fp8_scaled_mm

+# Starting from PyTorch 2.5, the input scaling factors of _scaled_mm are no
+# longer optional, so allocate a dummy tensor to pass as input_scale.
+TORCH_DEVICE_IDENTITY = torch.ones(1, dtype=torch.float32)
+

 def cutlass_fp8_supported():
    if not _is_cuda: