Support NVFP4 quantized dense models on AMD CDNA2/CDNA3 GPUs (#7302)

Co-authored-by: HAI <hixiao@gmail.com>
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
2025-07-18 19:59:39 -07:00
parent 3964b352c3
commit d918ab7985
7 changed files with 361 additions and 0 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -79,6 +79,7 @@ blackwell = [
 srt_hip = [
    "sglang[runtime_common]",
    "torch",
+    "petit_kernel",
 ]

 # xpu is not enabled in public vllm and torch whl,