[AMD] Support Wave attention backend with AMD GPU optimizations (#8660)

Signed-off-by: Stanley Winata <stanley.winata@amd.com> Signed-off-by: Harsh Menon <harsh@nod-labs.com> Signed-off-by: nithinsubbiah <nithinsubbiah@gmail.com> Signed-off-by: Ivan Butygin <ivan.butygin@gmail.com> Signed-off-by: xintin <gaurav.verma@amd.com> Co-authored-by: Harsh Menon <harsh@nod-labs.com> Co-authored-by: Stanley Winata <stanley.winata@amd.com> Co-authored-by: Stanley Winata <68087699+raikonenfnu@users.noreply.github.com> Co-authored-by: Stanley Winata <stanley@nod-labs.com> Co-authored-by: Ivan Butygin <ivan.butygin@gmail.com> Co-authored-by: nithinsubbiah <nithinsubbiah@gmail.com> Co-authored-by: Nithin Meganathan <18070964+nithinsubbiah@users.noreply.github.com> Co-authored-by: Ivan Butygin <ibutygin@amd.com>
2025-08-13 04:49:11 +08:00
parent 03d114496f
commit 25caa7a8a9
11 changed files with 1437 additions and 0 deletions
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -1487,6 +1487,10 @@ class ModelRunner:
            from sglang.srt.layers.attention.aiter_backend import AiterAttnBackend

            return AiterAttnBackend(self)
+        elif backend_str == "wave":
+            from sglang.srt.layers.attention.wave_backend import WaveAttnBackend
+
+            return WaveAttnBackend(self)
        elif backend_str == "ascend":
            from sglang.srt.layers.attention.ascend_backend import AscendAttnBackend