[Feature, Hardware] Enable SGLang on AMD GPUs via PyTorch for ROCm (#1420)

This commit is contained in:
HAI
2024-09-17 00:43:52 -07:00
committed by GitHub
parent 2fa5cec775
commit 3a6e04185b
11 changed files with 104 additions and 24 deletions

View File

@@ -13,6 +13,7 @@ limitations under the License.
"""Fused operators for activation layers."""
import logging
from typing import Optional
import torch
@@ -28,6 +29,10 @@ from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.utils import set_weight_attrs
from sglang.srt.utils import is_hip
logger = logging.getLogger(__name__)
class SiluAndMul(CustomOp):
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -135,3 +140,10 @@ def get_act_fn(
act_fn, intermediate_size, input_is_parallel, params_dtype
)
return act_fn
# ROCm/HIP fallback: FlashInfer kernels are unavailable on AMD GPUs, so log
# the fallback and pull in vLLM's activation implementations instead.
if is_hip():
    logger.info(
        "FlashInfer is not available on AMD GPUs. Fallback to other kernel libraries."
    )
    # NOTE(review): this module-level import rebinds GeluAndMul/SiluAndMul to
    # vLLM's versions, shadowing the local class definitions earlier in this
    # module — confirm that shadowing is intended on HIP platforms.
    from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul