[Feature, Hardware] Enable SGLang on AMD GPUs via PyTorch for ROCm (#1420)

This commit is contained in:
HAI
2024-09-17 00:43:52 -07:00
committed by GitHub
parent 2fa5cec775
commit 3a6e04185b
11 changed files with 104 additions and 24 deletions

View File

@@ -13,6 +13,7 @@ limitations under the License.
"""Fused operators for activation layers."""
import logging
from typing import Optional
import torch
@@ -28,6 +29,10 @@ from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.utils import set_weight_attrs
from sglang.srt.utils import is_hip
logger = logging.getLogger(__name__)
class SiluAndMul(CustomOp):
def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -135,3 +140,10 @@ def get_act_fn(
act_fn, intermediate_size, input_is_parallel, params_dtype
)
return act_fn
# ROCm/HIP fallback: FlashInfer kernels are unavailable on AMD GPUs, so log
# the fallback and pull in vLLM's activation implementations instead.
if is_hip():
    logger.info(
        "FlashInfer is not available on AMD GPUs. Fallback to other kernel libraries."
    )
    # NOTE(review): this module-level import rebinds GeluAndMul/SiluAndMul to
    # vLLM's versions, shadowing the local class definitions earlier in this
    # module — confirm that shadowing is intended on HIP platforms.
    from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul