adapt to sglang v0.5.2rc1 on dcu

This commit is contained in:
maxiao
2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions

View File

@@ -0,0 +1,87 @@
/* Copyright 2025 SGLang Team. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#pragma once
#include "utils.h"
#define kBitsToLoad 128
#define kBytesToLoad (kBitsToLoad / 8)
// Adapted from
// [flashinfer::activation::act_and_mul_kernel](https://github.com/flashinfer-ai/flashinfer/blob/4e8eb1879f9c3ba6d75511e5893183bf8f289a62/include/flashinfer/activation.cuh#L29)
namespace sgl_hip {
namespace activation {
// Fused "activation and multiply" kernel:
//   out[token][j] = Activation(input[token][j]) * input[token][d + j]
// `input` is laid out as [num_tokens, 2 * d] (the first half of each row is
// passed through Activation, the second half multiplies it); `out` is
// [num_tokens, d].
// Launch layout: one thread block per token (blockIdx.x == token index);
// blockDim.x threads cooperate over the token's d output elements.
template <typename T, T (*Activation)(const T&)>
__global__ void act_and_mul_kernel(T* __restrict__ out, const T* __restrict__ input, const int d) {
  // Number of T elements moved per 128-bit (kBytesToLoad) vectorized access.
  constexpr uint32_t vec_size = kBytesToLoad / sizeof(T);
  const int64_t token_idx = blockIdx.x;
  const int64_t thread_idx = threadIdx.x;
  const int64_t stride = blockDim.x;
  // Start of this token's row in the [num_tokens, 2 * d] input.
  const int64_t offset = token_idx * 2 * d;
  // Vectorized main loop. `#pragma unroll 1` keeps the loop rolled since the
  // trip count is runtime-dependent.
#pragma unroll 1
  for (uint32_t idx = thread_idx; idx < d / vec_size; idx += stride) {
    sgl_hip::vec_t<T, vec_size> x_vec, y_vec, out_vec;
    x_vec.cast_load(input + offset + idx * vec_size);
    y_vec.cast_load(input + offset + d + idx * vec_size);
#pragma unroll
    for (uint32_t i = 0; i < vec_size; ++i) {
      out_vec[i] = Activation(x_vec[i]) * y_vec[i];
    }
    out_vec.cast_store(out + token_idx * d + idx * vec_size);
  }
  const int64_t remaining_offset = d - d % (stride * vec_size);
  // Scalar tail: covers the last d % (stride * vec_size) elements. This span
  // is a superset of the d % vec_size elements the vector loop cannot reach;
  // the overlap is rewritten with identical values, so the result stays
  // correct (just slightly redundant work).
#pragma unroll 1
  for (int64_t idx = thread_idx; idx < d % (stride * vec_size); idx += stride) {
    T x = input[offset + remaining_offset + idx], y = input[offset + remaining_offset + d + idx];
    out[token_idx * d + remaining_offset + idx] = Activation(x) * y;
  }
}
// Element-wise activation kernel: out[token][j] = Activation(input[token][j]).
// Both `input` and `out` are laid out as [num_tokens, d].
// Launch layout: one thread block per token (blockIdx.x == token index);
// blockDim.x threads cooperate over the token's d elements.
// Fix vs. original: dropped the unused `y_vec` local (copy-paste leftover
// from act_and_mul_kernel).
template <typename T, T (*Activation)(const T&)>
__global__ void act_only_kernel(T* __restrict__ out, const T* __restrict__ input, const int d) {
  // Number of T elements moved per 128-bit (kBytesToLoad) vectorized access.
  constexpr uint32_t vec_size = kBytesToLoad / sizeof(T);
  const int64_t token_idx = blockIdx.x;
  const int64_t thread_idx = threadIdx.x;
  const int64_t stride = blockDim.x;
  // Start of this token's row in the [num_tokens, d] input.
  const int64_t offset = token_idx * d;
  // Vectorized main loop. `#pragma unroll 1` keeps the loop rolled since the
  // trip count is runtime-dependent.
#pragma unroll 1
  for (uint32_t idx = thread_idx; idx < d / vec_size; idx += stride) {
    sgl_hip::vec_t<T, vec_size> x_vec, out_vec;
    x_vec.cast_load(input + offset + idx * vec_size);
#pragma unroll
    for (uint32_t i = 0; i < vec_size; ++i) {
      out_vec[i] = Activation(x_vec[i]);
    }
    out_vec.cast_store(out + token_idx * d + idx * vec_size);
  }
  const int64_t remaining_offset = d - d % (stride * vec_size);
  // Scalar tail: covers the last d % (stride * vec_size) elements, a superset
  // of the d % vec_size elements the vector loop cannot reach; overlapping
  // positions are rewritten with identical values, so the result is correct.
#pragma unroll 1
  for (int64_t idx = thread_idx; idx < d % (stride * vec_size); idx += stride) {
    T x = input[offset + remaining_offset + idx];
    out[token_idx * d + remaining_offset + idx] = Activation(x);
  }
}
} // namespace activation
} // namespace sgl_hip

View File

@@ -0,0 +1,94 @@
/* Copyright 2025 SGLang Team. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#pragma once
#ifdef USE_ROCM
#include <hip/hip_bf16.h>
#include <hip/hip_common.h>
#include <hip/hip_fp16.h>
// Adapted from flashinfer-rocm [PR#491](https://github.com/flashinfer-ai/flashinfer/pull/491)
// Device-side shims mapping CUDA-style intrinsics onto HIP/ROCm equivalents.
namespace amdgpu {
// Generic declarations; only the explicit specializations below are defined.
template <typename T>
__forceinline__ __device__ T shfl_xor_sync(unsigned mask, T var, int laneMask, int width = warpSize);
// Value conversion between element types; specialized per (src, dst) pair.
template <typename srcDtype, typename destDtype>
__forceinline__ __device__ destDtype cast(srcDtype val);
// specialization
// NOTE: ROCm's __shfl_xor takes no participation mask, so `mask` is ignored.
template <>
__forceinline__ __device__ float shfl_xor_sync(unsigned mask, float var, int laneMask, int width) {
  return __shfl_xor(var, laneMask, width);
}
template <>
__forceinline__ __device__ int shfl_xor_sync(unsigned mask, int var, int laneMask, int width) {
  return __shfl_xor(var, laneMask, width);
}
// Identity cast.
template <>
__forceinline__ __device__ float cast(float val) {
  return val;
}
// Widening casts to float.
template <>
__forceinline__ __device__ float cast(__half val) {
  return __half2float(val);
}
template <>
__forceinline__ __device__ float cast(__hip_bfloat16 val) {
  return __bfloat162float(val);
}
// Narrowing casts from float.
template <>
__forceinline__ __device__ __half cast(float fval) {
  return __float2half(fval);
}
template <>
__forceinline__ __device__ __hip_bfloat16 cast(float fval) {
  return __float2bfloat16(fval);
}
}  // namespace amdgpu
// CUDA-compatible __shfl_xor_sync for ROCm builds; `mask` is accepted for
// source compatibility but not used by the HIP backend (see amdgpu shim).
template <typename T>
__forceinline__ __device__ T __shfl_xor_sync(unsigned mask, T var, int laneMask, int width = warpSize) {
  return amdgpu::shfl_xor_sync(mask, var, laneMask, width);
}
// Convert a supported element type (float / __half / __hip_bfloat16) to float.
template <typename srcDtype>
__device__ __forceinline__ float castToFloat(srcDtype val) {
  return amdgpu::cast<srcDtype, float>(val);
}
// Convert a float to a supported destination type (__half / __hip_bfloat16 /
// float), using the amdgpu::cast specializations.
template <typename dstDtype>
__device__ __forceinline__ dstDtype castFromFloat(float val) {
  return amdgpu::cast<float, dstDtype>(val);
}
// operator overload to support flashinfer
// Half-precision multiply, forwarded straight to the HIP intrinsic.
__host__ __device__ __forceinline__ __half operator*(const __half& x, const __half& y) {
  return __hmul(x, y);
}
#endif

View File

@@ -0,0 +1,101 @@
/* Copyright 2025 SGLang Team. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#pragma once
#if USE_ROCM
#include <hip/hip_bf16.h>
#include <hip/hip_common.h>
#include <hip/hip_fp16.h>

#include <type_traits>  // std::is_same (used by cast_load_impl / cast_store_impl)
// Adapted from flashinfer-rocm [PR#491](https://github.com/flashinfer-ai/flashinfer/pull/491)
#define SGL_HIP_INLINE inline __attribute__((always_inline)) __device__
namespace sgl_hip {
// Fixed-size register vector of `vec_size` elements of type `float_t`.
// The primary template only declares the interface; storage and member
// definitions come from the explicit specializations in the impl/ headers
// included at the bottom of this file.
template <typename float_t, size_t vec_size>
struct vec_t;
// Free-function helpers (defined later in this header) that load/store a
// vector, converting element types when src and dst differ.
template <typename srcDtype, typename dstDtype, size_t vec_size>
SGL_HIP_INLINE void cast_load_impl(vec_t<dstDtype, vec_size>& dst, const srcDtype* src);
template <typename srcDtype, typename dstDtype, size_t vec_size>
SGL_HIP_INLINE void cast_store_impl(dstDtype* dst_ptr, const vec_t<srcDtype, vec_size>& src);
template <typename float_t, size_t vec_size>
struct vec_t {
  // Element access into the backing storage.
  SGL_HIP_INLINE float_t& operator[](size_t i);
  SGL_HIP_INLINE const float_t& operator[](size_t i) const;
  // Pointer to the first element of the backing storage.
  SGL_HIP_INLINE float_t* ptr();
  // Raw (same-type) vector load/store.
  SGL_HIP_INLINE void load(const float_t* ptr);
  SGL_HIP_INLINE void store(float_t* ptr) const;
  // Element-wise conversion from a vector with a different element type.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, vec_size>& src);
  // Load/store with on-the-fly element type conversion.
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr);
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const;
};
}  // namespace sgl_hip
// **** impl *****
namespace sgl_hip {

// Load `vec_size` elements from `src_ptr` into `dst`, converting element-wise
// when source and destination element types differ; otherwise a plain load.
template <typename srcDtype, typename dstDtype, size_t vec_size>
SGL_HIP_INLINE void cast_load_impl(vec_t<dstDtype, vec_size>& dst, const srcDtype* src_ptr) {
  if constexpr (!std::is_same<srcDtype, dstDtype>::value) {
    // Stage the raw source vector, then convert into the destination.
    vec_t<srcDtype, vec_size> staging;
    staging.load(src_ptr);
    dst.cast_from(staging);
  } else {
    dst.load(src_ptr);
  }
}

// Store `src` to `dst_ptr`, converting element-wise when source and
// destination element types differ; otherwise a plain store.
template <typename srcDtype, typename dstDtype, size_t vec_size>
SGL_HIP_INLINE void cast_store_impl(dstDtype* dst_ptr, const vec_t<srcDtype, vec_size>& src) {
  if constexpr (!std::is_same<srcDtype, dstDtype>::value) {
    // Convert into a destination-typed staging vector, then store it raw.
    vec_t<dstDtype, vec_size> staging;
    staging.cast_from(src);
    staging.store(dst_ptr);
  } else {
    src.store(dst_ptr);
  }
}

// Member-function shims: forward to the free-function implementations above.
template <typename float_t, size_t vec_size>
template <typename T>
SGL_HIP_INLINE void vec_t<float_t, vec_size>::cast_load(const T* ptr) {
  cast_load_impl(*this, ptr);
}

template <typename float_t, size_t vec_size>
template <typename T>
SGL_HIP_INLINE void vec_t<float_t, vec_size>::cast_store(T* ptr) const {
  cast_store_impl(ptr, *this);
}

}  // namespace sgl_hip
#include "impl/hip_vec_bf16_impl.h"
#include "impl/hip_vec_fp32_impl.h"
#include "impl/hip_vec_half_impl.h"
#endif

View File

@@ -0,0 +1,177 @@
#pragma once
#if USE_ROCM
#include <hip/hip_bf16.h>
#include <hip/hip_common.h>
// Adapted from flashinfer-rocm [PR#491](https://github.com/flashinfer-ai/flashinfer/pull/491)
using nv_bfloat16 = __hip_bfloat16;
using nv_bfloat162 = __hip_bfloat162;
// HIP analogue of CUDA's make_bfloat162: pack two bf16 scalars into a
// __hip_bfloat162 (x = first argument, y = second).
__BF16_HOST_DEVICE_STATIC__ __hip_bfloat162 make_bfloat162(const __hip_bfloat16 x, const __hip_bfloat16 y) {
  __hip_bfloat162 t;
  t.x = x;
  t.y = y;
  return t;
}
namespace sgl_hip {
// nv_bfloat16 x 1
template <>
struct vec_t<nv_bfloat16, 1> {
  nv_bfloat16 data;  // single-element payload
  SGL_HIP_INLINE nv_bfloat16& operator[](size_t i) {
    return ((nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE const nv_bfloat16& operator[](size_t i) const {
    return ((const nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE nv_bfloat16* ptr() {
    return reinterpret_cast<nv_bfloat16*>(&data);
  }
  SGL_HIP_INLINE void load(const nv_bfloat16* ptr);
  SGL_HIP_INLINE void store(nv_bfloat16* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 1>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Scalar load/store: a single bf16 element per call.
SGL_HIP_INLINE void vec_t<nv_bfloat16, 1>::load(const nv_bfloat16* ptr) {
  data = *ptr;
}
SGL_HIP_INLINE void vec_t<nv_bfloat16, 1>::store(nv_bfloat16* ptr) const {
  *ptr = data;
}
// nv_bfloat16 x 2
template <>
struct vec_t<nv_bfloat16, 2> {
  nv_bfloat162 data;  // two bf16 values packed into one 4-byte struct
  SGL_HIP_INLINE nv_bfloat16& operator[](size_t i) {
    return ((nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE const nv_bfloat16& operator[](size_t i) const {
    return ((const nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE nv_bfloat16* ptr() {
    return reinterpret_cast<nv_bfloat16*>(&data);
  }
  SGL_HIP_INLINE void load(const nv_bfloat16* ptr);
  SGL_HIP_INLINE void store(nv_bfloat16* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 2>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Vector load/store via a single nv_bfloat162 access.
// NOTE(review): assumes `ptr` is suitably aligned for nv_bfloat162 — confirm at call sites.
SGL_HIP_INLINE void vec_t<nv_bfloat16, 2>::load(const nv_bfloat16* ptr) {
  data = *((nv_bfloat162*)ptr);
}
SGL_HIP_INLINE void vec_t<nv_bfloat16, 2>::store(nv_bfloat16* ptr) const {
  *((nv_bfloat162*)ptr) = data;
}
// nv_bfloat16 x 4
template <>
struct vec_t<nv_bfloat16, 4> {
  uint2 data;  // 8 bytes of raw storage, reinterpreted as 4 bf16 values
  SGL_HIP_INLINE nv_bfloat16& operator[](size_t i) {
    return ((nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE const nv_bfloat16& operator[](size_t i) const {
    return ((const nv_bfloat16*)(&data))[i];
  }
  SGL_HIP_INLINE nv_bfloat16* ptr() {
    return reinterpret_cast<nv_bfloat16*>(&data);
  }
  SGL_HIP_INLINE void load(const nv_bfloat16* ptr);
  SGL_HIP_INLINE void store(nv_bfloat16* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 4>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Vector load/store via a single uint2 (8-byte) access.
// NOTE(review): assumes `ptr` is 8-byte aligned — confirm at call sites.
SGL_HIP_INLINE void vec_t<nv_bfloat16, 4>::load(const nv_bfloat16* ptr) {
  data = *((uint2*)ptr);
}
SGL_HIP_INLINE void vec_t<nv_bfloat16, 4>::store(nv_bfloat16* ptr) const {
  *((uint2*)ptr) = data;
}
// nv_bfloat16 x 8 or more (vec_size is expected to be a multiple of 8)
template <size_t vec_size>
struct vec_t<nv_bfloat16, vec_size> {
  // Backing store: each uint4 (16 bytes) holds 8 bf16 values.
  uint4 data[vec_size / 8];
  SGL_HIP_INLINE nv_bfloat16& operator[](size_t i) {
    return ((nv_bfloat16*)data)[i];
  }
  SGL_HIP_INLINE const nv_bfloat16& operator[](size_t i) const {
    return ((const nv_bfloat16*)data)[i];
  }
  SGL_HIP_INLINE nv_bfloat16* ptr() {
    return reinterpret_cast<nv_bfloat16*>(&data);
  }
  // 16-byte vector loads/stores.
  // NOTE(review): assumes `ptr` is 16-byte aligned — confirm at call sites.
  // Fix vs. original: the pragma was misspelled "#pragma unoll" (here and in
  // store), so the compiler ignored it; the fp32/half variants spell it
  // correctly.
  SGL_HIP_INLINE void load(const nv_bfloat16* ptr) {
#pragma unroll
    for (size_t i = 0; i < vec_size / 8; ++i) {
      data[i] = ((uint4*)ptr)[i];
    }
  }
  SGL_HIP_INLINE void store(nv_bfloat16* ptr) const {
#pragma unroll
    for (size_t i = 0; i < vec_size / 8; ++i) {
      ((uint4*)ptr)[i] = data[i];
    }
  }
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, vec_size>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
} // namespace sgl_hip
#endif

View File

@@ -0,0 +1,129 @@
#pragma once
#if USE_ROCM
#include <hip/hip_common.h>
// Adapted from flashinfer-rocm [PR#491](https://github.com/flashinfer-ai/flashinfer/pull/491)
namespace sgl_hip {
// float x 1
template <>
struct vec_t<float, 1> {
  float data;  // single-element payload

  SGL_HIP_INLINE float& operator[](size_t i) {
    return reinterpret_cast<float*>(&data)[i];
  }
  SGL_HIP_INLINE const float& operator[](size_t i) const {
    return reinterpret_cast<const float*>(&data)[i];
  }
  SGL_HIP_INLINE float* ptr() {
    return reinterpret_cast<float*>(&data);
  }
  // Scalar load/store: one float per call.
  SGL_HIP_INLINE void load(const float* ptr) {
    data = *ptr;
  }
  SGL_HIP_INLINE void store(float* ptr) const {
    *ptr = data;
  }
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 1>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// float x 2
template <>
struct vec_t<float, 2> {
  float2 data;  // two floats packed into one 8-byte struct
  SGL_HIP_INLINE float& operator[](size_t i) {
    return ((float*)(&data))[i];
  }
  SGL_HIP_INLINE const float& operator[](size_t i) const {
    return ((const float*)(&data))[i];
  }
  SGL_HIP_INLINE float* ptr() {
    return reinterpret_cast<float*>(&data);
  }
  SGL_HIP_INLINE void load(const float* ptr);
  SGL_HIP_INLINE void store(float* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 2>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Vector load/store via a single float2 (8-byte) access.
// NOTE(review): assumes `ptr` is 8-byte aligned — confirm at call sites.
SGL_HIP_INLINE void vec_t<float, 2>::load(const float* ptr) {
  data = *((float2*)ptr);
}
SGL_HIP_INLINE void vec_t<float, 2>::store(float* ptr) const {
  *((float2*)ptr) = data;
}
// float x 4 or more
template <size_t vec_size>
struct vec_t<float, vec_size> {
  // Backing store: each float4 (16 bytes) holds 4 floats.
  // NOTE(review): assumes vec_size is a multiple of 4 — confirm at call sites.
  float4 data[vec_size / 4];
  SGL_HIP_INLINE float& operator[](size_t i) {
    return ((float*)(data))[i];
  }
  SGL_HIP_INLINE const float& operator[](size_t i) const {
    return ((const float*)(data))[i];
  }
  SGL_HIP_INLINE float* ptr() {
    return reinterpret_cast<float*>(&data);
  }
  // 16-byte vector loads.
  // NOTE(review): assumes `ptr` is 16-byte aligned — confirm at call sites.
  SGL_HIP_INLINE void load(const float* ptr) {
#pragma unroll
    for (size_t i = 0; i < vec_size / 4; ++i) {
      data[i] = ((float4*)ptr)[i];
    }
  }
  // 16-byte vector stores.
  SGL_HIP_INLINE void store(float* ptr) const {
#pragma unroll
    for (size_t i = 0; i < vec_size / 4; ++i) {
      ((float4*)ptr)[i] = data[i];
    }
  }
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, vec_size>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
} // namespace sgl_hip
#endif

View File

@@ -0,0 +1,172 @@
#pragma once
#if USE_ROCM
#include <hip/hip_common.h>
#include <hip/hip_fp16.h>
// Adapted from flashinfer-rocm [PR#491](https://github.com/flashinfer-ai/flashinfer/pull/491)
using half = __half;
using half2 = __half2;
namespace sgl_hip {
// half x 1
template <>
struct vec_t<half, 1> {
  half data;  // single-element payload

  SGL_HIP_INLINE half& operator[](size_t i) {
    return reinterpret_cast<half*>(&data)[i];
  }
  SGL_HIP_INLINE const half& operator[](size_t i) const {
    return reinterpret_cast<const half*>(&data)[i];
  }
  SGL_HIP_INLINE half* ptr() {
    return reinterpret_cast<half*>(&data);
  }
  // Scalar load/store: one half per call.
  SGL_HIP_INLINE void load(const half* ptr) {
    data = *ptr;
  }
  SGL_HIP_INLINE void store(half* ptr) const {
    *ptr = data;
  }
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 1>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// half x 2
template <>
struct vec_t<half, 2> {
  half2 data;  // two halves packed into one 4-byte struct
  SGL_HIP_INLINE half& operator[](size_t i) {
    return ((half*)(&data))[i];
  }
  SGL_HIP_INLINE const half& operator[](size_t i) const {
    return ((const half*)(&data))[i];
  }
  SGL_HIP_INLINE half* ptr() {
    return reinterpret_cast<half*>(&data);
  }
  SGL_HIP_INLINE void load(const half* ptr);
  SGL_HIP_INLINE void store(half* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 2>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Vector load/store via a single half2 (4-byte) access.
// NOTE(review): assumes `ptr` is suitably aligned for half2 — confirm at call sites.
SGL_HIP_INLINE void vec_t<half, 2>::load(const half* ptr) {
  data = *((half2*)ptr);
}
SGL_HIP_INLINE void vec_t<half, 2>::store(half* ptr) const {
  *((half2*)ptr) = data;
}
// half x 4
template <>
struct vec_t<half, 4> {
  uint2 data;  // 8 bytes of raw storage, reinterpreted as 4 half values
  SGL_HIP_INLINE half& operator[](size_t i) {
    return ((half*)(&data))[i];
  }
  SGL_HIP_INLINE const half& operator[](size_t i) const {
    return ((const half*)(&data))[i];
  }
  SGL_HIP_INLINE half* ptr() {
    return reinterpret_cast<half*>(&data);
  }
  SGL_HIP_INLINE void load(const half* ptr);
  SGL_HIP_INLINE void store(half* ptr) const;
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, 4>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
// Vector load/store via a single uint2 (8-byte) access.
// NOTE(review): assumes `ptr` is 8-byte aligned — confirm at call sites.
SGL_HIP_INLINE void vec_t<half, 4>::load(const half* ptr) {
  data = *((uint2*)ptr);
}
SGL_HIP_INLINE void vec_t<half, 4>::store(half* ptr) const {
  *((uint2*)ptr) = data;
}
// half x 8 or more
template <size_t vec_size>
struct vec_t<half, vec_size> {
  // Backing store: each uint4 (16 bytes) holds 8 half values.
  // NOTE(review): assumes vec_size is a multiple of 8 — confirm at call sites.
  uint4 data[vec_size / 8];
  SGL_HIP_INLINE half& operator[](size_t i) {
    return ((half*)data)[i];
  }
  SGL_HIP_INLINE const half& operator[](size_t i) const {
    return ((const half*)data)[i];
  }
  SGL_HIP_INLINE half* ptr() {
    return reinterpret_cast<half*>(&data);
  }
  // 16-byte vector loads.
  // NOTE(review): assumes `ptr` is 16-byte aligned — confirm at call sites.
  SGL_HIP_INLINE void load(const half* ptr) {
#pragma unroll
    for (size_t i = 0; i < vec_size / 8; ++i) {
      data[i] = ((uint4*)ptr)[i];
    }
  }
  // 16-byte vector stores.
  SGL_HIP_INLINE void store(half* ptr) const {
#pragma unroll
    for (size_t i = 0; i < vec_size / 8; ++i) {
      ((uint4*)ptr)[i] = data[i];
    }
  }
  // Conversion helpers: forward to the cast_*_impl free functions.
  template <typename T>
  SGL_HIP_INLINE void cast_from(const vec_t<T, vec_size>& src) {
    cast_from_impl(*this, src);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_load(const T* ptr) {
    cast_load_impl(*this, ptr);
  }
  template <typename T>
  SGL_HIP_INLINE void cast_store(T* ptr) const {
    cast_store_impl(ptr, *this);
  }
};
} // namespace sgl_hip
#endif