Overlapped weight offload (#8034)
This commit is contained in:
@@ -2954,3 +2954,13 @@ class ConcurrentCounter:
|
||||
@lru_cache(maxsize=1)
def is_triton_kernels_available() -> bool:
    """Report whether the ``triton_kernels`` package can be located.

    Only the import machinery is queried (via ``find_spec``); the package
    itself is not imported here. The answer is memoized by ``lru_cache``,
    so the lookup is performed once and later calls reuse the result.

    Returns:
        True when a module spec for ``triton_kernels`` is found,
        False otherwise.
    """
    spec = importlib.util.find_spec("triton_kernels")
    return spec is not None
|
||||
|
||||
|
||||
def check_cuda_result(raw_output):
    """Split a CUDA runtime call result into status and payload, raising on error.

    Args:
        raw_output: An iterable whose first item is the status code of the
            call and whose remaining items are the values it produced.

    Returns:
        A list with every item of ``raw_output`` after the status code
        (empty when the call produced no values).

    Raises:
        Exception: If the status code is not ``cudaError_t.cudaSuccess``.
    """
    # Imported lazily so the CUDA bindings are only needed when this helper runs.
    from cuda.bindings.runtime import cudaError_t

    status, *payload = raw_output

    if status != cudaError_t.cudaSuccess:
        raise Exception(f"CUDA error: {status}")

    return payload
|
||||
|
||||
Reference in New Issue
Block a user