Overlapped weight offload (#8034)

2025-08-23 17:06:46 +08:00
parent ccd3fb946e
commit 2600fc0d47
9 changed files with 584 additions and 10 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -2954,3 +2954,13 @@ class ConcurrentCounter:
@lru_cache(maxsize=1)
 def is_triton_kernels_available() -> bool:
+    """Return True when ``importlib.util.find_spec`` locates a ``triton_kernels`` module.
+
+    The result is memoized by ``lru_cache``, so later calls reuse the first answer.
+    """
    return importlib.util.find_spec("triton_kernels") is not None
+
+
+def check_cuda_result(raw_output):
+    """Unwrap the ``(err, *values)`` result of a CUDA runtime binding call.
+
+    Args:
+        raw_output: Iterable whose first element is the call's status code and
+            whose remaining elements are the values the call produced.
+
+    Returns:
+        A list holding every element after the status code (empty when the
+        call produced only a status).
+
+    Raises:
+        RuntimeError: If the status code is not ``cudaError_t.cudaSuccess``.
+    """
+    # Imported inside the function, so the cuda bindings are only required
+    # when this helper is actually called.
+    import cuda.bindings.runtime as cuda_rt
+
+    err, *results = raw_output
+    if err != cuda_rt.cudaError_t.cudaSuccess:
+        # Use repr(): an int-based enum member can render as a bare number in
+        # an f-string on some Python versions, which would hide the symbolic
+        # error name from the message.
+        raise RuntimeError(f"CUDA error: {err!r}")
+
+    return results