Expert Parallelism for GPT-OSS (#8944)

This commit is contained in:
Cheng Wan
2025-08-08 00:46:42 -07:00
committed by GitHub
parent 444013585d
commit 1d24db8348
8 changed files with 269 additions and 119 deletions

View File

@@ -2961,3 +2961,8 @@ class ConcurrentCounter:
other tasks to run while waiting. When the counter becomes zero, the coroutine resumes.
"""
self.wait_for(lambda count: count == 0)
@lru_cache(maxsize=1)
def is_triton_kernels_available() -> bool:
    """Return whether the optional ``triton_kernels`` package can be located.

    The lookup only asks the import system for the package's spec; the
    package itself is not imported. The answer is memoized by ``lru_cache``,
    so the import machinery is consulted once per process (or until
    ``is_triton_kernels_available.cache_clear()`` is called).

    Returns:
        ``True`` if a module spec for ``triton_kernels`` is found, ``False``
        if it is not found or its presence cannot be confirmed.
    """
    # ``importlib.util`` is a submodule and is not guaranteed to be bound by a
    # bare ``import importlib``; import it here so the probe is self-contained.
    import importlib.util

    try:
        spec = importlib.util.find_spec("triton_kernels")
    except (ImportError, ValueError):
        # ``find_spec`` raises ValueError when ``sys.modules`` already holds a
        # ``triton_kernels`` entry whose ``__spec__`` is None or missing (e.g.
        # a stubbed module). An availability probe should answer with a bool
        # rather than crash, so treat that as "not available".
        return False
    return spec is not None