Support trtllm_allreduce_fusion in flashinfer for CUDA < 12.8 (#9339)

Co-authored-by: Zhang Kaihong <zhangkaihong.zkh@alibaba-inc.com>
2025-08-21 07:54:30 +08:00
parent 8f5b9910c1
commit 88fbc31b50
3 changed files with 37 additions and 3 deletions
--- a/python/sglang/srt/layers/communicator.py
+++ b/python/sglang/srt/layers/communicator.py
@@ -292,7 +292,6 @@ class LayerCommunicator:
            (not self.is_last_layer)
            and (self._context.tp_size > 1)
            and global_server_args_dict.get("enable_flashinfer_allreduce_fusion", False)
-            and _is_sm100_supported
            and _is_flashinfer_available
        )