Support trtllm_allreduce_fusion in flashinfer for CUDA < 12.8 (#9339)

Co-authored-by: Zhang Kaihong <zhangkaihong.zkh@alibaba-inc.com>
This commit is contained in:
Author: strgrb
Date: 2025-08-21 07:54:30 +08:00
Committed by: GitHub
parent 8f5b9910c1
commit 88fbc31b50
3 changed files with 37 additions and 3 deletions

View File

@@ -292,7 +292,6 @@ class LayerCommunicator:
(not self.is_last_layer)
and (self._context.tp_size > 1)
and global_server_args_dict.get("enable_flashinfer_allreduce_fusion", False)
and _is_sm100_supported
and _is_flashinfer_available
)