fix custom allreduce performance/accuracy problem (#4477)

This commit is contained in:
Yi Zhang
2025-03-17 03:16:30 +08:00
committed by GitHub
parent a53fe428f9
commit 25e1816eff
2 changed files with 7 additions and 20 deletions

View File

@@ -39,7 +39,7 @@ limitations under the License.
namespace trt_llm {
constexpr size_t WARP_SIZE = 32;
-constexpr size_t MAX_ALL_REDUCE_BLOCKS = 36;
+constexpr size_t MAX_ALL_REDUCE_BLOCKS = 32;
constexpr size_t MAX_RANKS_PER_NODE = 8;
constexpr size_t DEFAULT_BLOCK_SIZE = 512;