[ROCm] Optimal MOE Tuning for AMD Radeon Graphics (#3567)

This commit is contained in:
yigex
2025-02-18 09:54:10 +08:00
committed by GitHub
parent 2eab113206
commit ddf39d3fce
2 changed files with 51 additions and 51 deletions

View File

@@ -175,7 +175,7 @@ def get_rocm_configs_compute_bound() -> List[Dict[str, int]]:
for block_m in [32, 64, 128, 256]:
for block_k in [32, 64, 128, 256]:
for block_n in [16, 32, 64, 128, 256]:
-for num_warps in [4, 8]:
+for num_warps in [1, 2, 4, 8]:
for group_size in [1, 4, 8, 16, 32]:
configs.append(
{