diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py index 06e57f1e6..0c2939935 100644 --- a/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py +++ b/python/sglang/srt/layers/moe/fused_moe_triton/fused_moe_triton_config.py @@ -51,10 +51,14 @@ def get_moe_configs( # We found that using the fused_moe_kernel config from Triton 3.1.0 with Triton 3.2.0 results in negative performance gains, # so we also include the Triton version as a key for finding the fused_moe_kernel config to achieve the best performance. + config_dir = os.environ.get( + "SGLANG_MOE_CONFIG_DIR", os.path.dirname(os.path.realpath(__file__)) + ) + triton_version = triton.__version__ version_dir = f"triton_{triton_version.replace('.', '_')}" config_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), + config_dir, "configs", version_dir, json_file_name, @@ -75,7 +79,7 @@ def get_moe_configs( if try_triton_version == triton_version: continue try_config_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), + config_dir, "configs", f"triton_{try_triton_version.replace('.', '_')}", json_file_name,