diff --git a/test/srt/test_deepseek_v3_fp4_4gpu.py b/test/srt/test_deepseek_v3_fp4_4gpu.py
index 81f6bdfb3..bb9e13554 100644
--- a/test/srt/test_deepseek_v3_fp4_4gpu.py
+++ b/test/srt/test_deepseek_v3_fp4_4gpu.py
@@ -160,5 +160,56 @@ class TestDeepseekV3FP4MTP(CustomTestCase):
             self.assertGreater(speed, 130)
 
 
+class TestDeepseekV3FP4CutlassMoE(CustomTestCase):  # GSM8K accuracy check against a server launched with the flashinfer_cutlass MoE runner backend
+    @classmethod
+    def setUpClass(cls):  # Launch one server process for the whole class; its handle is stored on cls.process
+        cls.model = FULL_DEEPSEEK_V3_FP4_MODEL_PATH
+        cls.base_url = DEFAULT_URL_FOR_TEST
+        other_args = [  # extra launch arguments, forwarded below via other_args=
+            "--tp",
+            "4",
+            "--ep",
+            "4",
+            "--attention-backend",
+            "trtllm_mla",
+            "--moe-runner-backend",  # the backend under test (cf. the class name)
+            "flashinfer_cutlass",
+            "--quantization",
+            "modelopt_fp4",
+        ]
+        cls.process = popen_launch_server(  # handle is kept so tearDownClass can kill the server
+            cls.model,
+            cls.base_url,
+            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+            other_args=other_args,
+        )
+
+    @classmethod
+    def tearDownClass(cls):  # Calls kill_process_tree on the pid of the server launched in setUpClass
+        kill_process_tree(cls.process.pid)
+
+    def test_a_gsm8k(
+        self,
+    ):  # The "a" in the name makes this test sort first alphabetically, so it runs first and warms up the server
+        args = SimpleNamespace(  # options handed to run_eval_few_shot_gsm8k
+            num_shots=8,
+            data_path=None,
+            num_questions=1319,
+            parallel=1319,  # same value as num_questions
+            max_new_tokens=512,
+            host="http://127.0.0.1",
+            port=int(self.base_url.split(":")[-1]),  # text after the last ":" in base_url, converted to int
+        )
+        metrics = run_eval_few_shot_gsm8k(args)
+        print(f"{metrics=}")
+
+        if is_in_ci():  # NOTE: the step summary and the accuracy assertion below only run when is_in_ci() is truthy
+            write_github_step_summary(
+                f"### test_gsm8k (deepseek-v3-fp4-cutlass-moe)\n"
+                f'{metrics["accuracy"]=:.3f}\n'
+            )
+            self.assertGreater(metrics["accuracy"], 0.935)  # fails unless accuracy is strictly greater than 0.935
+
+
 if __name__ == "__main__":
     unittest.main()