From 9179ea15957bad2af5f678cf8bbdb5ddc8d2ab41 Mon Sep 17 00:00:00 2001
From: Chunyuan WU <chunyuan.wu@intel.com>
Date: Thu, 19 Jun 2025 10:12:14 +0800
Subject: [PATCH] add seed in CPU unit tests to avoid flaky failures (#7333)

---
 test/srt/cpu/test_activation.py    | 2 ++
 test/srt/cpu/test_decode.py        | 2 ++
 test/srt/cpu/test_extend.py        | 2 ++
 test/srt/cpu/test_gemm.py          | 2 ++
 test/srt/cpu/test_mla.py           | 2 ++
 test/srt/cpu/test_moe.py           | 2 ++
 test/srt/cpu/test_norm.py          | 2 ++
 test/srt/cpu/test_rope.py          | 2 ++
 test/srt/cpu/test_shared_expert.py | 2 ++
 test/srt/cpu/test_topk.py          | 2 ++
 10 files changed, 20 insertions(+)

diff --git a/test/srt/cpu/test_activation.py b/test/srt/cpu/test_activation.py
index a5bfce912..f6bce4f0b 100644
--- a/test/srt/cpu/test_activation.py
+++ b/test/srt/cpu/test_activation.py
@@ -8,6 +8,8 @@ from utils import SiluAndMul, precision
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestActivation(CustomTestCase):
     M = [128, 129, 257]
diff --git a/test/srt/cpu/test_decode.py b/test/srt/cpu/test_decode.py
index 7e15a58aa..9f88da378 100644
--- a/test/srt/cpu/test_decode.py
+++ b/test/srt/cpu/test_decode.py
@@ -6,6 +6,8 @@ from torch.nn.functional import scaled_dot_product_attention
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestDecodeAttention(CustomTestCase):
     def _run_sdpa_forward_decode(
diff --git a/test/srt/cpu/test_extend.py b/test/srt/cpu/test_extend.py
index c119c1524..57dfad2f7 100644
--- a/test/srt/cpu/test_extend.py
+++ b/test/srt/cpu/test_extend.py
@@ -6,6 +6,8 @@ from torch.nn.functional import scaled_dot_product_attention
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestExtendAttention(CustomTestCase):
 
diff --git a/test/srt/cpu/test_gemm.py b/test/srt/cpu/test_gemm.py
index c7ee838d9..7404d060e 100644
--- a/test/srt/cpu/test_gemm.py
+++ b/test/srt/cpu/test_gemm.py
@@ -14,6 +14,8 @@ from utils import (
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class Mod(nn.Module):
     def __init__(self, input_channel, output_channel, has_bias):
diff --git a/test/srt/cpu/test_mla.py b/test/srt/cpu/test_mla.py
index 217e33b71..620d04273 100644
--- a/test/srt/cpu/test_mla.py
+++ b/test/srt/cpu/test_mla.py
@@ -8,6 +8,8 @@ from utils import precision
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestMLA(CustomTestCase):
     def _run_sdpa_forward_decode(
diff --git a/test/srt/cpu/test_moe.py b/test/srt/cpu/test_moe.py
index c5852408c..17b534bac 100644
--- a/test/srt/cpu/test_moe.py
+++ b/test/srt/cpu/test_moe.py
@@ -8,6 +8,8 @@ import torch
 
 kernel = torch.ops.sgl_kernel
 
+torch.manual_seed(0)
+
 from utils import (
     BLOCK_K,
     BLOCK_N,
diff --git a/test/srt/cpu/test_norm.py b/test/srt/cpu/test_norm.py
index 6f1065d61..008973468 100644
--- a/test/srt/cpu/test_norm.py
+++ b/test/srt/cpu/test_norm.py
@@ -8,6 +8,8 @@ from utils import make_non_contiguous, precision
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestNorm(CustomTestCase):
     M = [4096, 1024]
diff --git a/test/srt/cpu/test_rope.py b/test/srt/cpu/test_rope.py
index be4e63c44..76e80d5a5 100644
--- a/test/srt/cpu/test_rope.py
+++ b/test/srt/cpu/test_rope.py
@@ -10,6 +10,8 @@ from sglang.srt.layers.rotary_embedding import (
 )
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestROPE(CustomTestCase):
     def test_deepseek_v2_rope(self):
diff --git a/test/srt/cpu/test_shared_expert.py b/test/srt/cpu/test_shared_expert.py
index bf7840b53..654aa55e1 100644
--- a/test/srt/cpu/test_shared_expert.py
+++ b/test/srt/cpu/test_shared_expert.py
@@ -22,6 +22,8 @@ from utils import (
 
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 class TestSharedExpert(CustomTestCase):
     M = [2, 121]
diff --git a/test/srt/cpu/test_topk.py b/test/srt/cpu/test_topk.py
index 3e08794d7..cf81d0ecb 100644
--- a/test/srt/cpu/test_topk.py
+++ b/test/srt/cpu/test_topk.py
@@ -13,6 +13,8 @@ from sglang.srt.layers.moe.topk import grouped_topk_gpu as native_grouped_topk
 from sglang.srt.models.llama4 import Llama4MoE
 from sglang.test.test_utils import CustomTestCase
 
+torch.manual_seed(0)
+
 
 # This is used by the Deepseek-V2 model
 class TestGroupedTopK(CustomTestCase):