diff --git a/python/sglang/test/test_utils.py b/python/sglang/test/test_utils.py
index bd962a7f8..0d3d769f4 100644
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -42,7 +42,11 @@ DEFAULT_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.1-8B-Instruct"
 DEFAULT_SMALL_MODEL_NAME_FOR_TEST = "meta-llama/Llama-3.2-1B-Instruct"
 DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE = "meta-llama/Llama-3.2-1B"
 DEFAULT_MOE_MODEL_NAME_FOR_TEST = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = "Qwen/Qwen1.5-MoE-A2.7B"
+# Small MoE checkpoints: the base model and its chat variant.
+DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE = "Qwen/Qwen1.5-MoE-A2.7B"
+DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_CHAT = "Qwen/Qwen1.5-MoE-A2.7B-Chat"
+# Backward-compatible alias for importers that still use the pre-rename name.
+DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE
 
 # MLA test models
 DEFAULT_SMALL_EMBEDDING_MODEL_NAME_FOR_TEST = "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
diff --git a/test/srt/test_expert_distribution.py b/test/srt/test_expert_distribution.py
index f98c97766..5d4add72f 100755
--- a/test/srt/test_expert_distribution.py
+++ b/test/srt/test_expert_distribution.py
@@ -8,7 +8,7 @@ import torch
 
 from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
-    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
+    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE,
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
     CustomTestCase,
diff --git a/test/srt/test_release_memory_occupation.py b/test/srt/test_release_memory_occupation.py
index eb20fc46b..35be029df 100644
--- a/test/srt/test_release_memory_occupation.py
+++ b/test/srt/test_release_memory_occupation.py
@@ -38,6 +38,8 @@ from sglang.srt.constants import GPU_MEMORY_TYPE_KV_CACHE, GPU_MEMORY_TYPE_WEIGH
 from sglang.test.test_utils import (
     DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
     DEFAULT_SMALL_MODEL_NAME_FOR_TEST_BASE,
+    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE,
+    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_CHAT,
     CustomTestCase,
 )
 
@@ -50,7 +52,7 @@ def get_gpu_memory_gb():
 
 
 class TestReleaseMemoryOccupation(CustomTestCase):
-    def _setup_engine(self, model_name, mem_fraction_static=0.8, tp_size=1):
+    def _setup_engine(self, model_name, mem_fraction_static=0.8, tp_size=1, ep_size=1):
         """Common setup for engine and HF model."""
         engine = sgl.Engine(
             model_path=model_name,
@@ -58,6 +60,7 @@ class TestReleaseMemoryOccupation(CustomTestCase):
             enable_memory_saver=True,
             mem_fraction_static=mem_fraction_static,
             tp_size=tp_size,
+            ep_size=ep_size,
             # disable_cuda_graph=True,  # for debugging only
         )
 
@@ -70,6 +73,10 @@ class TestReleaseMemoryOccupation(CustomTestCase):
             "sampling_params": {"temperature": 0, "max_new_tokens": 8},
             "expect_output_before_update_weights": " to spend it outdoors. I decided to",
             "expect_output_after_update_weights": " to go for a walk. I like",
+            "prompt_moe": "The weather is nice today, and I want to",
+            "sampling_params_moe": {"temperature": 0, "max_new_tokens": 16},
+            "expect_output_before_update_weights_moe": " go to the park. I have a picnic basket, a book, and a",
+            "expect_output_after_update_weights_moe": " go to the park. I have a lot of things to do, but I",
         }
 
     def _test_initial_generation(
@@ -250,6 +257,86 @@ class TestReleaseMemoryOccupation(CustomTestCase):
             self.assertEqual(outputs, params["expect_output_after_update_weights"])
             engine.shutdown()
 
+    def test_moe_model_release_and_resume(self):
+        """Release and resume GPU memory on a MoE engine, then swap in new weights.
+
+        Starts the chat MoE checkpoint with tp_size = ep_size = 2, checks the
+        temperature-0 output, releases and resumes memory occupation, loads the
+        base checkpoint via update_weights_from_tensor, and checks the output
+        again. Skipped when fewer GPUs than tp_size are visible.
+        """
+        model_name = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_CHAT
+
+        tp_size = ep_size = 2
+        # Skip instead of failing inside the engine launch on hosts with too few GPUs.
+        if torch.cuda.device_count() < tp_size:
+            self.skipTest(f"Need at least {tp_size} GPUs for the MoE tp/ep test")
+
+        print(
+            f"Testing tp_size={tp_size} and ep_size={ep_size} for test_moe_model_release_and_resume"
+        )
+        engine = sgl.Engine(
+            model_path=model_name,
+            random_seed=42,
+            enable_memory_saver=True,
+            mem_fraction_static=0.5,
+            tp_size=tp_size,
+            ep_size=ep_size,
+        )
+        # Always shut the engine down, even when an assertion below fails, so a
+        # failing run does not leave a live engine behind for later tests.
+        try:
+            params = self._common_test_params()
+
+            self._test_initial_generation(
+                engine,
+                params["prompt_moe"],
+                params["sampling_params_moe"],
+                params["expect_output_before_update_weights_moe"],
+            )
+
+            t = time.perf_counter()
+            gpu_memory_usage_before_release = get_gpu_memory_gb()
+            engine.release_memory_occupation()
+            gpu_memory_usage_after_release = get_gpu_memory_gb()
+            self.assertLess(
+                gpu_memory_usage_after_release,
+                gpu_memory_usage_before_release,
+            )
+
+            print(
+                f"Release took {time.perf_counter() - t:.2f}s, memory: {gpu_memory_usage_before_release:.1f} GB → {gpu_memory_usage_after_release:.1f} GB"
+            )
+
+            if _DEBUG_EXTRA:
+                time.sleep(3)
+
+            t = time.perf_counter()
+            engine.resume_memory_occupation()
+            print(
+                f"Resume took {time.perf_counter() - t:.2f}s, memory: {get_gpu_memory_gb():.1f} GB"
+            )
+
+            # Load the base checkpoint with HF and push its parameters into the engine.
+            hf_model_new = AutoModelForCausalLM.from_pretrained(
+                DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE,
+                torch_dtype="bfloat16",
+                device_map="cuda",
+            )
+            engine.update_weights_from_tensor(list(hf_model_new.named_parameters()))
+
+            # destroy the hf model
+            del hf_model_new
+            torch.cuda.empty_cache()
+
+            print("generate (#2)")
+            outputs = engine.generate(
+                params["prompt_moe"], params["sampling_params_moe"]
+            )["text"]
+            self.assertEqual(outputs, params["expect_output_after_update_weights_moe"])
+        finally:
+            engine.shutdown()
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/srt/test_torch_compile_moe.py b/test/srt/test_torch_compile_moe.py
index 62c7f8078..8bc7b45d3 100644
--- a/test/srt/test_torch_compile_moe.py
+++ b/test/srt/test_torch_compile_moe.py
@@ -7,7 +7,7 @@ import requests
 from sglang.srt.utils import is_cuda, kill_process_tree
 from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
-    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST,
+    DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE,
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
     CustomTestCase,
@@ -18,7 +18,7 @@ from sglang.test.test_utils import (
 class TestTorchCompileMoe(CustomTestCase):
     @classmethod
     def setUpClass(cls):
-        cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST
+        cls.model = DEFAULT_SMALL_MOE_MODEL_NAME_FOR_TEST_BASE
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.process = popen_launch_server(
             cls.model,