[Hicache]: Add E2E CI For 3FS-KVStore (#10131)

2025-09-08 16:54:50 +08:00
parent 78f139812a
commit ec99668ab7
7 changed files with 417 additions and 214 deletions
--- a/test/srt/hicache/test_hicache_storage_3fs_backend.py
+++ b/test/srt/hicache/test_hicache_storage_3fs_backend.py
@@ -0,0 +1,135 @@
+"""
+End-to-end tests for HiCache Storage with the 3FS (hf3fs) backend.
+Usage:
+    python3 -m pytest test/srt/hicache/test_hicache_storage_3fs_backend.py -v
+"""
+
+import json
+import os
+import time
+import unittest
+from types import SimpleNamespace
+
+from test_hicache_storage_file_backend import HiCacheStorageBaseMixin
+
+from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
+from sglang.test.test_utils import CustomTestCase
+
+
+class HiCacheStorage3FSBackendBaseMixin(HiCacheStorageBaseMixin):
+    """Mixin that configures the inherited HiCache storage setup to use the hf3fs backend."""
+
+    @classmethod
+    def _get_additional_server_args_and_env(cls):
+        """Return (server_args, env_vars) for hf3fs; also writes the config JSON under cls.temp_dir."""
+        # HF3FS settings; use_mock_hf3fs_client is on, so presumably no real 3FS cluster is needed (confirm)
+        hf3fs_config = {
+            "file_path_prefix": os.path.join(cls.temp_dir, "hicache"),
+            "file_size": 1024 * 1024 * 1024 * 2,
+            "numjobs": 2,
+            "entries": 8,
+            "use_mock_hf3fs_client": True,
+        }
+
+        # Persist the config as hf3fs_config.json inside cls.temp_dir
+        config_file = os.path.join(cls.temp_dir, "hf3fs_config.json")
+        with open(config_file, "w") as f:
+            json.dump(hf3fs_config, f, indent=2)
+
+        server_args = {
+            "--tp-size": 1,
+            "--hicache-ratio": 1.2,
+            "--hicache-storage-backend": "hf3fs",
+            "--hicache-storage-backend-extra-config": json.dumps(hf3fs_config),
+        }
+
+        # Expose the config file path via env var (the same config is also passed inline above)
+        env_vars = {
+            "SGLANG_HICACHE_HF3FS_CONFIG_PATH": config_file,
+        }
+
+        return server_args, env_vars
+
+
+class TestHf3fsBackendLayerFirstLayout(
+    HiCacheStorage3FSBackendBaseMixin, CustomTestCase
+):
+    """Layer first layout tests for HiCache-Hf3fs backend"""
+
+    @classmethod
+    def _get_additional_server_args_and_env(cls):
+        """Extend the hf3fs base args with the layer_first memory layout and the direct IO backend."""
+        server_args, env_vars = super()._get_additional_server_args_and_env()
+        server_args["--hicache-mem-layout"] = "layer_first"
+        server_args["--hicache-io-backend"] = "direct"
+        return server_args, env_vars
+
+
+class TestHf3fsBackendPageFirstLayout(
+    HiCacheStorage3FSBackendBaseMixin, CustomTestCase
+):
+    """Page first layout tests for HiCache-Hf3fs backend"""
+
+    @classmethod
+    def _get_additional_server_args_and_env(cls):
+        """Extend the hf3fs base args with the page_first memory layout."""
+        server_args, env_vars = super()._get_additional_server_args_and_env()
+        server_args["--hicache-mem-layout"] = "page_first"
+        return server_args, env_vars
+
+
+class TestHf3fsBackendAccuracy(HiCacheStorage3FSBackendBaseMixin, CustomTestCase):
+    """GSM8K accuracy tests for the HiCache hf3fs backend, run before and after a cache flush"""
+
+    @classmethod
+    def _get_additional_server_args_and_env(cls):
+        """Extend the hf3fs base args, overriding hicache-ratio to 1.5 and tp-size to 2."""
+        server_args, env_vars = super()._get_additional_server_args_and_env()
+        server_args["--hicache-ratio"] = 1.5
+        server_args["--tp-size"] = 2
+        return server_args, env_vars
+
+    def test_eval_accuracy(self):
+        """Run GSM8K twice around a cache flush; both accuracies must exceed 0.6 and differ by < 0.05."""
+        print("\n=== Testing Eval Accuracy with Cache Persistence ===")
+
+        # First evaluation - populate cache
+        print("Phase 1: Running initial GSM8K evaluation to populate cache...")
+        args_initial = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=50,
+            max_new_tokens=512,
+            parallel=10,
+            host=f"http://{self.base_host}",
+            port=int(self.base_port),
+        )
+        metrics_initial = run_eval_few_shot_gsm8k(args_initial)
+
+        # Flush cache to force remote storage access; the test fails here if the flush is rejected
+        print("Phase 2: Flushing device cache...")
+        self.assertTrue(self.flush_cache(), "Cache flush should succeed")
+        time.sleep(2)  # NOTE(review): fixed pause, presumably to let the flush settle - confirm
+
+        # Second evaluation reuses the same args; expected to be served from remote storage
+        print("Phase 3: Running second GSM8K evaluation using remote cache...")
+        metrics_cached = run_eval_few_shot_gsm8k(args_initial)
+
+        # Absolute accuracy gap between the pre-flush and post-flush runs
+        accuracy_diff = abs(metrics_initial["accuracy"] - metrics_cached["accuracy"])
+        print(f"Accuracy difference: {accuracy_diff:.4f}")
+
+        # Assertions: each run must clear 0.6, and the two runs must agree within 0.05
+        self.assertGreater(
+            metrics_initial["accuracy"], 0.6, "Initial accuracy should be reasonable"
+        )
+        self.assertGreater(
+            metrics_cached["accuracy"], 0.6, "Cached accuracy should be reasonable"
+        )
+        self.assertLess(
+            accuracy_diff, 0.05, "Accuracy should be consistent between cache states"
+        )
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
--- a/test/srt/hicache/test_hicache_storage_benchmark.py
+++ b/test/srt/hicache/test_hicache_storage_benchmark.py
@@ -1,192 +0,0 @@
-"""
-Benchmark tests for HiCache Storage functionality.
-Usage:
-    python3 -m pytest test/srt/hicache/test_hicache_storage_benchmark.py -v
-"""
-
-import time
-import unittest
-from types import SimpleNamespace
-from typing import Dict
-
-import requests
-from test_hicache_storage_e2e import HiCacheStorageBaseTest
-
-from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
-from sglang.test.test_utils import is_in_ci, write_github_step_summary
-
-
-class TestHiCacheStorageBenchmark(HiCacheStorageBaseTest):
-    """Benchmark tests for HiCache Storage functionality"""
-
-    @classmethod
-    def _get_additional_server_args_and_env(cls):
-        """Get additional server arguments specific to configuration - override in subclasses"""
-        server_args = {"--tp-size": 2, "--hicache-ratio": 1.5}
-        return server_args, {}
-
-    def flush_cache(self) -> bool:
-        """Flush device cache to force remote storage access"""
-        try:
-            response = requests.post(f"{self.base_url}/flush_cache", timeout=10)
-            return response.status_code == 200
-        except requests.RequestException:
-            return False
-
-    # === Accuracy Tests ===
-    def test_eval_accuracy_with_cache_persistence(self):
-        """Test eval accuracy with cache persistence across cache flushes"""
-        print("\n=== Testing Eval Accuracy with Cache Persistence ===")
-
-        # First evaluation - populate cache
-        print("Phase 1: Running initial GSM8K evaluation to populate cache...")
-        args_initial = SimpleNamespace(
-            num_shots=5,
-            data_path=None,
-            num_questions=400,
-            max_new_tokens=512,
-            parallel=32,
-            host=f"http://{self.base_host}",
-            port=int(self.base_port),
-        )
-        metrics_initial = run_eval_few_shot_gsm8k(args_initial)
-        print(f"Evaluation metrics: {metrics_initial}")
-        self.assertGreater(metrics_initial["accuracy"], 0.60)
-
-        # Flush cache to force remote storage access
-        print("Phase 2: Flushing device cache...")
-        self.assertTrue(self.flush_cache(), "Cache flush should succeed")
-        time.sleep(2)
-
-        # Second evaluation - should use remote cache
-        print("Phase 3: Running second GSM8K evaluation using remote cache...")
-
-        start_time = time.time()
-        metrics_cached = run_eval_few_shot_gsm8k(args_initial)
-        cached_time = time.time() - start_time
-
-        print(f"Cached evaluation completed in {cached_time:.2f}s")
-        print(f"Cached accuracy: {metrics_cached['accuracy']:.3f}")
-        print(f"Cached throughput: {metrics_cached['output_throughput']:.2f} token/s")
-
-        # Verify accuracy consistency
-        accuracy_diff = abs(metrics_initial["accuracy"] - metrics_cached["accuracy"])
-        print(f"Accuracy difference: {accuracy_diff:.4f}")
-
-        # Assertions
-        self.assertGreater(
-            metrics_initial["accuracy"], 0.5, "Initial accuracy should be reasonable"
-        )
-        self.assertGreater(
-            metrics_cached["accuracy"], 0.5, "Cached accuracy should be reasonable"
-        )
-        self.assertLess(
-            accuracy_diff, 0.05, "Accuracy should be consistent between cache states"
-        )
-
-        # Performance should be similar or better with cache
-        throughput_ratio = (
-            metrics_cached["output_throughput"] / metrics_initial["output_throughput"]
-        )
-        print(f"Throughput ratio (cached/initial): {throughput_ratio:.2f}")
-
-        if is_in_ci():
-            write_github_step_summary(
-                f"### HiCache Storage Accuracy Test\n"
-                f"Initial accuracy: {metrics_initial['accuracy']:.3f}\n"
-                f"Cached accuracy: {metrics_cached['accuracy']:.3f}\n"
-                f"Accuracy difference: {accuracy_diff:.4f}\n"
-                f"Throughput ratio: {throughput_ratio:.2f}\n"
-            )
-
-    # === Performance Benchmark Tests ===
-
-    def test_throughput_benchmark_with_hicache(self):
-        """Benchmark throughput performance with HiCache enabled"""
-        print("\n=== Benchmarking Throughput with HiCache ===")
-
-        # throughput test
-        res1 = self._run_throughput_benchmark(
-            test_name="hicache_offline_throughput",
-            num_prompts=200,
-            request_rate=10,
-            additional_args=[],
-        )
-
-        # Flush cache to force remote storage access
-        print("Phase 2: Flushing device cache...")
-        self.assertTrue(self.flush_cache(), "Cache flush should succeed")
-        time.sleep(2)
-
-        # Second benchmark, should use remote cache
-        res2 = self._run_throughput_benchmark(
-            test_name="hicache_online_throughput",
-            num_prompts=400,
-            request_rate=10,
-            additional_args=[],
-        )
-
-        if is_in_ci():
-            write_github_step_summary(
-                f"### HiCache Storage FileBackend Benchmark Test\n"
-                f"First time throughput: {res1['input_throughput']:.2f} token/s\n"
-                f"Second time throughput: {res2['input_throughput']:.2f} token/s\n"
-                f"First time TTFT: {res1['mean_ttft_ms']:.2f} ms\n"
-                f"Second time TTFT: {res2['mean_ttft_ms']:.2f} ms\n"
-            )
-
-    def _run_throughput_benchmark(
-        self,
-        test_name: str,
-        num_prompts: int,
-        request_rate: float,
-        dataset_name: str = "random",
-        additional_args: list = None,
-    ) -> Dict:
-        """Helper method to run throughput benchmarks"""
-        if additional_args is None:
-            additional_args = []
-
-        print(f"Running {test_name} benchmark...")
-        start_time = time.time()
-
-        try:
-            # Use the existing server instead of launching a new one
-            from sglang.bench_serving import run_benchmark
-            from sglang.test.test_utils import get_benchmark_args
-
-            args = get_benchmark_args(
-                base_url=self.base_url,
-                dataset_name=dataset_name,
-                tokenizer=self.model,
-                num_prompts=num_prompts,
-                request_rate=request_rate,
-                random_input_len=1024,
-                random_output_len=64,
-            )
-
-            # Run benchmark
-            result = run_benchmark(args)
-
-            elapsed_time = time.time() - start_time
-            print(f"{test_name} completed in {elapsed_time:.2f}s")
-            print(
-                f"Output throughput: {result.get('output_throughput', 0.0):.2f} token/s"
-            )
-
-            return result
-
-        except Exception as e:
-            print(f"Benchmark {test_name} failed: {e}")
-            # Fallback to avoid hard failure; return minimal metrics
-            return {
-                "output_throughput": 0.0,
-                "input_throughput": 0.0,
-                "mean_ttft_ms": float("inf"),
-                "mean_latency_ms": float("inf"),
-                "p99_ttft_ms": float("inf"),
-            }
-
-
-if __name__ == "__main__":
-    unittest.main(verbosity=2)
--- a/test/srt/hicache/test_hicache_storage_file_backend.py
+++ b/test/srt/hicache/test_hicache_storage_file_backend.py
@@ -9,6 +9,7 @@ import random
 import tempfile
 import time
 import unittest
+from types import SimpleNamespace
 from typing import Dict
 from urllib.parse import urlparse

@@ -16,6 +17,7 @@ import requests

 from sglang.bench_serving import get_tokenizer
 from sglang.srt.utils import kill_process_tree
+from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
    DEFAULT_MLA_MODEL_NAME_FOR_TEST,
    DEFAULT_MODEL_NAME_FOR_TEST,
@@ -26,8 +28,8 @@ from sglang.test.test_utils import (
 )


-class HiCacheStorageBaseTest(CustomTestCase):
-    """Base test class with common setup and utilities"""
+class HiCacheStorageBaseMixin:
+    """Base mixin class with common setup and utilities"""

    @classmethod
    def setUpClass(cls):
@@ -166,11 +168,7 @@ class HiCacheStorageBaseTest(CustomTestCase):
            return False

    def gen_prompt(self, token_num: int) -> str:
-        """Generate a random prompt of specified token length using tokenizer vocabulary.
-
-        This function mimics the implementation from bench_serving.py to create
-        realistic prompts for testing cache behavior.
-        """
+        """Generate a random prompt of specified token length using tokenizer vocabulary."""
        all_available_tokens = list(self.tokenizer.get_vocab().values())
        selected_tokens = random.choices(all_available_tokens, k=token_num)
        return self.tokenizer.decode(selected_tokens)
@@ -201,10 +199,9 @@ class HiCacheStorageBaseTest(CustomTestCase):

        # Second request with extended prompt - should hit remote cache
        print("Step 2: Testing cache hit from remote storage...")
-        extended_prompt = base_prompt + "\n\n" + self.gen_prompt(64)

        start_time = time.time()
-        response2 = self.send_request(extended_prompt, max_tokens=150)
+        response2 = self.send_request(base_prompt, max_tokens=150)
        retrieval_time = time.time() - start_time

        cached_tokens = self.get_cached_tokens(response2)
@@ -213,12 +210,12 @@ class HiCacheStorageBaseTest(CustomTestCase):
        )

        # Assert cached tokens indicate a remote hit
-        self.assertEqual(
-            cached_tokens, 768, "Expected significant cached tokens for remote hit"
+        self.assertGreater(
+            cached_tokens, 700, "Expected significant cached tokens for remote hit"
        )


-class TestHiCacheStorageTP(HiCacheStorageBaseTest):
+class TestHiCacheStorageTP(HiCacheStorageBaseMixin, CustomTestCase):
    """Multi-TP tests for HiCache Storage functionality"""

    @classmethod
@@ -228,7 +225,7 @@ class TestHiCacheStorageTP(HiCacheStorageBaseTest):
        return server_args, {}


-class TestHiCacheStorageLayerFirstDirectIO(HiCacheStorageBaseTest):
+class TestHiCacheStorageLayerFirstDirectIO(HiCacheStorageBaseMixin, CustomTestCase):
    """Layer first direct tests for HiCache Storage functionality"""

    @classmethod
@@ -241,7 +238,7 @@ class TestHiCacheStorageLayerFirstDirectIO(HiCacheStorageBaseTest):
        return server_args, {}


-class TestHiCacheStoragePageFirstLayout(HiCacheStorageBaseTest):
+class TestHiCacheStoragePageFirstLayout(HiCacheStorageBaseMixin, CustomTestCase):
    """Page first layout tests for HiCache Storage functionality"""

    @classmethod
@@ -251,7 +248,7 @@ class TestHiCacheStoragePageFirstLayout(HiCacheStorageBaseTest):
        return server_args, {}


-class TestHiCacheStorageMLA(HiCacheStorageBaseTest):
+class TestHiCacheStorageMLA(HiCacheStorageBaseMixin, CustomTestCase):
    """MLA Model tests for HiCache Storage functionality"""

    @classmethod
@@ -266,6 +263,57 @@ class TestHiCacheStorageMLA(HiCacheStorageBaseTest):
        return server_args, {}


+class TestHiCacheStorageAccuracy(HiCacheStorageBaseMixin, CustomTestCase):
+    """GSM8K accuracy tests for HiCache Storage, run before and after a cache flush"""
+
+    @classmethod
+    def _get_additional_server_args_and_env(cls):
+        """Return server args with tp-size 2 and hicache-ratio 1.5, plus an empty env-var dict."""
+        server_args = {"--tp-size": 2, "--hicache-ratio": 1.5}
+        return server_args, {}
+
+    def test_eval_accuracy(self):
+        """Run GSM8K twice around a cache flush; both accuracies must exceed 0.6 and differ by < 0.05."""
+        print("\n=== Testing Eval Accuracy with Cache Persistence ===")
+
+        # First evaluation - populate cache
+        print("Phase 1: Running initial GSM8K evaluation to populate cache...")
+        args_initial = SimpleNamespace(
+            num_shots=5,
+            data_path=None,
+            num_questions=50,
+            max_new_tokens=512,
+            parallel=10,
+            host=f"http://{self.base_host}",
+            port=int(self.base_port),
+        )
+        metrics_initial = run_eval_few_shot_gsm8k(args_initial)
+
+        # Flush cache to force remote storage access; the test fails here if the flush is rejected
+        print("Phase 2: Flushing device cache...")
+        self.assertTrue(self.flush_cache(), "Cache flush should succeed")
+        time.sleep(2)  # NOTE(review): fixed pause, presumably to let the flush settle - confirm
+
+        # Second evaluation reuses the same args; expected to be served from remote storage
+        print("Phase 3: Running second GSM8K evaluation using remote cache...")
+        metrics_cached = run_eval_few_shot_gsm8k(args_initial)
+
+        # Absolute accuracy gap between the pre-flush and post-flush runs
+        accuracy_diff = abs(metrics_initial["accuracy"] - metrics_cached["accuracy"])
+        print(f"Accuracy difference: {accuracy_diff:.4f}")
+
+        # Assertions: each run must clear 0.6, and the two runs must agree within 0.05
+        self.assertGreater(
+            metrics_initial["accuracy"], 0.6, "Initial accuracy should be reasonable"
+        )
+        self.assertGreater(
+            metrics_cached["accuracy"], 0.6, "Cached accuracy should be reasonable"
+        )
+        self.assertLess(
+            accuracy_diff, 0.05, "Accuracy should be consistent between cache states"
+        )
+
+
 # TODO: Add other backends tests（3fs/mooncake）
 # class TestHiCacheStorageMooncakeBackend(HiCacheStorageBaseTest):
 #     """Mooncake backend tests for HiCache Storage functionality"""
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -125,8 +125,8 @@ suites = {
        TestFile("test_dp_attention.py", 277),
        TestFile("test_patch_torch.py", 19),
        TestFile("test_release_memory_occupation.py", 127),
-        TestFile("hicache/test_hicache_storage_e2e.py", 400),
-        TestFile("hicache/test_hicache_storage_benchmark.py", 400),
+        TestFile("hicache/test_hicache_storage_file_backend.py", 400),
+        TestFile("hicache/test_hicache_storage_3fs_backend.py", 400),
    ],
    "per-commit-4-gpu": [
        TestFile("test_gpt_oss_4gpu.py", 600),