[HICache]: Refactor HiCache CI (#11011)

Co-authored-by: pansicheng <sicheng.pan.chn@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
hzh0425
2025-10-04 08:51:56 +08:00
committed by GitHub
parent c61b9a1d01
commit c70e58e837
4 changed files with 81 additions and 193 deletions

View File

@@ -19,6 +19,7 @@ from sglang.test.test_utils import (
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
CustomTestCase,
find_available_port,
is_in_ci,
)
@@ -226,6 +227,7 @@ class TestMooncakeBackendLayerFirstLayout(
'''
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
class TestMooncakeBackendPageFirstLayout(
HiCacheStorageMooncakeBackendBaseMixin, CustomTestCase
):
@@ -236,21 +238,6 @@ class TestMooncakeBackendPageFirstLayout(
"""Get additional server arguments specific to configuration - override in subclasses"""
server_args, env_vars = super()._get_additional_server_args_and_env()
server_args["--hicache-mem-layout"] = "page_first"
server_args["--hicache-io-backend"] = "kernel"
return server_args, env_vars
class TestMooncakeBackendPageFirstDirectLayout(
HiCacheStorageMooncakeBackendBaseMixin, CustomTestCase
):
"""Page first layout tests for HiCache-Mooncake backend"""
@classmethod
def _get_additional_server_args_and_env(cls):
"""Get additional server arguments specific to configuration - override in subclasses"""
server_args, env_vars = super()._get_additional_server_args_and_env()
server_args["--hicache-mem-layout"] = "page_first_direct"
server_args["--hicache-io-backend"] = "direct"
return server_args, env_vars
@@ -284,48 +271,15 @@ class TestMooncakeBackendAccuracy(
server_args, env_vars = super()._get_additional_server_args_and_env()
server_args["--hicache-ratio"] = 1.5
server_args["--tp-size"] = 2
server_args["--hicache-mem-layout"] = "page_first_direct"
server_args["--hicache-io-backend"] = "direct"
return server_args, env_vars
def test_eval_accuracy(self):
"""Test eval accuracy with cache persistence across cache flushes"""
print("\n=== Testing Eval Accuracy with Cache Persistence ===")
from test_hicache_storage_file_backend import run_eval_accuracy_test
# First evaluation - populate cache
print("Phase 1: Running initial GSM8K evaluation to populate cache...")
args_initial = SimpleNamespace(
num_shots=5,
data_path=None,
num_questions=50,
max_new_tokens=512,
parallel=10,
host=f"http://{self.base_host}",
port=int(self.base_port),
)
metrics_initial = run_eval_few_shot_gsm8k(args_initial)
# Flush cache to force remote storage access
print("Phase 2: Flushing device cache...")
self.assertTrue(self.flush_cache(), "Cache flush should succeed")
time.sleep(2)
# Second evaluation - should use remote cache
print("Phase 3: Running second GSM8K evaluation using remote cache...")
metrics_cached = run_eval_few_shot_gsm8k(args_initial)
# Verify accuracy consistency
accuracy_diff = abs(metrics_initial["accuracy"] - metrics_cached["accuracy"])
print(f"Accuracy difference: {accuracy_diff:.4f}")
# Assertions
self.assertGreater(
metrics_initial["accuracy"], 0.6, "Initial accuracy should be reasonable"
)
self.assertGreater(
metrics_cached["accuracy"], 0.6, "Cached accuracy should be reasonable"
)
self.assertLess(
accuracy_diff, 0.05, "Accuracy should be consistent between cache states"
)
run_eval_accuracy_test(self)
if __name__ == "__main__":