[HICache]: Refactor HiCache CI (#11011)
Co-authored-by: pansicheng <sicheng.pan.chn@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -19,6 +19,7 @@ from sglang.test.test_utils import (
|
||||
DEFAULT_MLA_MODEL_NAME_FOR_TEST,
|
||||
CustomTestCase,
|
||||
find_available_port,
|
||||
is_in_ci,
|
||||
)
|
||||
|
||||
|
||||
@@ -226,6 +227,7 @@ class TestMooncakeBackendLayerFirstLayout(
|
||||
'''
|
||||
|
||||
|
||||
@unittest.skipIf(is_in_ci(), "To reduce the CI execution time.")
|
||||
class TestMooncakeBackendPageFirstLayout(
|
||||
HiCacheStorageMooncakeBackendBaseMixin, CustomTestCase
|
||||
):
|
||||
@@ -236,21 +238,6 @@ class TestMooncakeBackendPageFirstLayout(
|
||||
"""Get additional server arguments specific to configuration - override in subclasses"""
|
||||
server_args, env_vars = super()._get_additional_server_args_and_env()
|
||||
server_args["--hicache-mem-layout"] = "page_first"
|
||||
server_args["--hicache-io-backend"] = "kernel"
|
||||
return server_args, env_vars
|
||||
|
||||
|
||||
class TestMooncakeBackendPageFirstDirectLayout(
|
||||
HiCacheStorageMooncakeBackendBaseMixin, CustomTestCase
|
||||
):
|
||||
"""Page first layout tests for HiCache-Mooncake backend"""
|
||||
|
||||
@classmethod
|
||||
def _get_additional_server_args_and_env(cls):
|
||||
"""Get additional server arguments specific to configuration - override in subclasses"""
|
||||
server_args, env_vars = super()._get_additional_server_args_and_env()
|
||||
server_args["--hicache-mem-layout"] = "page_first_direct"
|
||||
server_args["--hicache-io-backend"] = "direct"
|
||||
return server_args, env_vars
|
||||
|
||||
|
||||
@@ -284,48 +271,15 @@ class TestMooncakeBackendAccuracy(
|
||||
server_args, env_vars = super()._get_additional_server_args_and_env()
|
||||
server_args["--hicache-ratio"] = 1.5
|
||||
server_args["--tp-size"] = 2
|
||||
server_args["--hicache-mem-layout"] = "page_first_direct"
|
||||
server_args["--hicache-io-backend"] = "direct"
|
||||
return server_args, env_vars
|
||||
|
||||
def test_eval_accuracy(self):
|
||||
"""Test eval accuracy with cache persistence across cache flushes"""
|
||||
print("\n=== Testing Eval Accuracy with Cache Persistence ===")
|
||||
from test_hicache_storage_file_backend import run_eval_accuracy_test
|
||||
|
||||
# First evaluation - populate cache
|
||||
print("Phase 1: Running initial GSM8K evaluation to populate cache...")
|
||||
args_initial = SimpleNamespace(
|
||||
num_shots=5,
|
||||
data_path=None,
|
||||
num_questions=50,
|
||||
max_new_tokens=512,
|
||||
parallel=10,
|
||||
host=f"http://{self.base_host}",
|
||||
port=int(self.base_port),
|
||||
)
|
||||
metrics_initial = run_eval_few_shot_gsm8k(args_initial)
|
||||
|
||||
# Flush cache to force remote storage access
|
||||
print("Phase 2: Flushing device cache...")
|
||||
self.assertTrue(self.flush_cache(), "Cache flush should succeed")
|
||||
time.sleep(2)
|
||||
|
||||
# Second evaluation - should use remote cache
|
||||
print("Phase 3: Running second GSM8K evaluation using remote cache...")
|
||||
metrics_cached = run_eval_few_shot_gsm8k(args_initial)
|
||||
|
||||
# Verify accuracy consistency
|
||||
accuracy_diff = abs(metrics_initial["accuracy"] - metrics_cached["accuracy"])
|
||||
print(f"Accuracy difference: {accuracy_diff:.4f}")
|
||||
|
||||
# Assertions
|
||||
self.assertGreater(
|
||||
metrics_initial["accuracy"], 0.6, "Initial accuracy should be reasonable"
|
||||
)
|
||||
self.assertGreater(
|
||||
metrics_cached["accuracy"], 0.6, "Cached accuracy should be reasonable"
|
||||
)
|
||||
self.assertLess(
|
||||
accuracy_diff, 0.05, "Accuracy should be consistent between cache states"
|
||||
)
|
||||
run_eval_accuracy_test(self)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user